Files
PureRL-7B-v5-07-brierG/trainer_state.json
ModelHub XC 3c904e2565 初始化项目,由ModelHub XC社区提供模型
Model: zhaohq/PureRL-7B-v5-07-brierG
Source: Original Platform
2026-05-28 11:59:20 +08:00

8641 lines
336 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.21333333333333335,
"eval_steps": 500,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"calib/answer_extract_rate": 0.06640625,
"calib/auroc": 0.75,
"calib/avg_num_step_conf": 0.359375,
"calib/ece": 0.5285714285714285,
"calib/final_conf_rate": 0.0546875,
"calib/format_rate": 0.04296875,
"calib/frac_conf_gt_0.9": 0.9285714285714286,
"calib/gap": 0.02833333333333321,
"calib/mean_conf": 0.9571428571428572,
"calib/mu_c": 0.9733333333333333,
"calib/mu_w": 0.9450000000000001,
"calib/nonempty_final_conf_rate": 0.0546875,
"calib/nonempty_reasoning_rate": 0.078125,
"calib/nonempty_step_conf_rate": 0.05859375,
"calib/pce": 0.5285714285714285,
"calib/std_conf": 0.033896601479156206,
"calib/step_conf_rate": 0.05859375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.078125,
"completions/max_length": 2853.0,
"completions/max_terminated_length": 2853.0,
"completions/mean_length": 658.8203125,
"completions/mean_terminated_length": 714.6525268554688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0010666666666666667,
"grad_norm": 0.09911441057920456,
"learning_rate": 2.5000000000000004e-07,
"loss": 0.0164,
"num_tokens": 276242.0,
"reward": 0.06046927720308304,
"reward_std": 0.11048541963100433,
"rewards/accuracy_reward_step": 0.02734375,
"rewards/brier_reward_group": 0.015035448595881462,
"rewards/format_reward_step": 0.04296875,
"rewards/stepwise_brier_reward": 0.03152916580438614,
"step": 1
},
{
"calib/answer_extract_rate": 0.06640625,
"calib/auroc": 0.1851851851851852,
"calib/avg_num_step_conf": 0.24609375,
"calib/ece": 0.2141666666666665,
"calib/final_conf_rate": 0.046875,
"calib/format_rate": 0.046875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -0.01666666666666672,
"calib/mean_conf": 0.9641666666666665,
"calib/mu_c": 0.9599999999999999,
"calib/mu_w": 0.9766666666666666,
"calib/nonempty_final_conf_rate": 0.046875,
"calib/nonempty_reasoning_rate": 0.06640625,
"calib/nonempty_step_conf_rate": 0.0546875,
"calib/pce": 0.2141666666666665,
"calib/std_conf": 0.014409680388158833,
"calib/step_conf_rate": 0.0546875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0859375,
"completions/max_length": 2927.0,
"completions/max_terminated_length": 2927.0,
"completions/mean_length": 749.54296875,
"completions/mean_terminated_length": 820.0128784179688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0021333333333333334,
"grad_norm": 0.10169611871242523,
"learning_rate": 5.000000000000001e-07,
"loss": -0.0156,
"num_tokens": 571413.0,
"reward": 0.07521682232618332,
"reward_std": 0.16352277994155884,
"rewards/accuracy_reward_step": 0.0390625,
"rewards/brier_reward_group": 0.01766689494252205,
"rewards/format_reward_step": 0.046875,
"rewards/stepwise_brier_reward": 0.03320039063692093,
"step": 2
},
{
"calib/answer_extract_rate": 0.03125,
"calib/auroc": 1.0,
"calib/avg_num_step_conf": 0.15625,
"calib/ece": 0.15399999999999991,
"calib/final_conf_rate": 0.01953125,
"calib/format_rate": 0.01953125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.04249999999999987,
"calib/mean_conf": 0.954,
"calib/mu_c": 0.9624999999999999,
"calib/mu_w": 0.92,
"calib/nonempty_final_conf_rate": 0.01953125,
"calib/nonempty_reasoning_rate": 0.03125,
"calib/nonempty_step_conf_rate": 0.0234375,
"calib/pce": 0.15399999999999991,
"calib/std_conf": 0.017435595774162666,
"calib/step_conf_rate": 0.0234375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0546875,
"completions/max_length": 3072.0,
"completions/max_terminated_length": 3072.0,
"completions/mean_length": 757.26171875,
"completions/mean_terminated_length": 801.0701904296875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0032,
"grad_norm": 0.06682799756526947,
"learning_rate": 7.5e-07,
"loss": -0.01,
"num_tokens": 870528.0,
"reward": 0.03093426302075386,
"reward_std": 0.06631891429424286,
"rewards/accuracy_reward_step": 0.015625,
"rewards/brier_reward_group": 0.007611426059156656,
"rewards/format_reward_step": 0.01953125,
"rewards/stepwise_brier_reward": 0.014563126489520073,
"step": 3
},
{
"calib/answer_extract_rate": 0.03125,
"calib/auroc": 0.275,
"calib/avg_num_step_conf": 0.15234375,
"calib/ece": 0.4111111111111112,
"calib/final_conf_rate": 0.03515625,
"calib/format_rate": 0.02734375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -0.010499999999999954,
"calib/mean_conf": 0.9666666666666668,
"calib/mu_c": 0.962,
"calib/mu_w": 0.9724999999999999,
"calib/nonempty_final_conf_rate": 0.03515625,
"calib/nonempty_reasoning_rate": 0.03515625,
"calib/nonempty_step_conf_rate": 0.03125,
"calib/pce": 0.4111111111111112,
"calib/std_conf": 0.016996731711975965,
"calib/step_conf_rate": 0.03125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.12890625,
"completions/max_length": 3052.0,
"completions/max_terminated_length": 3052.0,
"completions/mean_length": 641.2734375,
"completions/mean_terminated_length": 736.17041015625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.004266666666666667,
"grad_norm": 0.011850867420434952,
"learning_rate": 1.0000000000000002e-06,
"loss": -0.005,
"num_tokens": 1140862.0,
"reward": 0.03999038413167,
"reward_std": 0.09721466153860092,
"rewards/accuracy_reward_step": 0.01953125,
"rewards/brier_reward_group": 0.007201758213341236,
"rewards/format_reward_step": 0.02734375,
"rewards/stepwise_brier_reward": 0.019947277382016182,
"step": 4
},
{
"calib/answer_extract_rate": 0.03125,
"calib/auroc": 0.7,
"calib/avg_num_step_conf": 0.15625,
"calib/ece": 0.6800000000000002,
"calib/final_conf_rate": 0.02734375,
"calib/format_rate": 0.0234375,
"calib/frac_conf_gt_0.9": 0.8571428571428571,
"calib/gap": 0.020000000000000018,
"calib/mean_conf": 0.9657142857142859,
"calib/mu_c": 0.98,
"calib/mu_w": 0.96,
"calib/nonempty_final_conf_rate": 0.02734375,
"calib/nonempty_reasoning_rate": 0.03515625,
"calib/nonempty_step_conf_rate": 0.02734375,
"calib/pce": 0.6800000000000002,
"calib/std_conf": 0.0328881840949181,
"calib/step_conf_rate": 0.02734375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.109375,
"completions/max_length": 3019.0,
"completions/max_terminated_length": 3019.0,
"completions/mean_length": 707.04296875,
"completions/mean_terminated_length": 793.872802734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.005333333333333333,
"grad_norm": 0.010113976895809174,
"learning_rate": 1.25e-06,
"loss": -0.0062,
"num_tokens": 1428553.0,
"reward": 0.02361677773296833,
"reward_std": 0.06166065111756325,
"rewards/accuracy_reward_step": 0.0078125,
"rewards/brier_reward_group": 0.002863086061552167,
"rewards/format_reward_step": 0.0234375,
"rewards/stepwise_brier_reward": 0.013479027897119522,
"step": 5
},
{
"calib/answer_extract_rate": 0.0859375,
"calib/auroc": 0.53125,
"calib/avg_num_step_conf": 0.41015625,
"calib/ece": 0.5464999999999997,
"calib/final_conf_rate": 0.078125,
"calib/format_rate": 0.0703125,
"calib/frac_conf_gt_0.9": 0.8,
"calib/gap": 0.003750000000000142,
"calib/mean_conf": 0.9464999999999998,
"calib/mu_c": 0.94875,
"calib/mu_w": 0.9449999999999998,
"calib/nonempty_final_conf_rate": 0.078125,
"calib/nonempty_reasoning_rate": 0.0859375,
"calib/nonempty_step_conf_rate": 0.078125,
"calib/pce": 0.5464999999999997,
"calib/std_conf": 0.036779749863205966,
"calib/step_conf_rate": 0.078125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08203125,
"completions/max_length": 2886.0,
"completions/max_terminated_length": 2886.0,
"completions/mean_length": 652.85546875,
"completions/mean_terminated_length": 711.1957397460938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0064,
"grad_norm": 0.02027568221092224,
"learning_rate": 1.5e-06,
"loss": -0.013,
"num_tokens": 1701636.0,
"reward": 0.0833994597196579,
"reward_std": 0.138918936252594,
"rewards/accuracy_reward_step": 0.03125,
"rewards/brier_reward_group": 0.025349901989102364,
"rewards/format_reward_step": 0.0703125,
"rewards/stepwise_brier_reward": 0.04262293130159378,
"step": 6
},
{
"calib/answer_extract_rate": 0.046875,
"calib/auroc": 0.2833333333333333,
"calib/avg_num_step_conf": 0.29296875,
"calib/ece": 0.33909090909090905,
"calib/final_conf_rate": 0.04296875,
"calib/format_rate": 0.0390625,
"calib/frac_conf_gt_0.9": 0.9090909090909091,
"calib/gap": 0.15866666666666662,
"calib/mean_conf": 0.8845454545454546,
"calib/mu_c": 0.9566666666666667,
"calib/mu_w": 0.798,
"calib/nonempty_final_conf_rate": 0.04296875,
"calib/nonempty_reasoning_rate": 0.0546875,
"calib/nonempty_step_conf_rate": 0.046875,
"calib/pce": 0.33909090909090905,
"calib/std_conf": 0.26475936368928865,
"calib/step_conf_rate": 0.046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.09375,
"completions/max_length": 3071.0,
"completions/max_terminated_length": 3071.0,
"completions/mean_length": 812.89453125,
"completions/mean_terminated_length": 896.987060546875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 5.0,
"epoch": 0.007466666666666667,
"grad_norm": 0.013131404295563698,
"learning_rate": 1.75e-06,
"loss": -0.0002,
"num_tokens": 2017161.0,
"reward": 0.05258158966898918,
"reward_std": 0.13502581417560577,
"rewards/accuracy_reward_step": 0.0234375,
"rewards/brier_reward_group": 0.013036327436566353,
"rewards/format_reward_step": 0.0390625,
"rewards/stepwise_brier_reward": 0.02541504241526127,
"step": 7
},
{
"calib/answer_extract_rate": 0.05078125,
"calib/auroc": 0.36904761904761907,
"calib/avg_num_step_conf": 0.21484375,
"calib/ece": 0.49076923076923085,
"calib/final_conf_rate": 0.05078125,
"calib/format_rate": 0.0390625,
"calib/frac_conf_gt_0.9": 0.8461538461538461,
"calib/gap": -0.004285714285714337,
"calib/mean_conf": 0.9523076923076924,
"calib/mu_c": 0.9500000000000001,
"calib/mu_w": 0.9542857142857144,
"calib/nonempty_final_conf_rate": 0.05078125,
"calib/nonempty_reasoning_rate": 0.05078125,
"calib/nonempty_step_conf_rate": 0.04296875,
"calib/pce": 0.49076923076923085,
"calib/std_conf": 0.0257663603452988,
"calib/step_conf_rate": 0.04296875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08203125,
"completions/max_length": 3029.0,
"completions/max_terminated_length": 3029.0,
"completions/mean_length": 727.203125,
"completions/mean_terminated_length": 792.1871948242188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 1.0,
"epoch": 0.008533333333333334,
"grad_norm": 0.016019297763705254,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.0123,
"num_tokens": 2309837.0,
"reward": 0.050940170884132385,
"reward_std": 0.11515554040670395,
"rewards/accuracy_reward_step": 0.0234375,
"rewards/brier_reward_group": 0.008852246217429638,
"rewards/format_reward_step": 0.0390625,
"rewards/stepwise_brier_reward": 0.02303342893719673,
"step": 8
},
{
"calib/answer_extract_rate": 0.01171875,
"calib/avg_num_step_conf": 0.1171875,
"calib/ece": 0.9525,
"calib/final_conf_rate": 0.015625,
"calib/format_rate": 0.01171875,
"calib/frac_conf_gt_0.9": 0.75,
"calib/mean_conf": 0.9525,
"calib/mu_c": NaN,
"calib/mu_w": 0.9525,
"calib/nonempty_final_conf_rate": 0.015625,
"calib/nonempty_reasoning_rate": 0.0234375,
"calib/nonempty_step_conf_rate": 0.0234375,
"calib/pce": 0.9525,
"calib/std_conf": 0.04322904116447644,
"calib/step_conf_rate": 0.0234375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08984375,
"completions/max_length": 3030.0,
"completions/max_terminated_length": 3030.0,
"completions/mean_length": 706.16015625,
"completions/mean_terminated_length": 775.866943359375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0096,
"grad_norm": 0.00911067146807909,
"learning_rate": 2.25e-06,
"loss": -0.0081,
"num_tokens": 2598150.0,
"reward": 0.007569408044219017,
"reward_std": 0.021409519016742706,
"rewards/accuracy_reward_step": 0.0,
"rewards/brier_reward_group": 0.001342187519185245,
"rewards/format_reward_step": 0.01171875,
"rewards/stepwise_brier_reward": 0.0054979450069367886,
"step": 9
},
{
"calib/answer_extract_rate": 0.0390625,
"calib/auroc": 0.7916666666666667,
"calib/avg_num_step_conf": 0.26171875,
"calib/ece": 0.5619999999999999,
"calib/final_conf_rate": 0.0390625,
"calib/format_rate": 0.03515625,
"calib/frac_conf_gt_0.9": 0.9,
"calib/gap": 0.029999999999999916,
"calib/mean_conf": 0.962,
"calib/mu_c": 0.98,
"calib/mu_w": 0.9500000000000001,
"calib/nonempty_final_conf_rate": 0.0390625,
"calib/nonempty_reasoning_rate": 0.0546875,
"calib/nonempty_step_conf_rate": 0.05078125,
"calib/pce": 0.5619999999999999,
"calib/std_conf": 0.029933259094191523,
"calib/step_conf_rate": 0.05078125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08203125,
"completions/max_length": 3068.0,
"completions/max_terminated_length": 3068.0,
"completions/mean_length": 732.6484375,
"completions/mean_terminated_length": 798.119140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.010666666666666666,
"grad_norm": 0.014519107528030872,
"learning_rate": 2.5e-06,
"loss": -0.0183,
"num_tokens": 2892508.0,
"reward": 0.039961472153663635,
"reward_std": 0.08913610875606537,
"rewards/accuracy_reward_step": 0.015625,
"rewards/brier_reward_group": 0.00905166007578373,
"rewards/format_reward_step": 0.03515625,
"rewards/stepwise_brier_reward": 0.017981717362999916,
"step": 10
},
{
"calib/answer_extract_rate": 0.0546875,
"calib/auroc": 0.36904761904761907,
"calib/avg_num_step_conf": 0.359375,
"calib/ece": 0.4615384615384616,
"calib/final_conf_rate": 0.05078125,
"calib/format_rate": 0.0390625,
"calib/frac_conf_gt_0.9": 0.6923076923076923,
"calib/gap": 0.09023809523809523,
"calib/mean_conf": 0.8830769230769231,
"calib/mu_c": 0.9316666666666666,
"calib/mu_w": 0.8414285714285714,
"calib/nonempty_final_conf_rate": 0.05078125,
"calib/nonempty_reasoning_rate": 0.07421875,
"calib/nonempty_step_conf_rate": 0.06640625,
"calib/pce": 0.44153846153846166,
"calib/std_conf": 0.22972842211836586,
"calib/step_conf_rate": 0.06640625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.09375,
"completions/max_length": 3026.0,
"completions/max_terminated_length": 3026.0,
"completions/mean_length": 749.4765625,
"completions/mean_terminated_length": 827.0086059570312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.011733333333333333,
"grad_norm": 0.010001948103308678,
"learning_rate": 2.7500000000000004e-06,
"loss": -0.0119,
"num_tokens": 3188854.0,
"reward": 0.05315268039703369,
"reward_std": 0.09994122385978699,
"rewards/accuracy_reward_step": 0.0234375,
"rewards/brier_reward_group": 0.014543652534484863,
"rewards/format_reward_step": 0.0390625,
"rewards/stepwise_brier_reward": 0.026192057877779007,
"step": 11
},
{
"calib/answer_extract_rate": 0.0546875,
"calib/auroc": 0.42857142857142866,
"calib/avg_num_step_conf": 0.28515625,
"calib/ece": 0.41066666666666657,
"calib/final_conf_rate": 0.05859375,
"calib/format_rate": 0.04296875,
"calib/frac_conf_gt_0.9": 0.7333333333333333,
"calib/gap": 0.0128571428571429,
"calib/mean_conf": 0.9440000000000001,
"calib/mu_c": 0.95,
"calib/mu_w": 0.937142857142857,
"calib/nonempty_final_conf_rate": 0.05859375,
"calib/nonempty_reasoning_rate": 0.07421875,
"calib/nonempty_step_conf_rate": 0.06640625,
"calib/pce": 0.41066666666666657,
"calib/std_conf": 0.04644710252893427,
"calib/step_conf_rate": 0.06640625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0703125,
"completions/max_length": 2939.0,
"completions/max_terminated_length": 2939.0,
"completions/mean_length": 703.30859375,
"completions/mean_terminated_length": 756.5000610351562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.0128,
"grad_norm": 0.019046172499656677,
"learning_rate": 3e-06,
"loss": -0.0067,
"num_tokens": 3473077.0,
"reward": 0.06333271414041519,
"reward_std": 0.14968423545360565,
"rewards/accuracy_reward_step": 0.03125,
"rewards/brier_reward_group": 0.015546679496765137,
"rewards/format_reward_step": 0.04296875,
"rewards/stepwise_brier_reward": 0.02684667706489563,
"step": 12
},
{
"calib/answer_extract_rate": 0.10546875,
"calib/auroc": 0.33916083916083917,
"calib/avg_num_step_conf": 0.6484375,
"calib/ece": 0.4321666666666666,
"calib/final_conf_rate": 0.09375,
"calib/format_rate": 0.078125,
"calib/frac_conf_gt_0.9": 0.9583333333333334,
"calib/gap": -0.02109090909090916,
"calib/mean_conf": 0.9596666666666666,
"calib/mu_c": 0.95,
"calib/mu_w": 0.9710909090909091,
"calib/nonempty_final_conf_rate": 0.09375,
"calib/nonempty_reasoning_rate": 0.10546875,
"calib/nonempty_step_conf_rate": 0.09765625,
"calib/pce": 0.42508333333333326,
"calib/std_conf": 0.03698047532895644,
"calib/step_conf_rate": 0.09765625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0703125,
"completions/max_length": 3044.0,
"completions/max_terminated_length": 3044.0,
"completions/mean_length": 769.18359375,
"completions/mean_terminated_length": 827.357177734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.013866666666666666,
"grad_norm": 0.023658715188503265,
"learning_rate": 3.2500000000000002e-06,
"loss": -0.0416,
"num_tokens": 3774580.0,
"reward": 0.10999283194541931,
"reward_std": 0.20208735764026642,
"rewards/accuracy_reward_step": 0.05078125,
"rewards/brier_reward_group": 0.029942620545625687,
"rewards/format_reward_step": 0.078125,
"rewards/stepwise_brier_reward": 0.05065370351076126,
"step": 13
},
{
"calib/answer_extract_rate": 0.0859375,
"calib/auroc": 0.42499999999999993,
"calib/avg_num_step_conf": 0.4921875,
"calib/ece": 0.41,
"calib/final_conf_rate": 0.078125,
"calib/format_rate": 0.06640625,
"calib/frac_conf_gt_0.9": 0.9,
"calib/gap": 0.08399999999999996,
"calib/mean_conf": 0.9099999999999999,
"calib/mu_c": 0.952,
"calib/mu_w": 0.868,
"calib/nonempty_final_conf_rate": 0.078125,
"calib/nonempty_reasoning_rate": 0.10546875,
"calib/nonempty_step_conf_rate": 0.08984375,
"calib/pce": 0.41,
"calib/std_conf": 0.20997618912629118,
"calib/step_conf_rate": 0.08984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.09765625,
"completions/max_length": 2794.0,
"completions/max_terminated_length": 2794.0,
"completions/mean_length": 683.984375,
"completions/mean_terminated_length": 758.0086669921875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.014933333333333333,
"grad_norm": 0.014067032374441624,
"learning_rate": 3.5e-06,
"loss": -0.0027,
"num_tokens": 4055080.0,
"reward": 0.0887521356344223,
"reward_std": 0.19326013326644897,
"rewards/accuracy_reward_step": 0.0390625,
"rewards/brier_reward_group": 0.023042481392621994,
"rewards/format_reward_step": 0.06640625,
"rewards/stepwise_brier_reward": 0.042903535068035126,
"step": 14
},
{
"calib/answer_extract_rate": 0.14453125,
"calib/auroc": 0.30000000000000004,
"calib/avg_num_step_conf": 0.796875,
"calib/ece": 0.5422058823529413,
"calib/final_conf_rate": 0.1328125,
"calib/format_rate": 0.11328125,
"calib/frac_conf_gt_0.9": 0.9117647058823529,
"calib/gap": -0.018892857142857045,
"calib/mean_conf": 0.9539705882352942,
"calib/mu_c": 0.942857142857143,
"calib/mu_w": 0.96175,
"calib/nonempty_final_conf_rate": 0.1328125,
"calib/nonempty_reasoning_rate": 0.15625,
"calib/nonempty_step_conf_rate": 0.13671875,
"calib/pce": 0.5422058823529413,
"calib/std_conf": 0.026395727477486648,
"calib/step_conf_rate": 0.13671875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0859375,
"completions/max_length": 3039.0,
"completions/max_terminated_length": 3039.0,
"completions/mean_length": 627.1328125,
"completions/mean_terminated_length": 686.0940551757812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.016,
"grad_norm": 0.022355226799845695,
"learning_rate": 3.7500000000000005e-06,
"loss": -0.0183,
"num_tokens": 4323506.0,
"reward": 0.13517887890338898,
"reward_std": 0.30949950218200684,
"rewards/accuracy_reward_step": 0.0546875,
"rewards/brier_reward_group": 0.024587303400039673,
"rewards/format_reward_step": 0.11328125,
"rewards/stepwise_brier_reward": 0.07081569731235504,
"step": 15
},
{
"calib/answer_extract_rate": 0.1796875,
"calib/auroc": 0.43353174603174605,
"calib/avg_num_step_conf": 1.34375,
"calib/ece": 0.5682826086956521,
"calib/final_conf_rate": 0.1796875,
"calib/format_rate": 0.16015625,
"calib/frac_conf_gt_0.9": 0.9130434782608695,
"calib/gap": -0.006623015873016058,
"calib/mean_conf": 0.959586956521739,
"calib/mu_c": 0.9555555555555555,
"calib/mu_w": 0.9621785714285715,
"calib/nonempty_final_conf_rate": 0.1796875,
"calib/nonempty_reasoning_rate": 0.2109375,
"calib/nonempty_step_conf_rate": 0.1953125,
"calib/pce": 0.5682826086956521,
"calib/std_conf": 0.026132252342199097,
"calib/step_conf_rate": 0.1953125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.078125,
"completions/max_length": 3051.0,
"completions/max_terminated_length": 3051.0,
"completions/mean_length": 814.40234375,
"completions/mean_terminated_length": 883.4194946289062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.017066666666666667,
"grad_norm": 0.023184077814221382,
"learning_rate": 4.000000000000001e-06,
"loss": -0.0113,
"num_tokens": 4640841.0,
"reward": 0.18745586276054382,
"reward_std": 0.32550889253616333,
"rewards/accuracy_reward_step": 0.0703125,
"rewards/brier_reward_group": 0.04953538626432419,
"rewards/format_reward_step": 0.16015625,
"rewards/stepwise_brier_reward": 0.09872552752494812,
"step": 16
},
{
"calib/answer_extract_rate": 0.40234375,
"calib/auroc": 0.4261363636363636,
"calib/avg_num_step_conf": 2.6953125,
"calib/ece": 0.39964999999999995,
"calib/final_conf_rate": 0.390625,
"calib/format_rate": 0.33984375,
"calib/frac_conf_gt_0.9": 0.93,
"calib/gap": -0.0004220779220778237,
"calib/mean_conf": 0.95965,
"calib/mu_c": 0.9594642857142857,
"calib/mu_w": 0.9598863636363635,
"calib/nonempty_final_conf_rate": 0.390625,
"calib/nonempty_reasoning_rate": 0.4453125,
"calib/nonempty_step_conf_rate": 0.390625,
"calib/pce": 0.39964999999999995,
"calib/std_conf": 0.039269931245165174,
"calib/step_conf_rate": 0.390625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0390625,
"completions/max_length": 2794.0,
"completions/max_terminated_length": 2794.0,
"completions/mean_length": 724.234375,
"completions/mean_terminated_length": 753.6747436523438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.018133333333333335,
"grad_norm": 0.0288087148219347,
"learning_rate": 4.25e-06,
"loss": -0.0327,
"num_tokens": 4929773.0,
"reward": 0.5019322037696838,
"reward_std": 0.60835862159729,
"rewards/accuracy_reward_step": 0.22265625,
"rewards/brier_reward_group": 0.19423416256904602,
"rewards/format_reward_step": 0.33984375,
"rewards/stepwise_brier_reward": 0.24318215250968933,
"step": 17
},
{
"calib/answer_extract_rate": 0.515625,
"calib/auroc": 0.48233430799220267,
"calib/avg_num_step_conf": 4.27734375,
"calib/ece": 0.36853846153846154,
"calib/final_conf_rate": 0.5078125,
"calib/format_rate": 0.4921875,
"calib/frac_conf_gt_0.9": 0.9076923076923077,
"calib/gap": 0.012997076023391507,
"calib/mean_conf": 0.9531538461538461,
"calib/mu_c": 0.9585526315789471,
"calib/mu_w": 0.9455555555555556,
"calib/nonempty_final_conf_rate": 0.5078125,
"calib/nonempty_reasoning_rate": 0.57421875,
"calib/nonempty_step_conf_rate": 0.55859375,
"calib/pce": 0.36853846153846154,
"calib/std_conf": 0.05547049964946183,
"calib/step_conf_rate": 0.55859375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.05078125,
"completions/max_length": 3071.0,
"completions/max_terminated_length": 3071.0,
"completions/mean_length": 679.6015625,
"completions/mean_terminated_length": 715.9588012695312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 44.0,
"epoch": 0.0192,
"grad_norm": 0.023622112348675728,
"learning_rate": 4.5e-06,
"loss": 0.0538,
"num_tokens": 5214471.0,
"reward": 0.71192467212677,
"reward_std": 0.6083460450172424,
"rewards/accuracy_reward_step": 0.3046875,
"rewards/brier_reward_group": 0.3079518675804138,
"rewards/format_reward_step": 0.4921875,
"rewards/stepwise_brier_reward": 0.33662158250808716,
"step": 18
},
{
"calib/answer_extract_rate": 0.78125,
"calib/auroc": 0.5199610016420361,
"calib/avg_num_step_conf": 6.04296875,
"calib/ece": 0.5412095,
"calib/final_conf_rate": 0.78125,
"calib/format_rate": 0.734375,
"calib/frac_conf_gt_0.9": 0.93,
"calib/gap": -0.0034010262725783225,
"calib/mean_conf": 0.9522095,
"calib/mu_c": 0.9502369047619046,
"calib/mu_w": 0.9536379310344829,
"calib/nonempty_final_conf_rate": 0.78125,
"calib/nonempty_reasoning_rate": 0.87890625,
"calib/nonempty_step_conf_rate": 0.86328125,
"calib/pce": 0.5367095000000001,
"calib/std_conf": 0.07131394085135108,
"calib/step_conf_rate": 0.86328125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2768.0,
"completions/max_terminated_length": 2768.0,
"completions/mean_length": 475.46484375,
"completions/mean_terminated_length": 477.3294372558594,
"completions/min_length": 0.0,
"completions/min_terminated_length": 7.0,
"epoch": 0.020266666666666665,
"grad_norm": 0.02240210771560669,
"learning_rate": 4.75e-06,
"loss": -0.0198,
"num_tokens": 5440950.0,
"reward": 0.9314632415771484,
"reward_std": 0.6002488732337952,
"rewards/accuracy_reward_step": 0.3359375,
"rewards/brier_reward_group": 0.41432082653045654,
"rewards/format_reward_step": 0.734375,
"rewards/stepwise_brier_reward": 0.49903208017349243,
"step": 19
},
{
"calib/answer_extract_rate": 0.89453125,
"calib/auroc": 0.4548149905123339,
"calib/avg_num_step_conf": 6.85546875,
"calib/ece": 0.49933539823008855,
"calib/final_conf_rate": 0.8828125,
"calib/format_rate": 0.8359375,
"calib/frac_conf_gt_0.9": 0.8938053097345132,
"calib/gap": 0.003348387096774208,
"calib/mean_conf": 0.9506628318584072,
"calib/mu_c": 0.9525,
"calib/mu_w": 0.9491516129032258,
"calib/nonempty_final_conf_rate": 0.8828125,
"calib/nonempty_reasoning_rate": 0.96875,
"calib/nonempty_step_conf_rate": 0.9375,
"calib/pce": 0.49933539823008855,
"calib/std_conf": 0.05559933897660515,
"calib/step_conf_rate": 0.9375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2937.0,
"completions/max_terminated_length": 2937.0,
"completions/mean_length": 455.4453125,
"completions/mean_terminated_length": 457.2314147949219,
"completions/min_length": 0.0,
"completions/min_terminated_length": 32.0,
"epoch": 0.021333333333333333,
"grad_norm": 0.03255808353424072,
"learning_rate": 5e-06,
"loss": 0.0156,
"num_tokens": 5662416.0,
"reward": 1.0877689123153687,
"reward_std": 0.5388917922973633,
"rewards/accuracy_reward_step": 0.40625,
"rewards/brier_reward_group": 0.4922211170196533,
"rewards/format_reward_step": 0.8359375,
"rewards/stepwise_brier_reward": 0.5619795918464661,
"step": 20
},
{
"calib/answer_extract_rate": 0.9375,
"calib/auroc": 0.5604197067611701,
"calib/avg_num_step_conf": 7.26171875,
"calib/ece": 0.4510438154166667,
"calib/final_conf_rate": 0.9375,
"calib/format_rate": 0.91015625,
"calib/frac_conf_gt_0.9": 0.8791666666666667,
"calib/gap": 0.025455469606003867,
"calib/mean_conf": 0.9385438154166666,
"calib/mu_c": 0.9515897435897436,
"calib/mu_w": 0.9261342739837397,
"calib/nonempty_final_conf_rate": 0.9375,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.97265625,
"calib/pce": 0.4510438154166667,
"calib/std_conf": 0.09832152328578064,
"calib/step_conf_rate": 0.97265625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1987.0,
"completions/max_terminated_length": 1987.0,
"completions/mean_length": 442.390625,
"completions/mean_terminated_length": 444.1255187988281,
"completions/min_length": 0.0,
"completions/min_terminated_length": 50.0,
"epoch": 0.0224,
"grad_norm": 0.04002515599131584,
"learning_rate": 4.9722222222222224e-06,
"loss": -0.0188,
"num_tokens": 5878628.0,
"reward": 1.235422134399414,
"reward_std": 0.5283094644546509,
"rewards/accuracy_reward_step": 0.4609375,
"rewards/brier_reward_group": 0.6382216215133667,
"rewards/format_reward_step": 0.91015625,
"rewards/stepwise_brier_reward": 0.6394045352935791,
"step": 21
},
{
"calib/answer_extract_rate": 0.94921875,
"calib/auroc": 0.5054565547727586,
"calib/avg_num_step_conf": 7.34375,
"calib/ece": 0.41908713692946054,
"calib/final_conf_rate": 0.94140625,
"calib/format_rate": 0.91015625,
"calib/frac_conf_gt_0.9": 0.8838174273858921,
"calib/gap": 0.005586752313855636,
"calib/mean_conf": 0.946058091286307,
"calib/mu_c": 0.9487007874015749,
"calib/mu_w": 0.9431140350877193,
"calib/nonempty_final_conf_rate": 0.94140625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.41908713692946054,
"calib/std_conf": 0.050211386630864475,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2441.0,
"completions/max_terminated_length": 2441.0,
"completions/mean_length": 401.01171875,
"completions/mean_terminated_length": 405.7668151855469,
"completions/min_length": 0.0,
"completions/min_terminated_length": 31.0,
"epoch": 0.023466666666666667,
"grad_norm": 0.014912611804902554,
"learning_rate": 4.944444444444445e-06,
"loss": -0.0018,
"num_tokens": 6083103.0,
"reward": 1.2581007480621338,
"reward_std": 0.44688111543655396,
"rewards/accuracy_reward_step": 0.5,
"rewards/brier_reward_group": 0.6115713119506836,
"rewards/format_reward_step": 0.91015625,
"rewards/stepwise_brier_reward": 0.6005195379257202,
"step": 22
},
{
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.5582003746528001,
"calib/avg_num_step_conf": 8.078125,
"calib/ece": 0.3934,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.953125,
"calib/frac_conf_gt_0.9": 0.844,
"calib/gap": 0.010636909760351254,
"calib/mean_conf": 0.93636,
"calib/mu_c": 0.9411678832116788,
"calib/mu_w": 0.9305309734513275,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.39088,
"calib/std_conf": 0.07596808803701723,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1372.0,
"completions/max_terminated_length": 1372.0,
"completions/mean_length": 438.84765625,
"completions/mean_terminated_length": 445.8135070800781,
"completions/min_length": 0.0,
"completions/min_terminated_length": 117.0,
"epoch": 0.024533333333333334,
"grad_norm": 0.01338372752070427,
"learning_rate": 4.9166666666666665e-06,
"loss": -0.0124,
"num_tokens": 6299384.0,
"reward": 1.3480709791183472,
"reward_std": 0.4616602957248688,
"rewards/accuracy_reward_step": 0.53515625,
"rewards/brier_reward_group": 0.7146314382553101,
"rewards/format_reward_step": 0.953125,
"rewards/stepwise_brier_reward": 0.6307774782180786,
"step": 23
},
{
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.5213293650793651,
"calib/avg_num_step_conf": 8.30078125,
"calib/ece": 0.520722891566265,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.8594377510040161,
"calib/gap": 0.009337301587301505,
"calib/mean_conf": 0.9424096385542169,
"calib/mu_c": 0.9478095238095239,
"calib/mu_w": 0.9384722222222224,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.520722891566265,
"calib/std_conf": 0.06085581804420343,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2503.0,
"completions/max_terminated_length": 2503.0,
"completions/mean_length": 461.5625,
"completions/mean_terminated_length": 468.888916015625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 175.0,
"epoch": 0.0256,
"grad_norm": 0.016162144020199776,
"learning_rate": 4.888888888888889e-06,
"loss": -0.0411,
"num_tokens": 6522056.0,
"reward": 1.1937477588653564,
"reward_std": 0.47592926025390625,
"rewards/accuracy_reward_step": 0.41015625,
"rewards/brier_reward_group": 0.6197054386138916,
"rewards/format_reward_step": 0.9609375,
"rewards/stepwise_brier_reward": 0.5927855372428894,
"step": 24
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5154220779220778,
"calib/avg_num_step_conf": 8.546875,
"calib/ece": 0.38333333333333325,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.8470588235294118,
"calib/gap": -0.00196553446553438,
"calib/mean_conf": 0.9392549019607843,
"calib/mu_c": 0.9383916083916084,
"calib/mu_w": 0.9403571428571428,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3809019607843136,
"calib/std_conf": 0.03940530627016226,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2528.0,
"completions/max_terminated_length": 2528.0,
"completions/mean_length": 468.71484375,
"completions/mean_terminated_length": 474.2727355957031,
"completions/min_length": 0.0,
"completions/min_terminated_length": 119.0,
"epoch": 0.02666666666666667,
"grad_norm": 0.013917172327637672,
"learning_rate": 4.861111111111111e-06,
"loss": -0.0145,
"num_tokens": 6745271.0,
"reward": 1.4023184776306152,
"reward_std": 0.31770461797714233,
"rewards/accuracy_reward_step": 0.55859375,
"rewards/brier_reward_group": 0.720646858215332,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.662064790725708,
"step": 25
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5097145122918318,
"calib/avg_num_step_conf": 8.06640625,
"calib/ece": 0.32644268774703555,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.8656126482213439,
"calib/gap": 0.00112212529738287,
"calib/mean_conf": 0.9368774703557312,
"calib/mu_c": 0.9373076923076921,
"calib/mu_w": 0.9361855670103092,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3233596837944664,
"calib/std_conf": 0.05741579851654595,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2298.0,
"completions/max_terminated_length": 2298.0,
"completions/mean_length": 448.64453125,
"completions/mean_terminated_length": 453.9644470214844,
"completions/min_length": 0.0,
"completions/min_terminated_length": 44.0,
"epoch": 0.027733333333333332,
"grad_norm": 0.009529990144073963,
"learning_rate": 4.833333333333333e-06,
"loss": -0.0049,
"num_tokens": 6965364.0,
"reward": 1.4513397216796875,
"reward_std": 0.3023107051849365,
"rewards/accuracy_reward_step": 0.609375,
"rewards/brier_reward_group": 0.7412692308425903,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.6578395962715149,
"step": 26
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.4055364204617936,
"calib/avg_num_step_conf": 8.36328125,
"calib/ece": 0.3948605577689242,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.7649402390438247,
"calib/gap": -0.009161245056767298,
"calib/mean_conf": 0.9267330677290837,
"calib/mu_c": 0.9224626865671642,
"calib/mu_w": 0.9316239316239315,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3938645418326692,
"calib/std_conf": 0.0453196313555899,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2524.0,
"completions/max_terminated_length": 2524.0,
"completions/mean_length": 493.4140625,
"completions/mean_terminated_length": 497.2992248535156,
"completions/min_length": 0.0,
"completions/min_terminated_length": 172.0,
"epoch": 0.0288,
"grad_norm": 0.009581191465258598,
"learning_rate": 4.805555555555556e-06,
"loss": 0.0041,
"num_tokens": 7196894.0,
"reward": 1.3459198474884033,
"reward_std": 0.36943548917770386,
"rewards/accuracy_reward_step": 0.5234375,
"rewards/brier_reward_group": 0.7029818296432495,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.6260105967521667,
"step": 27
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.39229926007125243,
"calib/avg_num_step_conf": 7.91015625,
"calib/ece": 0.27197628458498024,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.6719367588932806,
"calib/gap": -0.016911482597971927,
"calib/mean_conf": 0.9152173913043479,
"calib/mu_c": 0.9092682926829269,
"calib/mu_w": 0.9261797752808988,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.26948616600790515,
"calib/std_conf": 0.05624315657899554,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2551.0,
"completions/max_terminated_length": 2551.0,
"completions/mean_length": 513.23046875,
"completions/mean_terminated_length": 519.3162231445312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 184.0,
"epoch": 0.029866666666666666,
"grad_norm": 0.012991939671337605,
"learning_rate": 4.777777777777778e-06,
"loss": 0.0344,
"num_tokens": 7435225.0,
"reward": 1.514519214630127,
"reward_std": 0.35605090856552124,
"rewards/accuracy_reward_step": 0.640625,
"rewards/brier_reward_group": 0.8041585087776184,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.722668468952179,
"step": 28
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4374403967194354,
"calib/avg_num_step_conf": 8.0,
"calib/ece": 0.3319291338582678,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.6141732283464567,
"calib/gap": -0.008855617013160288,
"calib/mean_conf": 0.9106692913385827,
"calib/mu_c": 0.9069387755102042,
"calib/mu_w": 0.9157943925233645,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3319291338582678,
"calib/std_conf": 0.0448350529202759,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2433.0,
"completions/max_terminated_length": 2433.0,
"completions/mean_length": 561.73046875,
"completions/mean_terminated_length": 566.153564453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 166.0,
"epoch": 0.030933333333333334,
"grad_norm": 0.010171281173825264,
"learning_rate": 4.75e-06,
"loss": 0.0169,
"num_tokens": 7686156.0,
"reward": 1.4394994974136353,
"reward_std": 0.36331403255462646,
"rewards/accuracy_reward_step": 0.57421875,
"rewards/brier_reward_group": 0.7675879001617432,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.7091603875160217,
"step": 29
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5130804228556136,
"calib/avg_num_step_conf": 7.453125,
"calib/ece": 0.2665612648221343,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.41106719367588934,
"calib/gap": -0.0003050983540746621,
"calib/mean_conf": 0.8891699604743083,
"calib/mu_c": 0.889056603773585,
"calib/mu_w": 0.8893617021276596,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.26363636363636356,
"calib/std_conf": 0.05355082147853668,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2295.0,
"completions/max_terminated_length": 2295.0,
"completions/mean_length": 539.1796875,
"completions/mean_terminated_length": 543.4251708984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 182.0,
"epoch": 0.032,
"grad_norm": 0.018558280542492867,
"learning_rate": 4.722222222222222e-06,
"loss": 0.0198,
"num_tokens": 7931170.0,
"reward": 1.4883720874786377,
"reward_std": 0.3338737487792969,
"rewards/accuracy_reward_step": 0.62109375,
"rewards/brier_reward_group": 0.7994797229766846,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.7008838653564453,
"step": 30
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.4583174603174603,
"calib/avg_num_step_conf": 7.23828125,
"calib/ece": 0.3742629482071713,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.27091633466135456,
"calib/gap": -0.002998095238095466,
"calib/mean_conf": 0.8762549800796813,
"calib/mu_c": 0.8747619047619046,
"calib/mu_w": 0.8777600000000001,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3742629482071713,
"calib/std_conf": 0.05041973943408669,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2521.0,
"completions/max_terminated_length": 2521.0,
"completions/mean_length": 577.0390625,
"completions/mean_terminated_length": 579.302001953125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 199.0,
"epoch": 0.03306666666666667,
"grad_norm": 0.008476846851408482,
"learning_rate": 4.694444444444445e-06,
"loss": 0.0122,
"num_tokens": 8184804.0,
"reward": 1.3323320150375366,
"reward_std": 0.32351502776145935,
"rewards/accuracy_reward_step": 0.4921875,
"rewards/brier_reward_group": 0.7001621723175049,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.699478805065155,
"step": 31
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5121424030514939,
"calib/avg_num_step_conf": 6.30859375,
"calib/ece": 0.27343873517786554,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.09486166007905138,
"calib/gap": 0.002286713286713371,
"calib/mean_conf": 0.8386561264822134,
"calib/mu_c": 0.8396503496503496,
"calib/mu_w": 0.8373636363636362,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.27343873517786554,
"calib/std_conf": 0.06560581420669309,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2242.0,
"completions/max_terminated_length": 2242.0,
"completions/mean_length": 507.671875,
"completions/mean_terminated_length": 511.6692810058594,
"completions/min_length": 0.0,
"completions/min_terminated_length": 148.0,
"epoch": 0.034133333333333335,
"grad_norm": 0.01096720714122057,
"learning_rate": 4.666666666666667e-06,
"loss": -0.0019,
"num_tokens": 8421472.0,
"reward": 1.4251006841659546,
"reward_std": 0.2962689697742462,
"rewards/accuracy_reward_step": 0.55859375,
"rewards/brier_reward_group": 0.7659081816673279,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.7391819953918457,
"step": 32
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5132413882413882,
"calib/avg_num_step_conf": 6.27734375,
"calib/ece": 0.2715537848605577,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.07171314741035857,
"calib/gap": 0.0026249676249674447,
"calib/mean_conf": 0.8301992031872509,
"calib/mu_c": 0.8313286713286712,
"calib/mu_w": 0.8287037037037037,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.26601593625498,
"calib/std_conf": 0.0617568177709778,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2515.0,
"completions/max_terminated_length": 2515.0,
"completions/mean_length": 534.4921875,
"completions/mean_terminated_length": 540.830078125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 188.0,
"epoch": 0.0352,
"grad_norm": 0.009881477802991867,
"learning_rate": 4.638888888888889e-06,
"loss": 0.0407,
"num_tokens": 8665174.0,
"reward": 1.428787112236023,
"reward_std": 0.2549947500228882,
"rewards/accuracy_reward_step": 0.55859375,
"rewards/brier_reward_group": 0.7520343661308289,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.7678015232086182,
"step": 33
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.4415031592949784,
"calib/avg_num_step_conf": 5.93359375,
"calib/ece": 0.1896428571428571,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.011904761904761904,
"calib/gap": -0.01558563352178255,
"calib/mean_conf": 0.7915476190476189,
"calib/mu_c": 0.7855483870967741,
"calib/mu_w": 0.8011340206185567,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.18305555555555553,
"calib/std_conf": 0.06934776717235662,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 3059.0,
"completions/max_terminated_length": 3059.0,
"completions/mean_length": 496.3671875,
"completions/mean_terminated_length": 500.27557373046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 170.0,
"epoch": 0.03626666666666667,
"grad_norm": 0.013796166516840458,
"learning_rate": 4.611111111111112e-06,
"loss": 0.0134,
"num_tokens": 8897356.0,
"reward": 1.513746976852417,
"reward_std": 0.4082501530647278,
"rewards/accuracy_reward_step": 0.60546875,
"rewards/brier_reward_group": 0.8546947240829468,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.8096679449081421,
"step": 34
},
{
"calib/answer_extract_rate": 0.96484375,
"calib/auroc": 0.5840664711632454,
"calib/avg_num_step_conf": 5.4765625,
"calib/ece": 0.14336032388663963,
"calib/final_conf_rate": 0.96484375,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.024291497975708502,
"calib/gap": 0.024426057813154656,
"calib/mean_conf": 0.7668421052631579,
"calib/mu_c": 0.776038961038961,
"calib/mu_w": 0.7516129032258063,
"calib/nonempty_final_conf_rate": 0.96484375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.14336032388663963,
"calib/std_conf": 0.0838916025605856,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2466.0,
"completions/max_terminated_length": 2466.0,
"completions/mean_length": 592.1328125,
"completions/mean_terminated_length": 599.1541748046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 238.0,
"epoch": 0.037333333333333336,
"grad_norm": 0.007923703640699387,
"learning_rate": 4.583333333333333e-06,
"loss": 0.0172,
"num_tokens": 9158198.0,
"reward": 1.4799138307571411,
"reward_std": 0.29859060049057007,
"rewards/accuracy_reward_step": 0.6015625,
"rewards/brier_reward_group": 0.8021693229675293,
"rewards/format_reward_step": 0.96484375,
"rewards/stepwise_brier_reward": 0.7815483808517456,
"step": 35
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5521396396396396,
"calib/avg_num_step_conf": 5.44921875,
"calib/ece": 0.05578740157480315,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.027559055118110236,
"calib/gap": 0.01752852852852871,
"calib/mean_conf": 0.7644488188976378,
"calib/mu_c": 0.7695555555555558,
"calib/mu_w": 0.7520270270270271,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.05578740157480315,
"calib/std_conf": 0.08590126255215237,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2492.0,
"completions/max_terminated_length": 2492.0,
"completions/mean_length": 502.5078125,
"completions/mean_terminated_length": 506.4645690917969,
"completions/min_length": 0.0,
"completions/min_terminated_length": 181.0,
"epoch": 0.0384,
"grad_norm": 0.01005138736218214,
"learning_rate": 4.555555555555556e-06,
"loss": 0.0007,
"num_tokens": 9389552.0,
"reward": 1.626293659210205,
"reward_std": 0.27581554651260376,
"rewards/accuracy_reward_step": 0.703125,
"rewards/brier_reward_group": 0.8694294691085815,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.8466823697090149,
"step": 36
},
{
"calib/answer_extract_rate": 0.96484375,
"calib/auroc": 0.5134461498097862,
"calib/avg_num_step_conf": 5.0390625,
"calib/ece": 0.2932388663967612,
"calib/final_conf_rate": 0.96484375,
"calib/format_rate": 0.95703125,
"calib/frac_conf_gt_0.9": 0.06477732793522267,
"calib/gap": 0.0018890200708382432,
"calib/mean_conf": 0.8033603238866396,
"calib/mu_c": 0.8042857142857143,
"calib/mu_w": 0.802396694214876,
"calib/nonempty_final_conf_rate": 0.96484375,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.2932388663967612,
"calib/std_conf": 0.07476578628139789,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2983.0,
"completions/max_terminated_length": 2983.0,
"completions/mean_length": 600.9609375,
"completions/mean_terminated_length": 605.6929321289062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 197.0,
"epoch": 0.039466666666666664,
"grad_norm": 0.008782311342656612,
"learning_rate": 4.527777777777778e-06,
"loss": 0.0043,
"num_tokens": 9650494.0,
"reward": 1.3417272567749023,
"reward_std": 0.33047229051589966,
"rewards/accuracy_reward_step": 0.4921875,
"rewards/brier_reward_group": 0.7289199233055115,
"rewards/format_reward_step": 0.95703125,
"rewards/stepwise_brier_reward": 0.7551764845848083,
"step": 37
},
{
"calib/answer_extract_rate": 0.9609375,
"calib/auroc": 0.5028111603064165,
"calib/avg_num_step_conf": 4.9765625,
"calib/ece": 0.20300813008130075,
"calib/final_conf_rate": 0.9609375,
"calib/format_rate": 0.95703125,
"calib/frac_conf_gt_0.9": 0.07317073170731707,
"calib/gap": 0.009356947079907085,
"calib/mean_conf": 0.8249593495934959,
"calib/mu_c": 0.8284967320261437,
"calib/mu_w": 0.8191397849462366,
"calib/nonempty_final_conf_rate": 0.9609375,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.20300813008130075,
"calib/std_conf": 0.07112766472342436,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2529.0,
"completions/max_terminated_length": 2529.0,
"completions/mean_length": 588.40234375,
"completions/mean_terminated_length": 595.3794555664062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 208.0,
"epoch": 0.04053333333333333,
"grad_norm": 0.01418561115860939,
"learning_rate": 4.5e-06,
"loss": 0.0212,
"num_tokens": 9908013.0,
"reward": 1.4612244367599487,
"reward_std": 0.3273961544036865,
"rewards/accuracy_reward_step": 0.59765625,
"rewards/brier_reward_group": 0.7723513841629028,
"rewards/format_reward_step": 0.95703125,
"rewards/stepwise_brier_reward": 0.7678588032722473,
"step": 38
},
{
"calib/answer_extract_rate": 0.95703125,
"calib/auroc": 0.5400226229290039,
"calib/avg_num_step_conf": 5.515625,
"calib/ece": 0.32918699186991873,
"calib/final_conf_rate": 0.9609375,
"calib/format_rate": 0.953125,
"calib/frac_conf_gt_0.9": 0.2601626016260163,
"calib/gap": 0.011975514006254517,
"calib/mean_conf": 0.8698373983739838,
"calib/mu_c": 0.8753383458646616,
"calib/mu_w": 0.8633628318584071,
"calib/nonempty_final_conf_rate": 0.9609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.32918699186991873,
"calib/std_conf": 0.06699695102681971,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2980.0,
"completions/max_terminated_length": 2980.0,
"completions/mean_length": 648.86328125,
"completions/mean_terminated_length": 648.86328125,
"completions/min_length": 232.0,
"completions/min_terminated_length": 232.0,
"epoch": 0.0416,
"grad_norm": 0.014082246460020542,
"learning_rate": 4.472222222222223e-06,
"loss": 0.0524,
"num_tokens": 10180210.0,
"reward": 1.362987756729126,
"reward_std": 0.37360742688179016,
"rewards/accuracy_reward_step": 0.51953125,
"rewards/brier_reward_group": 0.7202534675598145,
"rewards/format_reward_step": 0.953125,
"rewards/stepwise_brier_reward": 0.7473224401473999,
"step": 39
},
{
"calib/answer_extract_rate": 0.94140625,
"calib/auroc": 0.4992827868852459,
"calib/avg_num_step_conf": 6.06640625,
"calib/ece": 0.40673553719008254,
"calib/final_conf_rate": 0.9453125,
"calib/format_rate": 0.94140625,
"calib/frac_conf_gt_0.9": 0.5206611570247934,
"calib/gap": 0.002109289617486243,
"calib/mean_conf": 0.902603305785124,
"calib/mu_c": 0.9036666666666666,
"calib/mu_w": 0.9015573770491804,
"calib/nonempty_final_conf_rate": 0.9453125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.40673553719008254,
"calib/std_conf": 0.04406324893181568,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2366.0,
"completions/max_terminated_length": 2366.0,
"completions/mean_length": 693.1015625,
"completions/mean_terminated_length": 704.1032104492188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 205.0,
"epoch": 0.042666666666666665,
"grad_norm": 0.012825295329093933,
"learning_rate": 4.444444444444444e-06,
"loss": 0.0374,
"num_tokens": 10464404.0,
"reward": 1.2686021327972412,
"reward_std": 0.407518208026886,
"rewards/accuracy_reward_step": 0.46875,
"rewards/brier_reward_group": 0.645278811454773,
"rewards/format_reward_step": 0.94140625,
"rewards/stepwise_brier_reward": 0.6713171005249023,
"step": 40
},
{
"calib/answer_extract_rate": 0.9375,
"calib/auroc": 0.61613358894158,
"calib/avg_num_step_conf": 6.484375,
"calib/ece": 0.14174999999999993,
"calib/final_conf_rate": 0.9375,
"calib/format_rate": 0.9375,
"calib/frac_conf_gt_0.9": 0.7333333333333333,
"calib/gap": 0.03217233377055784,
"calib/mean_conf": 0.9209166666666666,
"calib/mu_c": 0.9280213903743315,
"calib/mu_w": 0.8958490566037737,
"calib/nonempty_final_conf_rate": 0.9375,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.14174999999999993,
"calib/std_conf": 0.06836782666007617,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2666.0,
"completions/max_terminated_length": 2666.0,
"completions/mean_length": 687.8203125,
"completions/mean_terminated_length": 690.5177001953125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 246.0,
"epoch": 0.04373333333333333,
"grad_norm": 0.009431262500584126,
"learning_rate": 4.416666666666667e-06,
"loss": 0.0901,
"num_tokens": 10747734.0,
"reward": 1.6060315370559692,
"reward_std": 0.40255552530288696,
"rewards/accuracy_reward_step": 0.73046875,
"rewards/brier_reward_group": 0.8279871344566345,
"rewards/format_reward_step": 0.9375,
"rewards/stepwise_brier_reward": 0.7992638349533081,
"step": 41
},
{
"calib/answer_extract_rate": 0.88671875,
"calib/auroc": 0.5217840608465608,
"calib/avg_num_step_conf": 9.875,
"calib/ece": 0.3039473684210526,
"calib/final_conf_rate": 0.890625,
"calib/format_rate": 0.8828125,
"calib/frac_conf_gt_0.9": 0.8552631578947368,
"calib/gap": 0.0047916666666665275,
"calib/mean_conf": 0.9355263157894738,
"calib/mu_c": 0.9372916666666666,
"calib/mu_w": 0.9325000000000001,
"calib/nonempty_final_conf_rate": 0.890625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3039473684210526,
"calib/std_conf": 0.03183267948065038,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2735.0,
"completions/max_terminated_length": 2735.0,
"completions/mean_length": 763.11328125,
"completions/mean_terminated_length": 772.162109375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 299.0,
"epoch": 0.0448,
"grad_norm": 0.011268241330981255,
"learning_rate": 4.388888888888889e-06,
"loss": 0.0707,
"num_tokens": 11047459.0,
"reward": 1.3278944492340088,
"reward_std": 0.4228435754776001,
"rewards/accuracy_reward_step": 0.5625,
"rewards/brier_reward_group": 0.6321179866790771,
"rewards/format_reward_step": 0.8828125,
"rewards/stepwise_brier_reward": 0.663834810256958,
"step": 42
},
{
"calib/answer_extract_rate": 0.90625,
"calib/auroc": 0.5982491925888153,
"calib/avg_num_step_conf": 8.68359375,
"calib/ece": 0.25412017167381973,
"calib/final_conf_rate": 0.91015625,
"calib/format_rate": 0.90625,
"calib/frac_conf_gt_0.9": 0.8369098712446352,
"calib/gap": 0.020009348971612995,
"calib/mean_conf": 0.933519313304721,
"calib/mu_c": 0.9398742138364782,
"calib/mu_w": 0.9198648648648652,
"calib/nonempty_final_conf_rate": 0.91015625,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.2526180257510729,
"calib/std_conf": 0.059674156816859406,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2560.0,
"completions/max_terminated_length": 2560.0,
"completions/mean_length": 727.28515625,
"completions/mean_terminated_length": 733.0117797851562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 251.0,
"epoch": 0.04586666666666667,
"grad_norm": 0.009542334824800491,
"learning_rate": 4.361111111111112e-06,
"loss": 0.0603,
"num_tokens": 11338868.0,
"reward": 1.4376327991485596,
"reward_std": 0.5053052306175232,
"rewards/accuracy_reward_step": 0.62109375,
"rewards/brier_reward_group": 0.7418070435523987,
"rewards/format_reward_step": 0.90625,
"rewards/stepwise_brier_reward": 0.71184903383255,
"step": 43
},
{
"calib/answer_extract_rate": 0.9453125,
"calib/auroc": 0.4944179714091219,
"calib/avg_num_step_conf": 5.859375,
"calib/ece": 0.40909465020576125,
"calib/final_conf_rate": 0.94921875,
"calib/format_rate": 0.9375,
"calib/frac_conf_gt_0.9": 0.9300411522633745,
"calib/gap": -0.0006555479918309848,
"calib/mean_conf": 0.944074074074074,
"calib/mu_c": 0.9437692307692308,
"calib/mu_w": 0.9444247787610618,
"calib/nonempty_final_conf_rate": 0.94921875,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.40909465020576125,
"calib/std_conf": 0.02147509483603587,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 3011.0,
"completions/max_terminated_length": 3011.0,
"completions/mean_length": 637.25,
"completions/mean_terminated_length": 649.9442138671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 284.0,
"epoch": 0.046933333333333334,
"grad_norm": 0.01006728783249855,
"learning_rate": 4.333333333333334e-06,
"loss": 0.0016,
"num_tokens": 11608324.0,
"reward": 1.2974810600280762,
"reward_std": 0.34184491634368896,
"rewards/accuracy_reward_step": 0.5078125,
"rewards/brier_reward_group": 0.6303683519363403,
"rewards/format_reward_step": 0.9375,
"rewards/stepwise_brier_reward": 0.6533058881759644,
"step": 44
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5755058791359038,
"calib/avg_num_step_conf": 5.21484375,
"calib/ece": 0.30577689243027883,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.8685258964143426,
"calib/gap": 0.014649986327590736,
"calib/mean_conf": 0.9364541832669323,
"calib/mu_c": 0.9418238993710691,
"calib/mu_w": 0.9271739130434784,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.30438247011952185,
"calib/std_conf": 0.04392787565308283,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2670.0,
"completions/max_terminated_length": 2670.0,
"completions/mean_length": 561.43359375,
"completions/mean_terminated_length": 565.8543090820312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 203.0,
"epoch": 0.048,
"grad_norm": 0.009378692135214806,
"learning_rate": 4.305555555555556e-06,
"loss": 0.0162,
"num_tokens": 11857099.0,
"reward": 1.4878321886062622,
"reward_std": 0.35901641845703125,
"rewards/accuracy_reward_step": 0.62109375,
"rewards/brier_reward_group": 0.7700293064117432,
"rewards/format_reward_step": 0.97265625,
"rewards/stepwise_brier_reward": 0.75161212682724,
"step": 45
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.52506435006435,
"calib/avg_num_step_conf": 4.6328125,
"calib/ece": 0.3822310756972112,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.9083665338645418,
"calib/gap": 0.0014536679536677477,
"calib/mean_conf": 0.94,
"calib/mu_c": 0.9406428571428571,
"calib/mu_w": 0.9391891891891894,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.3822310756972112,
"calib/std_conf": 0.022190770128241635,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2241.0,
"completions/max_terminated_length": 2241.0,
"completions/mean_length": 561.3125,
"completions/mean_terminated_length": 565.7322998046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 189.0,
"epoch": 0.04906666666666667,
"grad_norm": 0.010353241115808487,
"learning_rate": 4.277777777777778e-06,
"loss": -0.0025,
"num_tokens": 12105563.0,
"reward": 1.3909752368927002,
"reward_std": 0.3092753291130066,
"rewards/accuracy_reward_step": 0.546875,
"rewards/brier_reward_group": 0.6861876845359802,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.7292758822441101,
"step": 46
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.6003111471861471,
"calib/avg_num_step_conf": 4.73046875,
"calib/ece": 0.3187999999999999,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.916,
"calib/gap": 0.01726190476190481,
"calib/mean_conf": 0.9348,
"calib/mu_c": 0.9414285714285715,
"calib/mu_w": 0.9241666666666667,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3187999999999999,
"calib/std_conf": 0.0615805163992638,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2187.0,
"completions/max_terminated_length": 2187.0,
"completions/mean_length": 546.3046875,
"completions/mean_terminated_length": 552.7826538085938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 195.0,
"epoch": 0.050133333333333335,
"grad_norm": 0.011031719855964184,
"learning_rate": 4.25e-06,
"loss": 0.0258,
"num_tokens": 12351393.0,
"reward": 1.4612222909927368,
"reward_std": 0.2621772885322571,
"rewards/accuracy_reward_step": 0.6015625,
"rewards/brier_reward_group": 0.7333366870880127,
"rewards/format_reward_step": 0.9765625,
"rewards/stepwise_brier_reward": 0.7521774768829346,
"step": 47
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5540465631929046,
"calib/avg_num_step_conf": 4.65625,
"calib/ece": 0.41752941176470565,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.8823529411764706,
"calib/gap": 0.004502956393200308,
"calib/mean_conf": 0.9351764705882352,
"calib/mu_c": 0.9373484848484848,
"calib/mu_w": 0.9328455284552845,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.41752941176470565,
"calib/std_conf": 0.02623925055896591,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2182.0,
"completions/max_terminated_length": 2182.0,
"completions/mean_length": 487.90625,
"completions/mean_terminated_length": 493.69171142578125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 222.0,
"epoch": 0.0512,
"grad_norm": 0.029779616743326187,
"learning_rate": 4.222222222222223e-06,
"loss": 0.0031,
"num_tokens": 12579985.0,
"reward": 1.3629781007766724,
"reward_std": 0.3381595015525818,
"rewards/accuracy_reward_step": 0.515625,
"rewards/brier_reward_group": 0.69517982006073,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.7098574638366699,
"step": 48
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.586783891094725,
"calib/avg_num_step_conf": 4.38671875,
"calib/ece": 0.25755905511811017,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.9015748031496063,
"calib/gap": 0.01291973908111177,
"calib/mean_conf": 0.9347244094488188,
"calib/mu_c": 0.9388953488372094,
"calib/mu_w": 0.9259756097560976,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.25755905511811017,
"calib/std_conf": 0.03446615986489285,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2302.0,
"completions/max_terminated_length": 2302.0,
"completions/mean_length": 480.9375,
"completions/mean_terminated_length": 486.6403503417969,
"completions/min_length": 0.0,
"completions/min_terminated_length": 230.0,
"epoch": 0.05226666666666667,
"grad_norm": 0.012473726645112038,
"learning_rate": 4.194444444444445e-06,
"loss": 0.0526,
"num_tokens": 12807641.0,
"reward": 1.5611481666564941,
"reward_std": 0.28796297311782837,
"rewards/accuracy_reward_step": 0.671875,
"rewards/brier_reward_group": 0.8108144402503967,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.7619033455848694,
"step": 49
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.593910034602076,
"calib/avg_num_step_conf": 4.1640625,
"calib/ece": 0.26933333333333315,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.9058823529411765,
"calib/gap": 0.009529411764705564,
"calib/mean_conf": 0.9359999999999999,
"calib/mu_c": 0.9391764705882353,
"calib/mu_w": 0.9296470588235297,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.26933333333333315,
"calib/std_conf": 0.02181203121862684,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2540.0,
"completions/max_terminated_length": 2540.0,
"completions/mean_length": 481.08984375,
"completions/mean_terminated_length": 486.79449462890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 200.0,
"epoch": 0.05333333333333334,
"grad_norm": 0.013286111876368523,
"learning_rate": 4.166666666666667e-06,
"loss": 0.0492,
"num_tokens": 13036160.0,
"reward": 1.549306869506836,
"reward_std": 0.2995968759059906,
"rewards/accuracy_reward_step": 0.6640625,
"rewards/brier_reward_group": 0.798114538192749,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7506750822067261,
"step": 50
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.6640414360720874,
"calib/avg_num_step_conf": 3.66015625,
"calib/ece": 0.2511764705882351,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.8588235294117647,
"calib/gap": 0.012956577266922231,
"calib/mean_conf": 0.9318039215686273,
"calib/mu_c": 0.9359195402298852,
"calib/mu_w": 0.922962962962963,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2503137254901959,
"calib/std_conf": 0.024636191779688926,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 3053.0,
"completions/max_terminated_length": 3053.0,
"completions/mean_length": 455.47265625,
"completions/mean_terminated_length": 460.87353515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 161.0,
"epoch": 0.0544,
"grad_norm": 0.0234882440418005,
"learning_rate": 4.138888888888889e-06,
"loss": -0.0162,
"num_tokens": 13262057.0,
"reward": 1.5801244974136353,
"reward_std": 0.3207731246948242,
"rewards/accuracy_reward_step": 0.6796875,
"rewards/brier_reward_group": 0.8323727250099182,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7771878242492676,
"step": 51
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5761648745519713,
"calib/avg_num_step_conf": 3.64453125,
"calib/ece": 0.20206349206349206,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.9444444444444444,
"calib/gap": 0.0036852394916911946,
"calib/mean_conf": 0.938174603174603,
"calib/mu_c": 0.9391397849462366,
"calib/mu_w": 0.9354545454545454,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.20107142857142857,
"calib/std_conf": 0.024845580030756997,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2603.0,
"completions/max_terminated_length": 2603.0,
"completions/mean_length": 425.95703125,
"completions/mean_terminated_length": 431.0079345703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 186.0,
"epoch": 0.055466666666666664,
"grad_norm": 0.020434802398085594,
"learning_rate": 4.111111111111111e-06,
"loss": 0.0314,
"num_tokens": 13479054.0,
"reward": 1.6307379007339478,
"reward_std": 0.25411444902420044,
"rewards/accuracy_reward_step": 0.7265625,
"rewards/brier_reward_group": 0.8398484587669373,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.8081033229827881,
"step": 52
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.6889456506314745,
"calib/avg_num_step_conf": 2.90234375,
"calib/ece": 0.26243137254901955,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.9098039215686274,
"calib/gap": 0.017324393358876145,
"calib/mean_conf": 0.9447843137254901,
"calib/mu_c": 0.950287356321839,
"calib/mu_w": 0.9329629629629629,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.26243137254901955,
"calib/std_conf": 0.02623895750969716,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2190.0,
"completions/max_terminated_length": 2190.0,
"completions/mean_length": 413.1640625,
"completions/mean_terminated_length": 418.0632629394531,
"completions/min_length": 0.0,
"completions/min_terminated_length": 151.0,
"epoch": 0.05653333333333333,
"grad_norm": 0.022565390914678574,
"learning_rate": 4.083333333333334e-06,
"loss": -0.0037,
"num_tokens": 13690648.0,
"reward": 1.5788846015930176,
"reward_std": 0.28395211696624756,
"rewards/accuracy_reward_step": 0.6796875,
"rewards/brier_reward_group": 0.8142070770263672,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7903937101364136,
"step": 53
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.6343967730106344,
"calib/avg_num_step_conf": 2.796875,
"calib/ece": 0.16806640624999997,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.921875,
"calib/gap": 0.010365786578657832,
"calib/mean_conf": 0.95095703125,
"calib/mu_c": 0.9531435643564357,
"calib/mu_w": 0.9427777777777778,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.16498046874999997,
"calib/std_conf": 0.03614697252103588,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1085.0,
"completions/max_terminated_length": 1085.0,
"completions/mean_length": 344.88671875,
"completions/mean_terminated_length": 350.36114501953125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 140.0,
"epoch": 0.0576,
"grad_norm": 0.01560815516859293,
"learning_rate": 4.055555555555556e-06,
"loss": -0.0029,
"num_tokens": 13885171.0,
"reward": 1.7181185483932495,
"reward_std": 0.2507132887840271,
"rewards/accuracy_reward_step": 0.7890625,
"rewards/brier_reward_group": 0.8910635709762573,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.825160801410675,
"step": 54
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.6021959459459459,
"calib/avg_num_step_conf": 2.7265625,
"calib/ece": 0.39780000000000004,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.9803921568627451,
"calib/gap": 0.02071002252252252,
"calib/mean_conf": 0.9625058823529412,
"calib/mu_c": 0.9715208333333334,
"calib/mu_w": 0.9508108108108109,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.39780000000000004,
"calib/std_conf": 0.06857426891739195,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2252.0,
"completions/max_terminated_length": 2252.0,
"completions/mean_length": 367.59765625,
"completions/mean_terminated_length": 371.95654296875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 113.0,
"epoch": 0.058666666666666666,
"grad_norm": 0.012119713239371777,
"learning_rate": 4.027777777777779e-06,
"loss": 0.0246,
"num_tokens": 14087100.0,
"reward": 1.4041357040405273,
"reward_std": 0.3639181852340698,
"rewards/accuracy_reward_step": 0.5625,
"rewards/brier_reward_group": 0.7138887643814087,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.6604669690132141,
"step": 55
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5773824731732242,
"calib/avg_num_step_conf": 2.328125,
"calib/ece": 0.37866666666666676,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.9725490196078431,
"calib/gap": 0.008937787429739608,
"calib/mean_conf": 0.9747450980392158,
"calib/mu_c": 0.978355263157895,
"calib/mu_w": 0.9694174757281554,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.37866666666666676,
"calib/std_conf": 0.041537057934489446,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 810.0,
"completions/max_terminated_length": 810.0,
"completions/mean_length": 381.9140625,
"completions/mean_terminated_length": 387.9762268066406,
"completions/min_length": 0.0,
"completions/min_terminated_length": 150.0,
"epoch": 0.05973333333333333,
"grad_norm": 0.01059749536216259,
"learning_rate": 4.000000000000001e-06,
"loss": 0.0108,
"num_tokens": 14291710.0,
"reward": 1.446723222732544,
"reward_std": 0.29063791036605835,
"rewards/accuracy_reward_step": 0.59375,
"rewards/brier_reward_group": 0.7016832828521729,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.7258347272872925,
"step": 56
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5818525654749162,
"calib/avg_num_step_conf": 2.1015625,
"calib/ece": 0.30692913385826764,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.9921259842519685,
"calib/gap": 0.004722757439520442,
"calib/mean_conf": 0.9880314960629922,
"calib/mu_c": 0.9895375722543353,
"calib/mu_w": 0.9848148148148148,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.30692913385826764,
"calib/std_conf": 0.0161459568122748,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2178.0,
"completions/max_terminated_length": 2178.0,
"completions/mean_length": 359.89453125,
"completions/mean_terminated_length": 364.1620788574219,
"completions/min_length": 0.0,
"completions/min_terminated_length": 153.0,
"epoch": 0.0608,
"grad_norm": 0.015423390083014965,
"learning_rate": 3.972222222222223e-06,
"loss": -0.0222,
"num_tokens": 14490635.0,
"reward": 1.5489262342453003,
"reward_std": 0.3717479705810547,
"rewards/accuracy_reward_step": 0.67578125,
"rewards/brier_reward_group": 0.7912935018539429,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.7403486967086792,
"step": 57
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.49721875,
"calib/avg_num_step_conf": 1.921875,
"calib/ece": 0.4861260869565219,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.9960474308300395,
"calib/gap": 0.0032710937500001203,
"calib/mean_conf": 0.9920549407114626,
"calib/mu_c": 0.9936710937500001,
"calib/mu_w": 0.9904,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.4861260869565219,
"calib/std_conf": 0.03717970707095284,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2182.0,
"completions/max_terminated_length": 2182.0,
"completions/mean_length": 371.55859375,
"completions/mean_terminated_length": 375.9644470214844,
"completions/min_length": 0.0,
"completions/min_terminated_length": 140.0,
"epoch": 0.06186666666666667,
"grad_norm": 0.014439908787608147,
"learning_rate": 3.944444444444445e-06,
"loss": -0.0435,
"num_tokens": 14692074.0,
"reward": 1.2871463298797607,
"reward_std": 0.3386479616165161,
"rewards/accuracy_reward_step": 0.5,
"rewards/brier_reward_group": 0.6030274629592896,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.5768078565597534,
"step": 58
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5329601990049752,
"calib/avg_num_step_conf": 1.83984375,
"calib/ece": 0.4704244094488189,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.000775833333333309,
"calib/mean_conf": 0.9979834645669291,
"calib/mu_c": 0.99835,
"calib/mu_w": 0.9975741666666667,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.4704244094488189,
"calib/std_conf": 0.004194649080693384,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2227.0,
"completions/max_terminated_length": 2227.0,
"completions/mean_length": 351.65625,
"completions/mean_terminated_length": 354.4252014160156,
"completions/min_length": 0.0,
"completions/min_terminated_length": 94.0,
"epoch": 0.06293333333333333,
"grad_norm": 0.011451630853116512,
"learning_rate": 3.916666666666667e-06,
"loss": 0.0039,
"num_tokens": 14888346.0,
"reward": 1.3189291954040527,
"reward_std": 0.3809748589992523,
"rewards/accuracy_reward_step": 0.5234375,
"rewards/brier_reward_group": 0.6371943950653076,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.5838346481323242,
"step": 59
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.526216870540265,
"calib/avg_num_step_conf": 1.58203125,
"calib/ece": 0.4265624505928854,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.9841897233201581,
"calib/gap": 0.0077425395005098485,
"calib/mean_conf": 0.9829260869565217,
"calib/mu_c": 0.9862618055555556,
"calib/mu_w": 0.9785192660550458,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.9765625,
"calib/pce": 0.4201592885375494,
"calib/std_conf": 0.11431142691534942,
"calib/step_conf_rate": 0.9765625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 1467.0,
"completions/max_terminated_length": 1467.0,
"completions/mean_length": 328.60546875,
"completions/mean_terminated_length": 332.5019836425781,
"completions/min_length": 0.0,
"completions/min_terminated_length": 95.0,
"epoch": 0.064,
"grad_norm": 0.013444377109408379,
"learning_rate": 3.88888888888889e-06,
"loss": -0.0305,
"num_tokens": 15081325.0,
"reward": 1.3731634616851807,
"reward_std": 0.30295276641845703,
"rewards/accuracy_reward_step": 0.5625,
"rewards/brier_reward_group": 0.6537109613418579,
"rewards/format_reward_step": 0.9765625,
"rewards/stepwise_brier_reward": 0.6358177065849304,
"step": 60
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.4524908424908425,
"calib/avg_num_step_conf": 1.61328125,
"calib/ece": 0.30682055335968383,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -0.0009995384615386627,
"calib/mean_conf": 0.9985201581027668,
"calib/mu_c": 0.9982119999999999,
"calib/mu_w": 0.9992115384615385,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.30682055335968383,
"calib/std_conf": 0.003528028479936166,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2243.0,
"completions/max_terminated_length": 2243.0,
"completions/mean_length": 277.09765625,
"completions/mean_terminated_length": 280.3834228515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 91.0,
"epoch": 0.06506666666666666,
"grad_norm": 0.014007938094437122,
"learning_rate": 3.861111111111112e-06,
"loss": 0.0207,
"num_tokens": 15256326.0,
"reward": 1.5428268909454346,
"reward_std": 0.276619553565979,
"rewards/accuracy_reward_step": 0.68359375,
"rewards/brier_reward_group": 0.7586652040481567,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.7095175981521606,
"step": 61
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5042087542087542,
"calib/avg_num_step_conf": 1.22265625,
"calib/ece": 0.38059921568627464,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.9882352941176471,
"calib/gap": 0.010356332556332615,
"calib/mean_conf": 0.9923639215686274,
"calib/mu_c": 0.9963846153846154,
"calib/mu_w": 0.9860282828282828,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.38059921568627464,
"calib/std_conf": 0.0388025341843945,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1334.0,
"completions/max_terminated_length": 1334.0,
"completions/mean_length": 288.89453125,
"completions/mean_terminated_length": 293.48016357421875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 71.0,
"epoch": 0.06613333333333334,
"grad_norm": 0.012418070808053017,
"learning_rate": 3.833333333333334e-06,
"loss": -0.0151,
"num_tokens": 15437363.0,
"reward": 1.4541146755218506,
"reward_std": 0.4250434637069702,
"rewards/accuracy_reward_step": 0.609375,
"rewards/brier_reward_group": 0.7418953776359558,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.6683136224746704,
"step": 62
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4864263462394304,
"calib/avg_num_step_conf": 1.31640625,
"calib/ece": 0.4183389763779528,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -0.0005011316676203359,
"calib/mean_conf": 0.9970791338582677,
"calib/mu_c": 0.9968680272108844,
"calib/mu_w": 0.9973691588785047,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.4183389763779528,
"calib/std_conf": 0.004625374813264798,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2210.0,
"completions/max_terminated_length": 2210.0,
"completions/mean_length": 320.89453125,
"completions/mean_terminated_length": 324.6996154785156,
"completions/min_length": 0.0,
"completions/min_terminated_length": 72.0,
"epoch": 0.0672,
"grad_norm": 0.015316436998546124,
"learning_rate": 3.8055555555555556e-06,
"loss": -0.013,
"num_tokens": 15628152.0,
"reward": 1.3845548629760742,
"reward_std": 0.3264836072921753,
"rewards/accuracy_reward_step": 0.57421875,
"rewards/brier_reward_group": 0.6657936573028564,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.6146135330200195,
"step": 63
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5652515186676677,
"calib/avg_num_step_conf": 1.03125,
"calib/ece": 0.3528916666666667,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.9920634920634921,
"calib/gap": 0.006846201624462478,
"calib/mean_conf": 0.9839234126984127,
"calib/mu_c": 0.9863956521739131,
"calib/mu_w": 0.9795494505494506,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.984375,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.34896309523809527,
"calib/std_conf": 0.08773991941973767,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2232.0,
"completions/max_terminated_length": 2232.0,
"completions/mean_length": 264.7734375,
"completions/mean_terminated_length": 268.9761962890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 88.0,
"epoch": 0.06826666666666667,
"grad_norm": 0.01596958003938198,
"learning_rate": 3.777777777777778e-06,
"loss": -0.0237,
"num_tokens": 15799710.0,
"reward": 1.4842865467071533,
"reward_std": 0.4227965772151947,
"rewards/accuracy_reward_step": 0.62890625,
"rewards/brier_reward_group": 0.766762375831604,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.6938216686248779,
"step": 64
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5055081507896078,
"calib/avg_num_step_conf": 0.99609375,
"calib/ece": 0.39497215686274506,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.996078431372549,
"calib/gap": 0.009473484462557025,
"calib/mean_conf": 0.987129019607843,
"calib/mu_c": 0.9909927152317878,
"calib/mu_w": 0.9815192307692308,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.39497215686274506,
"calib/std_conf": 0.060790807008016354,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2194.0,
"completions/max_terminated_length": 2194.0,
"completions/mean_length": 240.83203125,
"completions/mean_terminated_length": 243.68775939941406,
"completions/min_length": 0.0,
"completions/min_terminated_length": 81.0,
"epoch": 0.06933333333333333,
"grad_norm": 0.016611957922577858,
"learning_rate": 3.7500000000000005e-06,
"loss": -0.023,
"num_tokens": 15966387.0,
"reward": 1.412438988685608,
"reward_std": 0.22559529542922974,
"rewards/accuracy_reward_step": 0.58984375,
"rewards/brier_reward_group": 0.6649578809738159,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.6488604545593262,
"step": 65
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5029582152101566,
"calib/avg_num_step_conf": 0.99609375,
"calib/ece": 0.5169803921568626,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.9882352941176471,
"calib/gap": -0.011903734746702588,
"calib/mean_conf": 0.9813725490196078,
"calib/mu_c": 0.9751639344262295,
"calib/mu_w": 0.9870676691729321,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.5099607843137254,
"calib/std_conf": 0.08009356312642617,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2223.0,
"completions/max_terminated_length": 2223.0,
"completions/mean_length": 282.66015625,
"completions/mean_terminated_length": 286.0118713378906,
"completions/min_length": 0.0,
"completions/min_terminated_length": 79.0,
"epoch": 0.0704,
"grad_norm": 0.01374450046569109,
"learning_rate": 3.7222222222222225e-06,
"loss": 0.0006,
"num_tokens": 16145100.0,
"reward": 1.2697900533676147,
"reward_std": 0.32513415813446045,
"rewards/accuracy_reward_step": 0.4765625,
"rewards/brier_reward_group": 0.5864582061767578,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.6020768880844116,
"step": 66
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5362654320987654,
"calib/avg_num_step_conf": 0.984375,
"calib/ece": 0.4155555555555557,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.996031746031746,
"calib/gap": 0.007199074074074163,
"calib/mean_conf": 0.986984126984127,
"calib/mu_c": 0.9900694444444444,
"calib/mu_w": 0.9828703703703703,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.984375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.4155555555555557,
"calib/std_conf": 0.03877631909620279,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2185.0,
"completions/max_terminated_length": 2185.0,
"completions/mean_length": 253.66015625,
"completions/mean_terminated_length": 257.6865234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 77.0,
"epoch": 0.07146666666666666,
"grad_norm": 0.02686356194317341,
"learning_rate": 3.694444444444445e-06,
"loss": -0.0081,
"num_tokens": 16315045.0,
"reward": 1.3880245685577393,
"reward_std": 0.3380063772201538,
"rewards/accuracy_reward_step": 0.5625,
"rewards/brier_reward_group": 0.6746812462806702,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.6586668491363525,
"step": 67
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5094562647754137,
"calib/avg_num_step_conf": 1.01171875,
"calib/ece": 0.4336078431372549,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.9882352941176471,
"calib/gap": 0.0020082120194101494,
"calib/mean_conf": 0.986549019607843,
"calib/mu_c": 0.9874468085106383,
"calib/mu_w": 0.9854385964912281,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.4336078431372549,
"calib/std_conf": 0.025264338755730805,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 736.0,
"completions/max_terminated_length": 736.0,
"completions/mean_length": 245.125,
"completions/mean_terminated_length": 249.01588439941406,
"completions/min_length": 0.0,
"completions/min_terminated_length": 72.0,
"epoch": 0.07253333333333334,
"grad_norm": 0.018724871799349785,
"learning_rate": 3.6666666666666666e-06,
"loss": -0.0088,
"num_tokens": 16481885.0,
"reward": 1.3662382364273071,
"reward_std": 0.2699292302131653,
"rewards/accuracy_reward_step": 0.55078125,
"rewards/brier_reward_group": 0.6460593938827515,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.6392064094543457,
"step": 68
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5151656993072458,
"calib/avg_num_step_conf": 1.0,
"calib/ece": 0.4046875,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.98046875,
"calib/gap": 0.0201435436559948,
"calib/mean_conf": 0.9789062500000001,
"calib/mu_c": 0.9874829931972791,
"calib/mu_w": 0.9673394495412843,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.4046875,
"calib/std_conf": 0.0800169354632974,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 527.0,
"completions/max_terminated_length": 527.0,
"completions/mean_length": 244.9375,
"completions/mean_terminated_length": 248.82540893554688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 89.0,
"epoch": 0.0736,
"grad_norm": 0.02710888721048832,
"learning_rate": 3.638888888888889e-06,
"loss": -0.0106,
"num_tokens": 16649085.0,
"reward": 1.4051319360733032,
"reward_std": 0.2940649092197418,
"rewards/accuracy_reward_step": 0.57421875,
"rewards/brier_reward_group": 0.6779281497001648,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.6535372138023376,
"step": 69
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5332393483709273,
"calib/avg_num_step_conf": 1.0078125,
"calib/ece": 0.4245275590551183,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.9724409448818898,
"calib/gap": 0.029003759398496087,
"calib/mean_conf": 0.9701968503937008,
"calib/mu_c": 0.9832142857142856,
"calib/mu_w": 0.9542105263157895,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.4217716535433073,
"calib/std_conf": 0.11772265146200765,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2166.0,
"completions/max_terminated_length": 2166.0,
"completions/mean_length": 289.89453125,
"completions/mean_terminated_length": 294.4960632324219,
"completions/min_length": 0.0,
"completions/min_terminated_length": 79.0,
"epoch": 0.07466666666666667,
"grad_norm": 0.014959375374019146,
"learning_rate": 3.6111111111111115e-06,
"loss": -0.0152,
"num_tokens": 16830290.0,
"reward": 1.3668923377990723,
"reward_std": 0.24334564805030823,
"rewards/accuracy_reward_step": 0.546875,
"rewards/brier_reward_group": 0.6424806118011475,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.6610262393951416,
"step": 70
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5500970066518847,
"calib/avg_num_step_conf": 0.984375,
"calib/ece": 0.3348412698412699,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.9523809523809523,
"calib/gap": 0.014972283813747267,
"calib/mean_conf": 0.9626984126984128,
"calib/mu_c": 0.9679268292682928,
"calib/mu_w": 0.9529545454545455,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.32337301587301587,
"calib/std_conf": 0.1297397437510634,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2236.0,
"completions/max_terminated_length": 2236.0,
"completions/mean_length": 284.44140625,
"completions/mean_terminated_length": 287.8142395019531,
"completions/min_length": 0.0,
"completions/min_terminated_length": 56.0,
"epoch": 0.07573333333333333,
"grad_norm": 0.016708169132471085,
"learning_rate": 3.5833333333333335e-06,
"loss": 0.0293,
"num_tokens": 17007515.0,
"reward": 1.4961044788360596,
"reward_std": 0.44954103231430054,
"rewards/accuracy_reward_step": 0.640625,
"rewards/brier_reward_group": 0.773381233215332,
"rewards/format_reward_step": 0.9765625,
"rewards/stepwise_brier_reward": 0.6954114437103271,
"step": 71
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.57889874353289,
"calib/avg_num_step_conf": 1.01171875,
"calib/ece": 0.45803921568627454,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.9764705882352941,
"calib/gap": 0.021350702143385192,
"calib/mean_conf": 0.9756862745098039,
"calib/mu_c": 0.9859848484848486,
"calib/mu_w": 0.9646341463414634,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.45803921568627454,
"calib/std_conf": 0.07645721351112288,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 961.0,
"completions/max_terminated_length": 961.0,
"completions/mean_length": 246.76171875,
"completions/mean_terminated_length": 250.6785888671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 90.0,
"epoch": 0.0768,
"grad_norm": 0.017632341012358665,
"learning_rate": 3.555555555555556e-06,
"loss": 0.0086,
"num_tokens": 17175094.0,
"reward": 1.341632604598999,
"reward_std": 0.2776060104370117,
"rewards/accuracy_reward_step": 0.515625,
"rewards/brier_reward_group": 0.6244671940803528,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.6873756647109985,
"step": 72
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5589257840215696,
"calib/avg_num_step_conf": 1.01953125,
"calib/ece": 0.3068235294117647,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.9647058823529412,
"calib/gap": 0.011943380161770878,
"calib/mean_conf": 0.9658039215686275,
"calib/mu_c": 0.9695977011494253,
"calib/mu_w": 0.9576543209876545,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2951372549019608,
"calib/std_conf": 0.128643148253082,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 736.0,
"completions/max_terminated_length": 736.0,
"completions/mean_length": 253.40625,
"completions/mean_terminated_length": 257.4285888671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 76.0,
"epoch": 0.07786666666666667,
"grad_norm": 0.017207786440849304,
"learning_rate": 3.5277777777777784e-06,
"loss": -0.0192,
"num_tokens": 17346998.0,
"reward": 1.5725383758544922,
"reward_std": 0.37019598484039307,
"rewards/accuracy_reward_step": 0.68359375,
"rewards/brier_reward_group": 0.8157491683959961,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.747841477394104,
"step": 73
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5547945205479452,
"calib/avg_num_step_conf": 1.01171875,
"calib/ece": 0.4038425196850395,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.9488188976377953,
"calib/gap": 0.018434043632673647,
"calib/mean_conf": 0.9712440944881889,
"calib/mu_c": 0.9790821917808217,
"calib/mu_w": 0.960648148148148,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.40014173228346467,
"calib/std_conf": 0.09582823516115335,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2227.0,
"completions/max_terminated_length": 2227.0,
"completions/mean_length": 255.73828125,
"completions/mean_terminated_length": 257.751953125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 60.0,
"epoch": 0.07893333333333333,
"grad_norm": 0.014544480480253696,
"learning_rate": 3.5e-06,
"loss": 0.007,
"num_tokens": 17516395.0,
"reward": 1.4134066104888916,
"reward_std": 0.3412873446941376,
"rewards/accuracy_reward_step": 0.5703125,
"rewards/brier_reward_group": 0.6993246078491211,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.6886764764785767,
"step": 74
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5625055100061712,
"calib/avg_num_step_conf": 1.0,
"calib/ece": 0.2105859375,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.9609375,
"calib/gap": 0.026311381468747008,
"calib/mean_conf": 0.9723828125,
"calib/mu_c": 0.9782412060301505,
"calib/mu_w": 0.9519298245614035,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2028125,
"calib/std_conf": 0.0932328755299859,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 693.0,
"completions/max_terminated_length": 693.0,
"completions/mean_length": 240.109375,
"completions/mean_terminated_length": 243.920654296875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 68.0,
"epoch": 0.08,
"grad_norm": 0.020223695784807205,
"learning_rate": 3.4722222222222224e-06,
"loss": 0.022,
"num_tokens": 17682615.0,
"reward": 1.7042614221572876,
"reward_std": 0.23040184378623962,
"rewards/accuracy_reward_step": 0.77734375,
"rewards/brier_reward_group": 0.8712968826293945,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.8363738059997559,
"step": 75
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5862294130450555,
"calib/avg_num_step_conf": 1.015625,
"calib/ece": 0.26847656250000007,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.9140625,
"calib/gap": 0.05851628818109267,
"calib/mean_conf": 0.9375390625000002,
"calib/mu_c": 0.9551396648044692,
"calib/mu_w": 0.8966233766233765,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2533984375000001,
"calib/std_conf": 0.1831660535187159,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1197.0,
"completions/max_terminated_length": 1197.0,
"completions/mean_length": 255.4296875,
"completions/mean_terminated_length": 259.484130859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 89.0,
"epoch": 0.08106666666666666,
"grad_norm": 0.012735992670059204,
"learning_rate": 3.444444444444445e-06,
"loss": -0.0105,
"num_tokens": 17851061.0,
"reward": 1.58017897605896,
"reward_std": 0.1934841275215149,
"rewards/accuracy_reward_step": 0.69921875,
"rewards/brier_reward_group": 0.7768750190734863,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.7469661235809326,
"step": 76
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5713551984738425,
"calib/avg_num_step_conf": 1.0,
"calib/ece": 0.2831496062992125,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.9291338582677166,
"calib/gap": 0.05980923031770535,
"calib/mean_conf": 0.9440157480314961,
"calib/mu_c": 0.9621468926553673,
"calib/mu_w": 0.902337662337662,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.26515748031496056,
"calib/std_conf": 0.18035948286432327,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2198.0,
"completions/max_terminated_length": 2198.0,
"completions/mean_length": 286.921875,
"completions/mean_terminated_length": 290.3241271972656,
"completions/min_length": 0.0,
"completions/min_terminated_length": 88.0,
"epoch": 0.08213333333333334,
"grad_norm": 0.012091055512428284,
"learning_rate": 3.416666666666667e-06,
"loss": -0.0109,
"num_tokens": 18029177.0,
"reward": 1.5749398469924927,
"reward_std": 0.30894869565963745,
"rewards/accuracy_reward_step": 0.69140625,
"rewards/brier_reward_group": 0.7807500958442688,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.7690092325210571,
"step": 77
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5302408854166667,
"calib/avg_num_step_conf": 1.0078125,
"calib/ece": 0.35859375000000004,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.9765625,
"calib/gap": 0.01866666666666661,
"calib/mean_conf": 0.9796875,
"calib/mu_c": 0.9866875,
"calib/mu_w": 0.9680208333333334,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.35664062500000004,
"calib/std_conf": 0.07192050190140499,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 861.0,
"completions/max_terminated_length": 861.0,
"completions/mean_length": 323.45703125,
"completions/mean_terminated_length": 328.5912780761719,
"completions/min_length": 0.0,
"completions/min_terminated_length": 107.0,
"epoch": 0.0832,
"grad_norm": 0.0124210799112916,
"learning_rate": 3.3888888888888893e-06,
"loss": -0.0002,
"num_tokens": 18220006.0,
"reward": 1.478103518486023,
"reward_std": 0.24688759446144104,
"rewards/accuracy_reward_step": 0.625,
"rewards/brier_reward_group": 0.7166808247566223,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7035453915596008,
"step": 78
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.572663139329806,
"calib/avg_num_step_conf": 1.0078125,
"calib/ece": 0.2931953125000002,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.9765625,
"calib/gap": 0.031783985890652455,
"calib/mean_conf": 0.9767890625000002,
"calib/mu_c": 0.9868457142857142,
"calib/mu_w": 0.9550617283950618,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.2931953125000002,
"calib/std_conf": 0.07936300621430045,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 887.0,
"completions/max_terminated_length": 887.0,
"completions/mean_length": 322.65625,
"completions/mean_terminated_length": 327.7778015136719,
"completions/min_length": 0.0,
"completions/min_terminated_length": 104.0,
"epoch": 0.08426666666666667,
"grad_norm": 0.0173186007887125,
"learning_rate": 3.3611111111111117e-06,
"loss": 0.0059,
"num_tokens": 18408982.0,
"reward": 1.5656778812408447,
"reward_std": 0.26752346754074097,
"rewards/accuracy_reward_step": 0.68359375,
"rewards/brier_reward_group": 0.779755711555481,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.7720180749893188,
"step": 79
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5068493150684932,
"calib/avg_num_step_conf": 1.0078125,
"calib/ece": 0.27507812500000006,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.00027397260273953616,
"calib/mean_conf": 0.9899218750000001,
"calib/mu_c": 0.9899999999999998,
"calib/mu_w": 0.9897260273972602,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.27507812500000006,
"calib/std_conf": 0.0012475562048961974,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1052.0,
"completions/max_terminated_length": 1052.0,
"completions/mean_length": 272.59375,
"completions/mean_terminated_length": 276.920654296875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 115.0,
"epoch": 0.08533333333333333,
"grad_norm": 0.017474321648478508,
"learning_rate": 3.3333333333333333e-06,
"loss": -0.02,
"num_tokens": 18580926.0,
"reward": 1.601799726486206,
"reward_std": 0.27312523126602173,
"rewards/accuracy_reward_step": 0.71484375,
"rewards/brier_reward_group": 0.8032468557357788,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7523896098136902,
"step": 80
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5323647871116225,
"calib/avg_num_step_conf": 0.984375,
"calib/ece": 0.2905098039215687,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.9803921568627451,
"calib/gap": 0.024984177215189973,
"calib/mean_conf": 0.9785098039215687,
"calib/mu_c": 0.9862500000000001,
"calib/mu_w": 0.9612658227848101,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.2894117647058824,
"calib/std_conf": 0.08085896037243229,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2236.0,
"completions/max_terminated_length": 2236.0,
"completions/mean_length": 332.3515625,
"completions/mean_terminated_length": 336.2925109863281,
"completions/min_length": 0.0,
"completions/min_terminated_length": 86.0,
"epoch": 0.0864,
"grad_norm": 0.015995075926184654,
"learning_rate": 3.3055555555555558e-06,
"loss": -0.0105,
"num_tokens": 18772256.0,
"reward": 1.5499467849731445,
"reward_std": 0.24558739364147186,
"rewards/accuracy_reward_step": 0.6875,
"rewards/brier_reward_group": 0.7626110315322876,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.718425989151001,
"step": 81
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5240963855421686,
"calib/avg_num_step_conf": 0.99609375,
"calib/ece": 0.30980468749999995,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.9921875,
"calib/gap": 0.01361445783132531,
"calib/mean_conf": 0.9855859375000001,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9763855421686746,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.30980468749999995,
"calib/std_conf": 0.05007463606703589,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1252.0,
"completions/max_terminated_length": 1252.0,
"completions/mean_length": 281.83203125,
"completions/mean_terminated_length": 286.3055725097656,
"completions/min_length": 0.0,
"completions/min_terminated_length": 62.0,
"epoch": 0.08746666666666666,
"grad_norm": 0.01421276107430458,
"learning_rate": 3.277777777777778e-06,
"loss": -0.0187,
"num_tokens": 18949957.0,
"reward": 1.5429801940917969,
"reward_std": 0.23315325379371643,
"rewards/accuracy_reward_step": 0.67578125,
"rewards/brier_reward_group": 0.7639062404632568,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7127020359039307,
"step": 82
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5580784574468086,
"calib/avg_num_step_conf": 0.98046875,
"calib/ece": 0.33783464566929144,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.9645669291338582,
"calib/gap": 0.0621316489361704,
"calib/mean_conf": 0.9638188976377953,
"calib/mu_c": 0.9868124999999999,
"calib/mu_w": 0.9246808510638295,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.97265625,
"calib/pce": 0.3358661417322835,
"calib/std_conf": 0.14154929679234315,
"calib/step_conf_rate": 0.97265625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2795.0,
"completions/max_terminated_length": 2795.0,
"completions/mean_length": 389.89453125,
"completions/mean_terminated_length": 394.5177917480469,
"completions/min_length": 0.0,
"completions/min_terminated_length": 94.0,
"epoch": 0.08853333333333334,
"grad_norm": 0.008663997985422611,
"learning_rate": 3.2500000000000002e-06,
"loss": -0.0166,
"num_tokens": 19157034.0,
"reward": 1.4604687690734863,
"reward_std": 0.23011240363121033,
"rewards/accuracy_reward_step": 0.625,
"rewards/brier_reward_group": 0.7134954929351807,
"rewards/format_reward_step": 0.97265625,
"rewards/stepwise_brier_reward": 0.6830668449401855,
"step": 83
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5376438134966023,
"calib/avg_num_step_conf": 0.99609375,
"calib/ece": 0.3225625000000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.94921875,
"calib/gap": 0.027094664603377283,
"calib/mean_conf": 0.9592031250000002,
"calib/mu_c": 0.9686227544910178,
"calib/mu_w": 0.9415280898876405,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.31471093750000007,
"calib/std_conf": 0.13959236745873457,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1104.0,
"completions/max_terminated_length": 1104.0,
"completions/mean_length": 310.03515625,
"completions/mean_terminated_length": 314.95635986328125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 86.0,
"epoch": 0.0896,
"grad_norm": 0.014482676982879639,
"learning_rate": 3.2222222222222227e-06,
"loss": 0.0079,
"num_tokens": 19342323.0,
"reward": 1.518557071685791,
"reward_std": 0.255326509475708,
"rewards/accuracy_reward_step": 0.65234375,
"rewards/brier_reward_group": 0.7570656538009644,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.7234127521514893,
"step": 84
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5612244897959183,
"calib/avg_num_step_conf": 0.94921875,
"calib/ece": 0.3595686274509804,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.94140625,
"calib/frac_conf_gt_0.9": 0.9529411764705882,
"calib/gap": 0.038367346938775526,
"calib/mean_conf": 0.9752549019607843,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9516326530612244,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.94140625,
"calib/pce": 0.3595686274509804,
"calib/std_conf": 0.0711408002229588,
"calib/step_conf_rate": 0.94140625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1543.0,
"completions/max_terminated_length": 1543.0,
"completions/mean_length": 345.0390625,
"completions/mean_terminated_length": 350.5158996582031,
"completions/min_length": 0.0,
"completions/min_terminated_length": 69.0,
"epoch": 0.09066666666666667,
"grad_norm": 0.014872740022838116,
"learning_rate": 3.1944444444444443e-06,
"loss": 0.0112,
"num_tokens": 19538477.0,
"reward": 1.4445767402648926,
"reward_std": 0.3293463885784149,
"rewards/accuracy_reward_step": 0.61328125,
"rewards/brier_reward_group": 0.7269442081451416,
"rewards/format_reward_step": 0.94140625,
"rewards/stepwise_brier_reward": 0.7154250741004944,
"step": 85
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5202144433932513,
"calib/avg_num_step_conf": 0.99609375,
"calib/ece": 0.38691406250000004,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.97265625,
"calib/gap": 0.015009145380006084,
"calib/mean_conf": 0.9767578125000002,
"calib/mu_c": 0.9829139072847679,
"calib/mu_w": 0.9679047619047618,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.38691406250000004,
"calib/std_conf": 0.08345584967642977,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1345.0,
"completions/max_terminated_length": 1345.0,
"completions/mean_length": 381.8046875,
"completions/mean_terminated_length": 387.8651123046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 77.0,
"epoch": 0.09173333333333333,
"grad_norm": 0.012381580658257008,
"learning_rate": 3.1666666666666667e-06,
"loss": -0.0121,
"num_tokens": 19741731.0,
"reward": 1.417504072189331,
"reward_std": 0.18847745656967163,
"rewards/accuracy_reward_step": 0.58984375,
"rewards/brier_reward_group": 0.6652394533157349,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.6532144546508789,
"step": 86
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5466451959205583,
"calib/avg_num_step_conf": 0.97265625,
"calib/ece": 0.1575396825396825,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.9444444444444444,
"calib/gap": 0.03254106280193214,
"calib/mean_conf": 0.9665079365079366,
"calib/mu_c": 0.9723188405797101,
"calib/mu_w": 0.9397777777777779,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.984375,
"calib/nonempty_step_conf_rate": 0.97265625,
"calib/pce": 0.15130952380952378,
"calib/std_conf": 0.10351610150283469,
"calib/step_conf_rate": 0.97265625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.02734375,
"completions/max_length": 1513.0,
"completions/max_terminated_length": 1513.0,
"completions/mean_length": 329.05078125,
"completions/mean_terminated_length": 338.3011779785156,
"completions/min_length": 0.0,
"completions/min_terminated_length": 93.0,
"epoch": 0.0928,
"grad_norm": 0.012511593289673328,
"learning_rate": 3.138888888888889e-06,
"loss": -0.0192,
"num_tokens": 19931464.0,
"reward": 1.7223682403564453,
"reward_std": 0.24959589540958405,
"rewards/accuracy_reward_step": 0.80859375,
"rewards/brier_reward_group": 0.8768448829650879,
"rewards/format_reward_step": 0.97265625,
"rewards/stepwise_brier_reward": 0.832940399646759,
"step": 87
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5352112676056338,
"calib/avg_num_step_conf": 0.99609375,
"calib/ece": 0.26070588235294123,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.984313725490196,
"calib/gap": 0.027746478873239888,
"calib/mean_conf": 0.9822745098039215,
"calib/mu_c": 0.9900000000000001,
"calib/mu_w": 0.9622535211267602,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.26070588235294123,
"calib/std_conf": 0.07082374655255184,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1217.0,
"completions/max_terminated_length": 1217.0,
"completions/mean_length": 371.45703125,
"completions/mean_terminated_length": 377.35321044921875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 130.0,
"epoch": 0.09386666666666667,
"grad_norm": 0.011493110097944736,
"learning_rate": 3.1111111111111116e-06,
"loss": -0.0056,
"num_tokens": 20136405.0,
"reward": 1.6156119108200073,
"reward_std": 0.20070913434028625,
"rewards/accuracy_reward_step": 0.71875,
"rewards/brier_reward_group": 0.7902989387512207,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.812773585319519,
"step": 88
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5699464961503327,
"calib/avg_num_step_conf": 0.9765625,
"calib/ece": 0.3442745098039215,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.9254901960784314,
"calib/gap": 0.055241419809473946,
"calib/mean_conf": 0.9591764705882353,
"calib/mu_c": 0.9801898734177213,
"calib/mu_w": 0.9249484536082474,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9765625,
"calib/pce": 0.3419215686274509,
"calib/std_conf": 0.11982220585097834,
"calib/step_conf_rate": 0.9765625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1936.0,
"completions/max_terminated_length": 1936.0,
"completions/mean_length": 430.6953125,
"completions/mean_terminated_length": 437.5317687988281,
"completions/min_length": 0.0,
"completions/min_terminated_length": 99.0,
"epoch": 0.09493333333333333,
"grad_norm": 0.021054452285170555,
"learning_rate": 3.0833333333333336e-06,
"loss": -0.0069,
"num_tokens": 20355551.0,
"reward": 1.4646778106689453,
"reward_std": 0.28685474395751953,
"rewards/accuracy_reward_step": 0.6171875,
"rewards/brier_reward_group": 0.731169581413269,
"rewards/format_reward_step": 0.9765625,
"rewards/stepwise_brier_reward": 0.7056663036346436,
"step": 89
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5331556909021697,
"calib/avg_num_step_conf": 0.9921875,
"calib/ece": 0.265234375,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.953125,
"calib/gap": 0.026733155690901844,
"calib/mean_conf": 0.9652343750000001,
"calib/mu_c": 0.9726486486486486,
"calib/mu_w": 0.9459154929577468,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.25390625,
"calib/std_conf": 0.12671304221886306,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1178.0,
"completions/max_terminated_length": 1178.0,
"completions/mean_length": 374.98046875,
"completions/mean_terminated_length": 380.93255615234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 90.0,
"epoch": 0.096,
"grad_norm": 0.010848317295312881,
"learning_rate": 3.055555555555556e-06,
"loss": -0.0062,
"num_tokens": 20554866.0,
"reward": 1.616170883178711,
"reward_std": 0.21723613142967224,
"rewards/accuracy_reward_step": 0.72265625,
"rewards/brier_reward_group": 0.8079821467399597,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.7817014455795288,
"step": 90
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.524192425508215,
"calib/avg_num_step_conf": 0.9921875,
"calib/ece": 0.3061172549019607,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.9686274509803922,
"calib/gap": 0.02881906850459459,
"calib/mean_conf": 0.9767054901960786,
"calib/mu_c": 0.9861988304093566,
"calib/mu_w": 0.957379761904762,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.3061172549019607,
"calib/std_conf": 0.08316363016140202,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1454.0,
"completions/max_terminated_length": 1454.0,
"completions/mean_length": 411.9609375,
"completions/mean_terminated_length": 418.5000305175781,
"completions/min_length": 0.0,
"completions/min_terminated_length": 117.0,
"epoch": 0.09706666666666666,
"grad_norm": 0.03383542597293854,
"learning_rate": 3.0277777777777776e-06,
"loss": -0.0261,
"num_tokens": 20768040.0,
"reward": 1.5384074449539185,
"reward_std": 0.266568124294281,
"rewards/accuracy_reward_step": 0.66796875,
"rewards/brier_reward_group": 0.761809766292572,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.7355697154998779,
"step": 91
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5954464285714286,
"calib/avg_num_step_conf": 0.9921875,
"calib/ece": 0.18816406250000003,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.93359375,
"calib/gap": 0.06655,
"calib/mean_conf": 0.9682421875000001,
"calib/mu_c": 0.9828,
"calib/mu_w": 0.91625,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.187578125,
"calib/std_conf": 0.08356109722960106,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 846.0,
"completions/max_terminated_length": 846.0,
"completions/mean_length": 339.3046875,
"completions/mean_terminated_length": 344.69049072265625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 122.0,
"epoch": 0.09813333333333334,
"grad_norm": 0.009851209819316864,
"learning_rate": 3e-06,
"loss": 0.0064,
"num_tokens": 20961622.0,
"reward": 1.707207441329956,
"reward_std": 0.19086013734340668,
"rewards/accuracy_reward_step": 0.78125,
"rewards/brier_reward_group": 0.8576591610908508,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.8617957234382629,
"step": 92
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5352112676056338,
"calib/avg_num_step_conf": 0.96875,
"calib/ece": 0.2628346456692914,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.9803149606299213,
"calib/gap": 0.023943661971830954,
"calib/mean_conf": 0.9833070866141732,
"calib/mu_c": 0.9899999999999998,
"calib/mu_w": 0.9660563380281688,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.96875,
"calib/pce": 0.2628346456692914,
"calib/std_conf": 0.04723129747943542,
"calib/step_conf_rate": 0.96875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2236.0,
"completions/max_terminated_length": 2236.0,
"completions/mean_length": 374.01953125,
"completions/mean_terminated_length": 379.95635986328125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 99.0,
"epoch": 0.0992,
"grad_norm": 0.0120620746165514,
"learning_rate": 2.9722222222222225e-06,
"loss": 0.0069,
"num_tokens": 21163147.0,
"reward": 1.5767902135849,
"reward_std": 0.1930733025074005,
"rewards/accuracy_reward_step": 0.71484375,
"rewards/brier_reward_group": 0.7593777179718018,
"rewards/format_reward_step": 0.96875,
"rewards/stepwise_brier_reward": 0.7509081363677979,
"step": 93
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5295732838589982,
"calib/avg_num_step_conf": 0.98046875,
"calib/ece": 0.2861111111111111,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.9761904761904762,
"calib/gap": 0.024550649350649545,
"calib/mean_conf": 0.9805555555555555,
"calib/mu_c": 0.9880571428571426,
"calib/mu_w": 0.9635064935064931,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.98046875,
"calib/nonempty_step_conf_rate": 0.9765625,
"calib/pce": 0.2861111111111111,
"calib/std_conf": 0.06498236092233464,
"calib/step_conf_rate": 0.9765625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 2579.0,
"completions/max_terminated_length": 2579.0,
"completions/mean_length": 353.75390625,
"completions/mean_terminated_length": 360.8008117675781,
"completions/min_length": 0.0,
"completions/min_terminated_length": 136.0,
"epoch": 0.10026666666666667,
"grad_norm": 0.009626048617064953,
"learning_rate": 2.944444444444445e-06,
"loss": 0.0035,
"num_tokens": 21362388.0,
"reward": 1.5426125526428223,
"reward_std": 0.1176624447107315,
"rewards/accuracy_reward_step": 0.68359375,
"rewards/brier_reward_group": 0.7276296615600586,
"rewards/format_reward_step": 0.9765625,
"rewards/stepwise_brier_reward": 0.7553207278251648,
"step": 94
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.6207837301587301,
"calib/avg_num_step_conf": 0.984375,
"calib/ece": 0.20176470588235298,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.9254901960784314,
"calib/gap": 0.13117807539682513,
"calib/mean_conf": 0.953372549019608,
"calib/mu_c": 0.98578125,
"calib/mu_w": 0.8546031746031749,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.2010980392156863,
"calib/std_conf": 0.14701339952058975,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2306.0,
"completions/max_terminated_length": 2306.0,
"completions/mean_length": 422.109375,
"completions/mean_terminated_length": 427.1146545410156,
"completions/min_length": 0.0,
"completions/min_terminated_length": 106.0,
"epoch": 0.10133333333333333,
"grad_norm": 0.010757964104413986,
"learning_rate": 2.916666666666667e-06,
"loss": -0.0067,
"num_tokens": 21576576.0,
"reward": 1.6540206670761108,
"reward_std": 0.19147750735282898,
"rewards/accuracy_reward_step": 0.75,
"rewards/brier_reward_group": 0.84278404712677,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.8045485019683838,
"step": 95
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5908581899623333,
"calib/avg_num_step_conf": 0.99609375,
"calib/ece": 0.16324218749999997,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.95703125,
"calib/gap": 0.059284332688587904,
"calib/mean_conf": 0.9745703125000001,
"calib/mu_c": 0.9854545454545455,
"calib/mu_w": 0.9261702127659576,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.16070312499999997,
"calib/std_conf": 0.07516005667009801,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1115.0,
"completions/max_terminated_length": 1115.0,
"completions/mean_length": 335.27734375,
"completions/mean_terminated_length": 340.5992126464844,
"completions/min_length": 0.0,
"completions/min_terminated_length": 91.0,
"epoch": 0.1024,
"grad_norm": 0.009875332936644554,
"learning_rate": 2.888888888888889e-06,
"loss": 0.0047,
"num_tokens": 21768223.0,
"reward": 1.7429949045181274,
"reward_std": 0.11069345474243164,
"rewards/accuracy_reward_step": 0.81640625,
"rewards/brier_reward_group": 0.8624937534332275,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.8516731262207031,
"step": 96
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.4899955203822607,
"calib/avg_num_step_conf": 0.99609375,
"calib/ece": 0.2894509803921569,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.9725490196078431,
"calib/gap": -0.01741302075556217,
"calib/mean_conf": 0.9734509803921568,
"calib/mu_c": 0.9683977900552485,
"calib/mu_w": 0.9858108108108107,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.27654901960784317,
"calib/std_conf": 0.11195172519237315,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 864.0,
"completions/max_terminated_length": 864.0,
"completions/mean_length": 347.234375,
"completions/mean_terminated_length": 352.7460632324219,
"completions/min_length": 0.0,
"completions/min_terminated_length": 115.0,
"epoch": 0.10346666666666667,
"grad_norm": 0.017515143379569054,
"learning_rate": 2.861111111111111e-06,
"loss": 0.024,
"num_tokens": 21962187.0,
"reward": 1.5750223398208618,
"reward_std": 0.22100487351417542,
"rewards/accuracy_reward_step": 0.70703125,
"rewards/brier_reward_group": 0.7652446031570435,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.7301572561264038,
"step": 97
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5108540231379176,
"calib/avg_num_step_conf": 1.00390625,
"calib/ece": 0.37219607843137265,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.9725490196078431,
"calib/gap": 0.00457818796308318,
"calib/mean_conf": 0.9789411764705883,
"calib/mu_c": 0.980700636942675,
"calib/mu_w": 0.9761224489795918,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.36772549019607853,
"calib/std_conf": 0.07737897869391891,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1349.0,
"completions/max_terminated_length": 1349.0,
"completions/mean_length": 401.78515625,
"completions/mean_terminated_length": 408.1627197265625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 79.0,
"epoch": 0.10453333333333334,
"grad_norm": 0.015487849712371826,
"learning_rate": 2.8333333333333335e-06,
"loss": -0.0141,
"num_tokens": 22171228.0,
"reward": 1.4574542045593262,
"reward_std": 0.2990908622741699,
"rewards/accuracy_reward_step": 0.61328125,
"rewards/brier_reward_group": 0.719641923904419,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.6726752519607544,
"step": 98
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5323135635635636,
"calib/avg_num_step_conf": 0.99609375,
"calib/ece": 0.4135294117647059,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.9647058823529412,
"calib/gap": 0.02889076576576588,
"calib/mean_conf": 0.9705490196078431,
"calib/mu_c": 0.9831249999999999,
"calib/mu_w": 0.954234234234234,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.40968627450980394,
"calib/std_conf": 0.11679583556326477,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2163.0,
"completions/max_terminated_length": 2163.0,
"completions/mean_length": 452.46484375,
"completions/mean_terminated_length": 457.8300476074219,
"completions/min_length": 0.0,
"completions/min_terminated_length": 77.0,
"epoch": 0.1056,
"grad_norm": 0.011346619576215744,
"learning_rate": 2.805555555555556e-06,
"loss": -0.005,
"num_tokens": 22392859.0,
"reward": 1.381075382232666,
"reward_std": 0.31315329670906067,
"rewards/accuracy_reward_step": 0.5625,
"rewards/brier_reward_group": 0.6842589378356934,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.5978550314903259,
"step": 99
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.499829966673468,
"calib/avg_num_step_conf": 0.984375,
"calib/ece": 0.33230468750000003,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.98828125,
"calib/gap": -0.0015949126028699245,
"calib/mean_conf": 0.9850390625000001,
"calib/mu_c": 0.9844970414201183,
"calib/mu_w": 0.9860919540229882,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.96875,
"calib/pce": 0.3285937500000001,
"calib/std_conf": 0.05423471880743085,
"calib/step_conf_rate": 0.96875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1726.0,
"completions/max_terminated_length": 1726.0,
"completions/mean_length": 419.6015625,
"completions/mean_terminated_length": 426.2619323730469,
"completions/min_length": 0.0,
"completions/min_terminated_length": 97.0,
"epoch": 0.10666666666666667,
"grad_norm": 0.011520988307893276,
"learning_rate": 2.7777777777777783e-06,
"loss": -0.0082,
"num_tokens": 22607685.0,
"reward": 1.5046149492263794,
"reward_std": 0.26636022329330444,
"rewards/accuracy_reward_step": 0.66015625,
"rewards/brier_reward_group": 0.7232612371444702,
"rewards/format_reward_step": 0.96875,
"rewards/stepwise_brier_reward": 0.717073380947113,
"step": 100
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 1.0,
"calib/ece": 0.3298437500000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 3.3306690738754696e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9899999999999997,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3298437500000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1144.0,
"completions/max_terminated_length": 1144.0,
"completions/mean_length": 424.71875,
"completions/mean_terminated_length": 431.4603271484375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 80.0,
"epoch": 0.10773333333333333,
"grad_norm": 0.009321141988039017,
"learning_rate": 2.7500000000000004e-06,
"loss": -0.0234,
"num_tokens": 22823405.0,
"reward": 1.5158978700637817,
"reward_std": 0.21507184207439423,
"rewards/accuracy_reward_step": 0.66015625,
"rewards/brier_reward_group": 0.734685480594635,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.6960934400558472,
"step": 101
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 0.99609375,
"calib/ece": 0.2478125000000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -3.3306690738754696e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.9899999999999998,
"calib/mu_w": 0.9900000000000001,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.2478125000000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 965.0,
"completions/max_terminated_length": 965.0,
"completions/mean_length": 328.71484375,
"completions/mean_terminated_length": 333.93255615234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 96.0,
"epoch": 0.1088,
"grad_norm": 0.01641284115612507,
"learning_rate": 2.7222222222222224e-06,
"loss": -0.0036,
"num_tokens": 23014252.0,
"reward": 1.6270155906677246,
"reward_std": 0.12798628211021423,
"rewards/accuracy_reward_step": 0.7421875,
"rewards/brier_reward_group": 0.7765922546386719,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7705326080322266,
"step": 102
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 1.0,
"calib/ece": 0.3610937500000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 2.220446049250313e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9899999999999997,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3610937500000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1559.0,
"completions/max_terminated_length": 1559.0,
"completions/mean_length": 433.1640625,
"completions/mean_terminated_length": 440.0397033691406,
"completions/min_length": 0.0,
"completions/min_terminated_length": 107.0,
"epoch": 0.10986666666666667,
"grad_norm": 0.023044515401124954,
"learning_rate": 2.6944444444444444e-06,
"loss": -0.0256,
"num_tokens": 23229694.0,
"reward": 1.4751359224319458,
"reward_std": 0.16185157001018524,
"rewards/accuracy_reward_step": 0.62890625,
"rewards/brier_reward_group": 0.693356990814209,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.6915618777275085,
"step": 103
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 1.0,
"calib/ece": 0.4235937500000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 3.3306690738754696e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.9900000000000001,
"calib/mu_w": 0.9899999999999998,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.4235937500000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 965.0,
"completions/max_terminated_length": 965.0,
"completions/mean_length": 412.09765625,
"completions/mean_terminated_length": 418.638916015625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 92.0,
"epoch": 0.11093333333333333,
"grad_norm": 0.022061089053750038,
"learning_rate": 2.666666666666667e-06,
"loss": 0.0253,
"num_tokens": 23441871.0,
"reward": 1.3992860317230225,
"reward_std": 0.2274901121854782,
"rewards/accuracy_reward_step": 0.56640625,
"rewards/brier_reward_group": 0.6506617069244385,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.6808575987815857,
"step": 104
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5071428571428571,
"calib/avg_num_step_conf": 0.98828125,
"calib/ece": 0.2647430830039527,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.9960474308300395,
"calib/gap": 0.006999999999999895,
"calib/mean_conf": 0.9880632411067194,
"calib/mu_c": 0.9899999999999998,
"calib/mu_w": 0.9829999999999999,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.2647430830039527,
"calib/std_conf": 0.030745094286679438,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 2493.0,
"completions/max_terminated_length": 2493.0,
"completions/mean_length": 427.90234375,
"completions/mean_terminated_length": 436.4263000488281,
"completions/min_length": 0.0,
"completions/min_terminated_length": 110.0,
"epoch": 0.112,
"grad_norm": 0.009036574512720108,
"learning_rate": 2.6388888888888893e-06,
"loss": 0.0088,
"num_tokens": 23657174.0,
"reward": 1.5890259742736816,
"reward_std": 0.17864654958248138,
"rewards/accuracy_reward_step": 0.71484375,
"rewards/brier_reward_group": 0.7721549272537231,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.7480111122131348,
"step": 105
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5052631578947369,
"calib/avg_num_step_conf": 0.98828125,
"calib/ece": 0.3601181102362204,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.9960629921259843,
"calib/gap": 0.010421052631579109,
"calib/mean_conf": 0.9861023622047244,
"calib/mu_c": 0.9899999999999998,
"calib/mu_w": 0.9795789473684207,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.3601181102362204,
"calib/std_conf": 0.0619957243440197,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2409.0,
"completions/max_terminated_length": 2409.0,
"completions/mean_length": 441.67578125,
"completions/mean_terminated_length": 446.9130554199219,
"completions/min_length": 0.0,
"completions/min_terminated_length": 150.0,
"epoch": 0.11306666666666666,
"grad_norm": 0.007855425588786602,
"learning_rate": 2.6111111111111113e-06,
"loss": -0.0065,
"num_tokens": 23874827.0,
"reward": 1.466196894645691,
"reward_std": 0.19247515499591827,
"rewards/accuracy_reward_step": 0.62109375,
"rewards/brier_reward_group": 0.6923063397407532,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.7115440368652344,
"step": 106
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 1.0,
"calib/ece": 0.2751562500000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -2.220446049250313e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.9899999999999998,
"calib/mu_w": 0.99,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2751562500000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1012.0,
"completions/max_terminated_length": 1012.0,
"completions/mean_length": 397.40625,
"completions/mean_terminated_length": 403.71429443359375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 116.0,
"epoch": 0.11413333333333334,
"grad_norm": 0.021294519305229187,
"learning_rate": 2.5833333333333337e-06,
"loss": -0.0191,
"num_tokens": 24081179.0,
"reward": 1.6048345565795898,
"reward_std": 0.22603082656860352,
"rewards/accuracy_reward_step": 0.71484375,
"rewards/brier_reward_group": 0.7892944812774658,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.770668625831604,
"step": 107
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 1.0,
"calib/ece": 0.2087500000000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -1.1102230246251565e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9900000000000001,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2087500000000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1692.0,
"completions/max_terminated_length": 1692.0,
"completions/mean_length": 437.31640625,
"completions/mean_terminated_length": 444.2579650878906,
"completions/min_length": 0.0,
"completions/min_terminated_length": 93.0,
"epoch": 0.1152,
"grad_norm": 0.07678169012069702,
"learning_rate": 2.5555555555555557e-06,
"loss": -0.003,
"num_tokens": 24296364.0,
"reward": 1.6970703601837158,
"reward_std": 0.17200732231140137,
"rewards/accuracy_reward_step": 0.78125,
"rewards/brier_reward_group": 0.8411890864372253,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.8220928311347961,
"step": 108
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5045454545454545,
"calib/avg_num_step_conf": 0.9921875,
"calib/ece": 0.42287401574803163,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.00045454545454559625,
"calib/mean_conf": 0.9898031496062993,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9895454545454544,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.42287401574803163,
"calib/std_conf": 0.0031310971890919055,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2177.0,
"completions/max_terminated_length": 2177.0,
"completions/mean_length": 448.69921875,
"completions/mean_terminated_length": 455.8214416503906,
"completions/min_length": 0.0,
"completions/min_terminated_length": 132.0,
"epoch": 0.11626666666666667,
"grad_norm": 0.035480234771966934,
"learning_rate": 2.5277777777777778e-06,
"loss": -0.0236,
"num_tokens": 24515831.0,
"reward": 1.3797364234924316,
"reward_std": 0.21544721722602844,
"rewards/accuracy_reward_step": 0.5625,
"rewards/brier_reward_group": 0.6426604986190796,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.6419100761413574,
"step": 109
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 0.98046875,
"calib/ece": 0.3037254901960784,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -1.1102230246251565e-16,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.99,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.3037254901960784,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2203.0,
"completions/max_terminated_length": 2203.0,
"completions/mean_length": 392.36328125,
"completions/mean_terminated_length": 397.0158386230469,
"completions/min_length": 0.0,
"completions/min_terminated_length": 80.0,
"epoch": 0.11733333333333333,
"grad_norm": 0.009675406850874424,
"learning_rate": 2.5e-06,
"loss": -0.0201,
"num_tokens": 24721196.0,
"reward": 1.5546002388000488,
"reward_std": 0.3171882629394531,
"rewards/accuracy_reward_step": 0.68359375,
"rewards/brier_reward_group": 0.774333119392395,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.7487553954124451,
"step": 110
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.512987012987013,
"calib/avg_num_step_conf": 0.98046875,
"calib/ece": 0.2879215686274511,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.996078431372549,
"calib/gap": 0.013376623376623709,
"calib/mean_conf": 0.9859607843137255,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9766233766233763,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.2879215686274511,
"calib/std_conf": 0.06191517308550772,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2337.0,
"completions/max_terminated_length": 2337.0,
"completions/mean_length": 445.23828125,
"completions/mean_terminated_length": 452.3055725097656,
"completions/min_length": 0.0,
"completions/min_terminated_length": 106.0,
"epoch": 0.1184,
"grad_norm": 0.009541746228933334,
"learning_rate": 2.4722222222222226e-06,
"loss": 0.0303,
"num_tokens": 24942585.0,
"reward": 1.5594208240509033,
"reward_std": 0.22527828812599182,
"rewards/accuracy_reward_step": 0.6953125,
"rewards/brier_reward_group": 0.7607840895652771,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.7347120046615601,
"step": 111
},
{
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 0.96484375,
"calib/ece": 0.3193172690763053,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9899999999999999,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.97265625,
"calib/nonempty_step_conf_rate": 0.96484375,
"calib/pce": 0.3193172690763053,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 0.96484375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 2270.0,
"completions/max_terminated_length": 2270.0,
"completions/mean_length": 499.984375,
"completions/mean_terminated_length": 509.9442443847656,
"completions/min_length": 0.0,
"completions/min_terminated_length": 108.0,
"epoch": 0.11946666666666667,
"grad_norm": 0.011667572893202305,
"learning_rate": 2.4444444444444447e-06,
"loss": -0.0236,
"num_tokens": 25178501.0,
"reward": 1.4918447732925415,
"reward_std": 0.2304956316947937,
"rewards/accuracy_reward_step": 0.65234375,
"rewards/brier_reward_group": 0.712691605091095,
"rewards/format_reward_step": 0.96484375,
"rewards/stepwise_brier_reward": 0.7156248688697815,
"step": 112
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 0.99609375,
"calib/ece": 0.3128346456692913,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9899999999999999,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.3128346456692913,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2429.0,
"completions/max_terminated_length": 2429.0,
"completions/mean_length": 431.53125,
"completions/mean_terminated_length": 438.3809814453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 140.0,
"epoch": 0.12053333333333334,
"grad_norm": 0.01603376492857933,
"learning_rate": 2.4166666666666667e-06,
"loss": -0.0056,
"num_tokens": 25394173.0,
"reward": 1.5291264057159424,
"reward_std": 0.18222017586231232,
"rewards/accuracy_reward_step": 0.671875,
"rewards/brier_reward_group": 0.7347449064254761,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.7176977396011353,
"step": 113
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 1.03125,
"calib/ece": 0.2009375000000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -2.220446049250313e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.9900000000000001,
"calib/mu_w": 0.9900000000000003,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2009375000000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1130.0,
"completions/max_terminated_length": 1130.0,
"completions/mean_length": 419.265625,
"completions/mean_terminated_length": 425.920654296875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 118.0,
"epoch": 0.1216,
"grad_norm": 0.007103959564119577,
"learning_rate": 2.388888888888889e-06,
"loss": -0.01,
"num_tokens": 25606529.0,
"reward": 1.7007160186767578,
"reward_std": 0.19283781945705414,
"rewards/accuracy_reward_step": 0.7890625,
"rewards/brier_reward_group": 0.8586108684539795,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.7880029678344727,
"step": 114
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 1.00390625,
"calib/ece": 0.3493750000000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9899999999999999,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3493750000000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1048.0,
"completions/max_terminated_length": 1048.0,
"completions/mean_length": 407.21484375,
"completions/mean_terminated_length": 413.6785888671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 171.0,
"epoch": 0.12266666666666666,
"grad_norm": 0.04583623260259628,
"learning_rate": 2.361111111111111e-06,
"loss": -0.0278,
"num_tokens": 25816040.0,
"reward": 1.4954776763916016,
"reward_std": 0.2661696672439575,
"rewards/accuracy_reward_step": 0.640625,
"rewards/brier_reward_group": 0.7326734662055969,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.6867371797561646,
"step": 115
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 0.99609375,
"calib/ece": 0.2400000000000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.9900000000000001,
"calib/mu_w": 0.9900000000000001,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.2400000000000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1088.0,
"completions/max_terminated_length": 1088.0,
"completions/mean_length": 467.328125,
"completions/mean_terminated_length": 474.7460632324219,
"completions/min_length": 0.0,
"completions/min_terminated_length": 76.0,
"epoch": 0.12373333333333333,
"grad_norm": 0.024667803198099136,
"learning_rate": 2.3333333333333336e-06,
"loss": -0.0079,
"num_tokens": 26040196.0,
"reward": 1.6541872024536133,
"reward_std": 0.1952700912952423,
"rewards/accuracy_reward_step": 0.75,
"rewards/brier_reward_group": 0.8135454058647156,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.8110154867172241,
"step": 116
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 1.0234375,
"calib/ece": 0.3650000000000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -1.1102230246251565e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9900000000000001,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3650000000000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1099.0,
"completions/max_terminated_length": 1099.0,
"completions/mean_length": 431.20703125,
"completions/mean_terminated_length": 438.0516052246094,
"completions/min_length": 0.0,
"completions/min_terminated_length": 162.0,
"epoch": 0.1248,
"grad_norm": 0.022631388157606125,
"learning_rate": 2.305555555555556e-06,
"loss": 0.0036,
"num_tokens": 26257185.0,
"reward": 1.4794007539749146,
"reward_std": 0.2126893401145935,
"rewards/accuracy_reward_step": 0.625,
"rewards/brier_reward_group": 0.7046656608581543,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.7129372954368591,
"step": 117
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.504869684499314,
"calib/avg_num_step_conf": 1.01171875,
"calib/ece": 0.35063492063492074,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.996031746031746,
"calib/gap": -0.005358024691357932,
"calib/mean_conf": 0.9855555555555555,
"calib/mu_c": 0.983641975308642,
"calib/mu_w": 0.9889999999999999,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.984375,
"calib/nonempty_step_conf_rate": 0.9765625,
"calib/pce": 0.3466666666666668,
"calib/std_conf": 0.06242526395816772,
"calib/step_conf_rate": 0.9765625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 1703.0,
"completions/max_terminated_length": 1703.0,
"completions/mean_length": 480.93359375,
"completions/mean_terminated_length": 490.5139465332031,
"completions/min_length": 0.0,
"completions/min_terminated_length": 126.0,
"epoch": 0.12586666666666665,
"grad_norm": 0.012477186508476734,
"learning_rate": 2.277777777777778e-06,
"loss": -0.0044,
"num_tokens": 26484312.0,
"reward": 1.4674220085144043,
"reward_std": 0.29449427127838135,
"rewards/accuracy_reward_step": 0.6328125,
"rewards/brier_reward_group": 0.7023806571960449,
"rewards/format_reward_step": 0.97265625,
"rewards/stepwise_brier_reward": 0.6907451152801514,
"step": 118
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 1.03125,
"calib/ece": 0.3403937007874016,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -2.220446049250313e-16,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.9899999999999998,
"calib/mu_w": 0.99,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.3403937007874016,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2472.0,
"completions/max_terminated_length": 2472.0,
"completions/mean_length": 498.09765625,
"completions/mean_terminated_length": 506.0039978027344,
"completions/min_length": 0.0,
"completions/min_terminated_length": 104.0,
"epoch": 0.12693333333333334,
"grad_norm": 0.009062022902071476,
"learning_rate": 2.25e-06,
"loss": -0.0133,
"num_tokens": 26716889.0,
"reward": 1.4937986135482788,
"reward_std": 0.28233009576797485,
"rewards/accuracy_reward_step": 0.64453125,
"rewards/brier_reward_group": 0.7360901236534119,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.6844169497489929,
"step": 119
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 1.09765625,
"calib/ece": 0.23409448818897638,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -2.220446049250313e-16,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.9900000000000001,
"calib/mu_w": 0.9900000000000003,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.23409448818897638,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1648.0,
"completions/max_terminated_length": 1648.0,
"completions/mean_length": 462.0859375,
"completions/mean_terminated_length": 469.420654296875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 140.0,
"epoch": 0.128,
"grad_norm": 0.032117150723934174,
"learning_rate": 2.222222222222222e-06,
"loss": -0.0033,
"num_tokens": 26941871.0,
"reward": 1.6441919803619385,
"reward_std": 0.1847691833972931,
"rewards/accuracy_reward_step": 0.75,
"rewards/brier_reward_group": 0.8027962446212769,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.789596438407898,
"step": 120
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5057471264367817,
"calib/avg_num_step_conf": 1.2890625,
"calib/ece": 0.30772509803921566,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.00011436781609175384,
"calib/mean_conf": 0.9900780392156863,
"calib/mu_c": 0.9901143678160917,
"calib/mu_w": 0.99,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.30772509803921566,
"calib/std_conf": 0.0008777355437354933,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1456.0,
"completions/max_terminated_length": 1456.0,
"completions/mean_length": 527.12890625,
"completions/mean_terminated_length": 535.4960327148438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 144.0,
"epoch": 0.12906666666666666,
"grad_norm": 0.11755120009183884,
"learning_rate": 2.1944444444444445e-06,
"loss": 0.0216,
"num_tokens": 27181872.0,
"reward": 1.556952714920044,
"reward_std": 0.24504821002483368,
"rewards/accuracy_reward_step": 0.6796875,
"rewards/brier_reward_group": 0.7592884302139282,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7575849890708923,
"step": 121
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.49021639042357273,
"calib/avg_num_step_conf": 1.59375,
"calib/ece": 0.2728458498023714,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.9920948616600791,
"calib/gap": 0.01103974831184762,
"calib/mean_conf": 0.9847035573122529,
"calib/mu_c": 0.9878453038674031,
"calib/mu_w": 0.9768055555555555,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.2710671936758892,
"calib/std_conf": 0.06793110805728833,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 2496.0,
"completions/max_terminated_length": 2496.0,
"completions/mean_length": 538.4375,
"completions/mean_terminated_length": 549.1633911132812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 191.0,
"epoch": 0.13013333333333332,
"grad_norm": 0.009088210761547089,
"learning_rate": 2.166666666666667e-06,
"loss": -0.0168,
"num_tokens": 27427056.0,
"reward": 1.5797841548919678,
"reward_std": 0.24107559025287628,
"rewards/accuracy_reward_step": 0.70703125,
"rewards/brier_reward_group": 0.7796617150306702,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.7425999641418457,
"step": 122
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.48445945945945945,
"calib/avg_num_step_conf": 1.7890625,
"calib/ece": 0.2819291338582677,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -0.0003108108108108576,
"calib/mean_conf": 0.9905905511811023,
"calib/mu_c": 0.9904999999999998,
"calib/mu_w": 0.9908108108108107,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.2819291338582677,
"calib/std_conf": 0.0023572783275468846,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2485.0,
"completions/max_terminated_length": 2485.0,
"completions/mean_length": 568.74609375,
"completions/mean_terminated_length": 577.7738647460938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 140.0,
"epoch": 0.1312,
"grad_norm": 0.011878207325935364,
"learning_rate": 2.138888888888889e-06,
"loss": -0.0138,
"num_tokens": 27677943.0,
"reward": 1.5732364654541016,
"reward_std": 0.26530539989471436,
"rewards/accuracy_reward_step": 0.703125,
"rewards/brier_reward_group": 0.7894773483276367,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.7144062519073486,
"step": 123
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.53125,
"calib/avg_num_step_conf": 1.88671875,
"calib/ece": 0.24046875,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0006249999999999867,
"calib/mean_conf": 0.99046875,
"calib/mu_c": 0.9906250000000001,
"calib/mu_w": 0.9900000000000001,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.24046875,
"calib/std_conf": 0.002113710821635734,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 1323.0,
"completions/max_terminated_length": 1323.0,
"completions/mean_length": 527.828125,
"completions/mean_terminated_length": 538.3426513671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 142.0,
"epoch": 0.13226666666666667,
"grad_norm": 0.13055697083473206,
"learning_rate": 2.1111111111111114e-06,
"loss": -0.003,
"num_tokens": 27919883.0,
"reward": 1.6604065895080566,
"reward_std": 0.19660840928554535,
"rewards/accuracy_reward_step": 0.75,
"rewards/brier_reward_group": 0.8212425708770752,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.8203842639923096,
"step": 124
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.45983563096500535,
"calib/avg_num_step_conf": 1.9296875,
"calib/ece": 0.35027343750000006,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -0.0008032873806997909,
"calib/mean_conf": 0.9908984375000001,
"calib/mu_c": 0.9906097560975609,
"calib/mu_w": 0.9914130434782606,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.35027343750000006,
"calib/std_conf": 0.0028595777762798766,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1631.0,
"completions/max_terminated_length": 1631.0,
"completions/mean_length": 542.22265625,
"completions/mean_terminated_length": 550.8294067382812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 139.0,
"epoch": 0.13333333333333333,
"grad_norm": 0.015504639595746994,
"learning_rate": 2.0833333333333334e-06,
"loss": 0.0214,
"num_tokens": 28163500.0,
"reward": 1.4953252077102661,
"reward_std": 0.22951993346214294,
"rewards/accuracy_reward_step": 0.640625,
"rewards/brier_reward_group": 0.7288151979446411,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.6899855136871338,
"step": 125
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.48952496179317373,
"calib/avg_num_step_conf": 2.00390625,
"calib/ece": 0.398392156862745,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -0.0002095007641369495,
"calib/mean_conf": 0.990549019607843,
"calib/mu_c": 0.99046357615894,
"calib/mu_w": 0.9906730769230769,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.398392156862745,
"calib/std_conf": 0.002277887957875705,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2454.0,
"completions/max_terminated_length": 2454.0,
"completions/mean_length": 585.51171875,
"completions/mean_terminated_length": 592.45458984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 109.0,
"epoch": 0.1344,
"grad_norm": 0.010014167055487633,
"learning_rate": 2.0555555555555555e-06,
"loss": 0.0058,
"num_tokens": 28418855.0,
"reward": 1.4250679016113281,
"reward_std": 0.30080366134643555,
"rewards/accuracy_reward_step": 0.58984375,
"rewards/brier_reward_group": 0.6859462857246399,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.6705750226974487,
"step": 126
},
{
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.4936708860759494,
"calib/avg_num_step_conf": 1.9296875,
"calib/ece": 0.3100760000000001,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.996,
"calib/gap": -0.005804944851580007,
"calib/mean_conf": 0.9861560000000001,
"calib/mu_c": 0.9843216374269005,
"calib/mu_w": 0.9901265822784805,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.98046875,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.30611600000000005,
"calib/std_conf": 0.061867177598464926,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2954.0,
"completions/max_terminated_length": 2954.0,
"completions/mean_length": 544.22265625,
"completions/mean_terminated_length": 552.8611450195312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 153.0,
"epoch": 0.13546666666666668,
"grad_norm": 0.011544213630259037,
"learning_rate": 2.027777777777778e-06,
"loss": 0.0069,
"num_tokens": 28661848.0,
"reward": 1.5123376846313477,
"reward_std": 0.30747634172439575,
"rewards/accuracy_reward_step": 0.66796875,
"rewards/brier_reward_group": 0.746268630027771,
"rewards/format_reward_step": 0.96875,
"rewards/stepwise_brier_reward": 0.6937066316604614,
"step": 127
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 1.95703125,
"calib/ece": 0.33523809523809533,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 1.1102230246251565e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.9899999999999998,
"calib/mu_w": 0.9899999999999997,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.97265625,
"calib/pce": 0.33523809523809533,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 0.97265625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2419.0,
"completions/max_terminated_length": 2419.0,
"completions/mean_length": 597.9375,
"completions/mean_terminated_length": 605.0277099609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 105.0,
"epoch": 0.13653333333333334,
"grad_norm": 0.00843075942248106,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.0135,
"num_tokens": 28921584.0,
"reward": 1.4812567234039307,
"reward_std": 0.2827935218811035,
"rewards/accuracy_reward_step": 0.64453125,
"rewards/brier_reward_group": 0.7225621938705444,
"rewards/format_reward_step": 0.96484375,
"rewards/stepwise_brier_reward": 0.6946519613265991,
"step": 128
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4970414201183432,
"calib/avg_num_step_conf": 1.96875,
"calib/ece": 0.32862204724409444,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.9960629921259843,
"calib/gap": -0.0058579881656802835,
"calib/mean_conf": 0.9861023622047244,
"calib/mu_c": 0.9841420118343195,
"calib/mu_w": 0.9899999999999998,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.3246850393700787,
"calib/std_conf": 0.06199572434401969,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 2199.0,
"completions/max_terminated_length": 2199.0,
"completions/mean_length": 510.7734375,
"completions/mean_terminated_length": 520.9482421875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 135.0,
"epoch": 0.1376,
"grad_norm": 0.00900917686522007,
"learning_rate": 1.9722222222222224e-06,
"loss": -0.0236,
"num_tokens": 29154726.0,
"reward": 1.5183894634246826,
"reward_std": 0.26133912801742554,
"rewards/accuracy_reward_step": 0.66015625,
"rewards/brier_reward_group": 0.737421452999115,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.7267614603042603,
"step": 129
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5068493150684932,
"calib/avg_num_step_conf": 1.95703125,
"calib/ece": 0.277244094488189,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0005479452054794054,
"calib/mean_conf": 0.9898425196850393,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9894520547945206,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.277244094488189,
"calib/std_conf": 0.0025048777512735243,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 1967.0,
"completions/max_terminated_length": 1967.0,
"completions/mean_length": 482.98046875,
"completions/mean_terminated_length": 492.60162353515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 191.0,
"epoch": 0.13866666666666666,
"grad_norm": 0.025695854797959328,
"learning_rate": 1.944444444444445e-06,
"loss": -0.0101,
"num_tokens": 29383657.0,
"reward": 1.595362663269043,
"reward_std": 0.15114884078502655,
"rewards/accuracy_reward_step": 0.70703125,
"rewards/brier_reward_group": 0.7583874464035034,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.8105632066726685,
"step": 130
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5076923076923077,
"calib/avg_num_step_conf": 2.0859375,
"calib/ece": 0.49378906249999993,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.99609375,
"calib/gap": 0.0079230769230767,
"calib/mean_conf": 0.9859765625000001,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9820769230769232,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.49378906249999993,
"calib/std_conf": 0.061794640347554344,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2128.0,
"completions/max_terminated_length": 2128.0,
"completions/mean_length": 513.6484375,
"completions/mean_terminated_length": 521.8016357421875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 143.0,
"epoch": 0.13973333333333332,
"grad_norm": 0.013012518174946308,
"learning_rate": 1.916666666666667e-06,
"loss": -0.0065,
"num_tokens": 29621359.0,
"reward": 1.269148826599121,
"reward_std": 0.22060997784137726,
"rewards/accuracy_reward_step": 0.4921875,
"rewards/brier_reward_group": 0.5776644349098206,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.5301808714866638,
"step": 131
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.521974306964165,
"calib/avg_num_step_conf": 1.87890625,
"calib/ece": 0.18681102362204716,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.9960629921259843,
"calib/gap": 0.02004926108374383,
"calib/mean_conf": 0.986023622047244,
"calib/mu_c": 0.9900492610837439,
"calib/mu_w": 0.9700000000000001,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.18681102362204716,
"calib/std_conf": 0.062022470472996194,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1954.0,
"completions/max_terminated_length": 1954.0,
"completions/mean_length": 491.2734375,
"completions/mean_terminated_length": 499.07147216796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 109.0,
"epoch": 0.1408,
"grad_norm": 0.008971575647592545,
"learning_rate": 1.888888888888889e-06,
"loss": -0.014,
"num_tokens": 29852717.0,
"reward": 1.7127364873886108,
"reward_std": 0.2484358251094818,
"rewards/accuracy_reward_step": 0.79296875,
"rewards/brier_reward_group": 0.870286762714386,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.8322217464447021,
"step": 132
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.49993064225273964,
"calib/avg_num_step_conf": 1.84765625,
"calib/ece": 0.34844621513944235,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.9960159362549801,
"calib/gap": -0.005487584963240422,
"calib/mean_conf": 0.9858964143426296,
"calib/mu_c": 0.9839506172839506,
"calib/mu_w": 0.989438202247191,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.984375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.34446215139442243,
"calib/std_conf": 0.06244274849598392,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2597.0,
"completions/max_terminated_length": 2597.0,
"completions/mean_length": 625.328125,
"completions/mean_terminated_length": 635.2540283203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 130.0,
"epoch": 0.14186666666666667,
"grad_norm": 0.011458762921392918,
"learning_rate": 1.8611111111111113e-06,
"loss": -0.0191,
"num_tokens": 30119145.0,
"reward": 1.4716432094573975,
"reward_std": 0.3156856596469879,
"rewards/accuracy_reward_step": 0.6328125,
"rewards/brier_reward_group": 0.7177450656890869,
"rewards/format_reward_step": 0.9765625,
"rewards/stepwise_brier_reward": 0.6844528913497925,
"step": 133
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5031847133757962,
"calib/avg_num_step_conf": 1.8984375,
"calib/ece": 0.3767578125000002,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 6.369426751584584e-05,
"calib/mean_conf": 0.9900390625000002,
"calib/mu_c": 0.9900636942675157,
"calib/mu_w": 0.9899999999999999,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.3767578125000002,
"calib/std_conf": 0.0006237781024480985,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1810.0,
"completions/max_terminated_length": 1810.0,
"completions/mean_length": 582.15234375,
"completions/mean_terminated_length": 591.3928833007812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 159.0,
"epoch": 0.14293333333333333,
"grad_norm": 0.009385544806718826,
"learning_rate": 1.8333333333333333e-06,
"loss": -0.0148,
"num_tokens": 30377128.0,
"reward": 1.4562424421310425,
"reward_std": 0.3281945586204529,
"rewards/accuracy_reward_step": 0.61328125,
"rewards/brier_reward_group": 0.7188370823860168,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.6764452457427979,
"step": 134
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 1.9375,
"calib/ece": 0.28803921568627455,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 1.1102230246251565e-16,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9899999999999998,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.28803921568627455,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1595.0,
"completions/max_terminated_length": 1595.0,
"completions/mean_length": 527.69921875,
"completions/mean_terminated_length": 536.075439453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 197.0,
"epoch": 0.144,
"grad_norm": 0.007142702117562294,
"learning_rate": 1.8055555555555557e-06,
"loss": -0.0229,
"num_tokens": 30618099.0,
"reward": 1.5674383640289307,
"reward_std": 0.22207137942314148,
"rewards/accuracy_reward_step": 0.69921875,
"rewards/brier_reward_group": 0.7638119459152222,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.7246912717819214,
"step": 135
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 1.90625,
"calib/ece": 0.395511811023622,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.9899999999999997,
"calib/mu_w": 0.9899999999999997,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.395511811023622,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2462.0,
"completions/max_terminated_length": 2462.0,
"completions/mean_length": 543.109375,
"completions/mean_terminated_length": 551.7301635742188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 145.0,
"epoch": 0.14506666666666668,
"grad_norm": 0.009068154729902744,
"learning_rate": 1.777777777777778e-06,
"loss": -0.0087,
"num_tokens": 30865623.0,
"reward": 1.410581350326538,
"reward_std": 0.24024391174316406,
"rewards/accuracy_reward_step": 0.58984375,
"rewards/brier_reward_group": 0.6730188131332397,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.6333686113357544,
"step": 136
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.502540650406504,
"calib/avg_num_step_conf": 1.96875,
"calib/ece": 0.3443700787401575,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.9921259842519685,
"calib/gap": 0.0050243902439025545,
"calib/mean_conf": 0.9822440944881891,
"calib/mu_c": 0.9840243902439024,
"calib/mu_w": 0.9789999999999999,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.34047244094488194,
"calib/std_conf": 0.08706095398753345,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2251.0,
"completions/max_terminated_length": 2251.0,
"completions/mean_length": 527.01953125,
"completions/mean_terminated_length": 533.268798828125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 179.0,
"epoch": 0.14613333333333334,
"grad_norm": 0.007421621587127447,
"learning_rate": 1.75e-06,
"loss": -0.0042,
"num_tokens": 31107524.0,
"reward": 1.4851226806640625,
"reward_std": 0.24835412204265594,
"rewards/accuracy_reward_step": 0.640625,
"rewards/brier_reward_group": 0.7215331792831421,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.6955199241638184,
"step": 137
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5056818181818181,
"calib/avg_num_step_conf": 1.96875,
"calib/ece": 0.3349411764705882,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.00045454545454515216,
"calib/mean_conf": 0.9898431372549019,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9895454545454547,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3349411764705882,
"calib/std_conf": 0.00249998077655047,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1179.0,
"completions/max_terminated_length": 1179.0,
"completions/mean_length": 500.7265625,
"completions/mean_terminated_length": 508.67462158203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 135.0,
"epoch": 0.1472,
"grad_norm": 0.033796265721321106,
"learning_rate": 1.7222222222222224e-06,
"loss": -0.0122,
"num_tokens": 31340046.0,
"reward": 1.5078368186950684,
"reward_std": 0.23290802538394928,
"rewards/accuracy_reward_step": 0.65234375,
"rewards/brier_reward_group": 0.7345907092094421,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.6951937675476074,
"step": 138
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 2.0078125,
"calib/ece": 0.2868750000000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 2.220446049250313e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9899999999999998,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2868750000000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1035.0,
"completions/max_terminated_length": 1035.0,
"completions/mean_length": 447.953125,
"completions/mean_terminated_length": 455.0635070800781,
"completions/min_length": 0.0,
"completions/min_terminated_length": 135.0,
"epoch": 0.14826666666666666,
"grad_norm": 0.00960354134440422,
"learning_rate": 1.6944444444444446e-06,
"loss": 0.0104,
"num_tokens": 31557818.0,
"reward": 1.5895135402679443,
"reward_std": 0.1642017364501953,
"rewards/accuracy_reward_step": 0.703125,
"rewards/brier_reward_group": 0.7695093154907227,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.7760449647903442,
"step": 139
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5064935064935066,
"calib/avg_num_step_conf": 2.03515625,
"calib/ece": 0.29011764705882354,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.996078431372549,
"calib/gap": 0.006103896103896389,
"calib/mean_conf": 0.988156862745098,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9838961038961036,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.29011764705882354,
"calib/std_conf": 0.029374774124467987,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1857.0,
"completions/max_terminated_length": 1857.0,
"completions/mean_length": 501.19921875,
"completions/mean_terminated_length": 509.15478515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 146.0,
"epoch": 0.14933333333333335,
"grad_norm": 0.0098324716091156,
"learning_rate": 1.6666666666666667e-06,
"loss": -0.034,
"num_tokens": 31791141.0,
"reward": 1.5700232982635498,
"reward_std": 0.1776837408542633,
"rewards/accuracy_reward_step": 0.6953125,
"rewards/brier_reward_group": 0.7559343576431274,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7507213950157166,
"step": 140
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.4930555555555556,
"calib/avg_num_step_conf": 2.09765625,
"calib/ece": 0.2712890625000002,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -0.00013888888888868856,
"calib/mean_conf": 0.9900390625000002,
"calib/mu_c": 0.9900000000000001,
"calib/mu_w": 0.9901388888888888,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2712890625000002,
"calib/std_conf": 0.0006237781024480986,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1893.0,
"completions/max_terminated_length": 1893.0,
"completions/mean_length": 527.34375,
"completions/mean_terminated_length": 535.7142944335938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 212.0,
"epoch": 0.1504,
"grad_norm": 0.008453505113720894,
"learning_rate": 1.638888888888889e-06,
"loss": 0.0063,
"num_tokens": 32033237.0,
"reward": 1.6114122867584229,
"reward_std": 0.2033512145280838,
"rewards/accuracy_reward_step": 0.71875,
"rewards/brier_reward_group": 0.783699631690979,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7947620749473572,
"step": 141
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 2.11328125,
"calib/ece": 0.35862745098039217,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 1.1102230246251565e-16,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9899999999999998,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.35862745098039217,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1542.0,
"completions/max_terminated_length": 1542.0,
"completions/mean_length": 562.73828125,
"completions/mean_terminated_length": 571.670654296875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 160.0,
"epoch": 0.15146666666666667,
"grad_norm": 0.00851828046143055,
"learning_rate": 1.6111111111111113e-06,
"loss": -0.0085,
"num_tokens": 32282458.0,
"reward": 1.4801580905914307,
"reward_std": 0.24722754955291748,
"rewards/accuracy_reward_step": 0.62890625,
"rewards/brier_reward_group": 0.7196464538574219,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.6931735277175903,
"step": 142
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5028571428571429,
"calib/avg_num_step_conf": 2.265625,
"calib/ece": 0.3037647058823528,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 5.7142857142644665e-05,
"calib/mean_conf": 0.9900392156862744,
"calib/mu_c": 0.9900571428571426,
"calib/mu_w": 0.99,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3037647058823528,
"calib/std_conf": 0.0006249951941376173,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1852.0,
"completions/max_terminated_length": 1852.0,
"completions/mean_length": 517.73046875,
"completions/mean_terminated_length": 525.9484252929688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 214.0,
"epoch": 0.15253333333333333,
"grad_norm": 0.008409008383750916,
"learning_rate": 1.5833333333333333e-06,
"loss": 0.0108,
"num_tokens": 32522333.0,
"reward": 1.5461618900299072,
"reward_std": 0.19538016617298126,
"rewards/accuracy_reward_step": 0.68359375,
"rewards/brier_reward_group": 0.7507796883583069,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.707305371761322,
"step": 143
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 2.3828125,
"calib/ece": 0.2243750000000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -2.220446049250313e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9900000000000002,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2243750000000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1712.0,
"completions/max_terminated_length": 1712.0,
"completions/mean_length": 517.0703125,
"completions/mean_terminated_length": 525.27783203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 115.0,
"epoch": 0.1536,
"grad_norm": 0.01105842087417841,
"learning_rate": 1.5555555555555558e-06,
"loss": 0.0083,
"num_tokens": 32758831.0,
"reward": 1.6755250692367554,
"reward_std": 0.2170555740594864,
"rewards/accuracy_reward_step": 0.765625,
"rewards/brier_reward_group": 0.8405249714851379,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.7990751266479492,
"step": 144
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5026041666666666,
"calib/avg_num_step_conf": 2.5546875,
"calib/ece": 0.2311462450592887,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 5.2083333333285964e-05,
"calib/mean_conf": 0.9900395256916997,
"calib/mu_c": 0.9900520833333335,
"calib/mu_w": 0.9900000000000002,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.2311462450592887,
"calib/std_conf": 0.0006274509038097848,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2577.0,
"completions/max_terminated_length": 2577.0,
"completions/mean_length": 524.8125,
"completions/mean_terminated_length": 533.1428833007812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 170.0,
"epoch": 0.15466666666666667,
"grad_norm": 0.014037779532372952,
"learning_rate": 1.527777777777778e-06,
"loss": -0.0081,
"num_tokens": 32995887.0,
"reward": 1.6281208992004395,
"reward_std": 0.23964989185333252,
"rewards/accuracy_reward_step": 0.75,
"rewards/brier_reward_group": 0.8128194212913513,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.7309138774871826,
"step": 145
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 2.83203125,
"calib/ece": 0.4039453125000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.00028301886792436726,
"calib/mean_conf": 0.9898828125000001,
"calib/mu_c": 0.9899999999999998,
"calib/mu_w": 0.9897169811320754,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.4039453125000001,
"calib/std_conf": 0.002574275061030535,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1686.0,
"completions/max_terminated_length": 1686.0,
"completions/mean_length": 540.68359375,
"completions/mean_terminated_length": 549.2659301757812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 182.0,
"epoch": 0.15573333333333333,
"grad_norm": 0.012108377180993557,
"learning_rate": 1.5e-06,
"loss": 0.0099,
"num_tokens": 33241518.0,
"reward": 1.417668104171753,
"reward_std": 0.2301420271396637,
"rewards/accuracy_reward_step": 0.5859375,
"rewards/brier_reward_group": 0.6665316224098206,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.660391092300415,
"step": 146
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.494949494949495,
"calib/avg_num_step_conf": 2.85546875,
"calib/ece": 0.37980314960629913,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -0.00010101010101015717,
"calib/mean_conf": 0.9900393700787401,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.99010101010101,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.37980314960629913,
"calib/std_conf": 0.0006262194378183811,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2535.0,
"completions/max_terminated_length": 2535.0,
"completions/mean_length": 604.9609375,
"completions/mean_terminated_length": 609.7244262695312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 188.0,
"epoch": 0.1568,
"grad_norm": 0.01028156653046608,
"learning_rate": 1.4722222222222225e-06,
"loss": -0.0038,
"num_tokens": 33500068.0,
"reward": 1.4331023693084717,
"reward_std": 0.25268805027008057,
"rewards/accuracy_reward_step": 0.60546875,
"rewards/brier_reward_group": 0.6855310201644897,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.6406280994415283,
"step": 147
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 2.9609375,
"calib/ece": 0.20259842519685034,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -3.3306690738754696e-16,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9900000000000003,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.20259842519685034,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1582.0,
"completions/max_terminated_length": 1582.0,
"completions/mean_length": 510.2734375,
"completions/mean_terminated_length": 518.373046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 123.0,
"epoch": 0.15786666666666666,
"grad_norm": 0.007744878530502319,
"learning_rate": 1.4444444444444445e-06,
"loss": -0.0203,
"num_tokens": 33735810.0,
"reward": 1.6874488592147827,
"reward_std": 0.16425636410713196,
"rewards/accuracy_reward_step": 0.78125,
"rewards/brier_reward_group": 0.8287118077278137,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.8117086887359619,
"step": 148
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5058139534883721,
"calib/avg_num_step_conf": 3.01953125,
"calib/ece": 0.31023320158102774,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.00011046511627899047,
"calib/mean_conf": 0.9900750988142293,
"calib/mu_c": 0.990110465116279,
"calib/mu_w": 0.99,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.31023320158102774,
"calib/std_conf": 0.0008424815652963619,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2482.0,
"completions/max_terminated_length": 2482.0,
"completions/mean_length": 603.95703125,
"completions/mean_terminated_length": 613.543701171875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 158.0,
"epoch": 0.15893333333333334,
"grad_norm": 0.01476355828344822,
"learning_rate": 1.4166666666666667e-06,
"loss": -0.0105,
"num_tokens": 33994879.0,
"reward": 1.5313372611999512,
"reward_std": 0.222749263048172,
"rewards/accuracy_reward_step": 0.671875,
"rewards/brier_reward_group": 0.7424355149269104,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.7266632318496704,
"step": 149
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5071428571428571,
"calib/avg_num_step_conf": 3.2578125,
"calib/ece": 0.26407114624505945,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.9960474308300395,
"calib/gap": 0.009428571428571342,
"calib/mean_conf": 0.9873913043478261,
"calib/mu_c": 0.9899999999999998,
"calib/mu_w": 0.9805714285714284,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.26407114624505945,
"calib/std_conf": 0.041411759651445765,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2445.0,
"completions/max_terminated_length": 2445.0,
"completions/mean_length": 499.72265625,
"completions/mean_terminated_length": 507.65478515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 157.0,
"epoch": 0.16,
"grad_norm": 0.011728521436452866,
"learning_rate": 1.3888888888888892e-06,
"loss": -0.0128,
"num_tokens": 34227768.0,
"reward": 1.5932334661483765,
"reward_std": 0.1388700008392334,
"rewards/accuracy_reward_step": 0.71484375,
"rewards/brier_reward_group": 0.7603456974029541,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.7844633460044861,
"step": 150
},
{
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.5115467239527389,
"calib/avg_num_step_conf": 3.5546875,
"calib/ece": 0.38100000000000006,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.996,
"calib/gap": 0.0028866809881847866,
"calib/mean_conf": 0.989,
"calib/mu_c": 0.9901315789473685,
"calib/mu_w": 0.9872448979591837,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.984375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.38100000000000006,
"calib/std_conf": 0.015394804318340654,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 2671.0,
"completions/max_terminated_length": 2671.0,
"completions/mean_length": 595.3828125,
"completions/mean_terminated_length": 607.2430419921875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 203.0,
"epoch": 0.16106666666666666,
"grad_norm": 0.009798677638173103,
"learning_rate": 1.3611111111111112e-06,
"loss": -0.0394,
"num_tokens": 34487210.0,
"reward": 1.403533935546875,
"reward_std": 0.20775704085826874,
"rewards/accuracy_reward_step": 0.59375,
"rewards/brier_reward_group": 0.6569827198982239,
"rewards/format_reward_step": 0.96875,
"rewards/stepwise_brier_reward": 0.6446533203125,
"step": 151
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5027472527472527,
"calib/avg_num_step_conf": 3.76171875,
"calib/ece": 0.273503937007874,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 5.494505494485491e-05,
"calib/mean_conf": 0.9900393700787401,
"calib/mu_c": 0.9900549450549448,
"calib/mu_w": 0.99,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.273503937007874,
"calib/std_conf": 0.0006262194378183811,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2473.0,
"completions/max_terminated_length": 2473.0,
"completions/mean_length": 586.57421875,
"completions/mean_terminated_length": 595.8849487304688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 240.0,
"epoch": 0.16213333333333332,
"grad_norm": 0.010811089538037777,
"learning_rate": 1.3333333333333334e-06,
"loss": 0.028,
"num_tokens": 34742765.0,
"reward": 1.5861300230026245,
"reward_std": 0.19278180599212646,
"rewards/accuracy_reward_step": 0.7109375,
"rewards/brier_reward_group": 0.7697770595550537,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.7544306516647339,
"step": 152
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 3.9609375,
"calib/ece": 0.30889763779527557,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -1.1102230246251565e-16,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.99,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.30889763779527557,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2382.0,
"completions/max_terminated_length": 2382.0,
"completions/mean_length": 558.609375,
"completions/mean_terminated_length": 567.4761962890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 202.0,
"epoch": 0.1632,
"grad_norm": 0.014793137088418007,
"learning_rate": 1.3055555555555556e-06,
"loss": -0.0089,
"num_tokens": 34993089.0,
"reward": 1.54752779006958,
"reward_std": 0.23927220702171326,
"rewards/accuracy_reward_step": 0.67578125,
"rewards/brier_reward_group": 0.7510262727737427,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.7515851259231567,
"step": 153
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5150375939849624,
"calib/avg_num_step_conf": 4.65625,
"calib/ece": 0.46027888446215137,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0003007518796993569,
"calib/mean_conf": 0.9901593625498007,
"calib/mu_c": 0.9903007518796991,
"calib/mu_w": 0.9899999999999998,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.46027888446215137,
"calib/std_conf": 0.0012522895335061134,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0234375,
"completions/max_length": 2542.0,
"completions/max_terminated_length": 2542.0,
"completions/mean_length": 573.69140625,
"completions/mean_terminated_length": 587.4600219726562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 244.0,
"epoch": 0.16426666666666667,
"grad_norm": 0.010138453915715218,
"learning_rate": 1.2777777777777779e-06,
"loss": -0.0437,
"num_tokens": 35244394.0,
"reward": 1.3009169101715088,
"reward_std": 0.2400965690612793,
"rewards/accuracy_reward_step": 0.51953125,
"rewards/brier_reward_group": 0.5758287906646729,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.5887768268585205,
"step": 154
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.50274467800241,
"calib/avg_num_step_conf": 4.91796875,
"calib/ece": 0.37669243027888444,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 5.393626991567757e-05,
"calib/mean_conf": 0.9902382470119522,
"calib/mu_c": 0.9902590909090909,
"calib/mu_w": 0.9902051546391752,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.37669243027888444,
"calib/std_conf": 0.0015224388442898556,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0234375,
"completions/max_length": 1502.0,
"completions/max_terminated_length": 1502.0,
"completions/mean_length": 527.0703125,
"completions/mean_terminated_length": 539.7200317382812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 229.0,
"epoch": 0.16533333333333333,
"grad_norm": 0.010918740183115005,
"learning_rate": 1.25e-06,
"loss": -0.0585,
"num_tokens": 35486540.0,
"reward": 1.4296875,
"reward_std": 0.29709577560424805,
"rewards/accuracy_reward_step": 0.6015625,
"rewards/brier_reward_group": 0.6891999244689941,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.662362277507782,
"step": 155
},
{
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.5087719298245614,
"calib/avg_num_step_conf": 4.203125,
"calib/ece": 0.30337349397590374,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.00017543859649149507,
"calib/mean_conf": 0.990120481927711,
"calib/mu_c": 0.9901754385964912,
"calib/mu_w": 0.9899999999999997,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.9765625,
"calib/nonempty_step_conf_rate": 0.9765625,
"calib/pce": 0.30337349397590374,
"calib/std_conf": 0.0010910102576069185,
"calib/step_conf_rate": 0.9765625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.02734375,
"completions/max_length": 2366.0,
"completions/max_terminated_length": 2366.0,
"completions/mean_length": 592.9140625,
"completions/mean_terminated_length": 609.5823364257812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 180.0,
"epoch": 0.1664,
"grad_norm": 0.010354182682931423,
"learning_rate": 1.2222222222222223e-06,
"loss": -0.0329,
"num_tokens": 35743086.0,
"reward": 1.5343012809753418,
"reward_std": 0.3117218017578125,
"rewards/accuracy_reward_step": 0.66796875,
"rewards/brier_reward_group": 0.7521328926086426,
"rewards/format_reward_step": 0.97265625,
"rewards/stepwise_brier_reward": 0.7678846120834351,
"step": 156
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.4898785425101215,
"calib/avg_num_step_conf": 4.76171875,
"calib/ece": 0.24505882352941166,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -0.00020242914979762272,
"calib/mean_conf": 0.990156862745098,
"calib/mu_c": 0.9901052631578947,
"calib/mu_w": 0.9903076923076923,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.24505882352941166,
"calib/std_conf": 0.001242586628843519,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1477.0,
"completions/max_terminated_length": 1477.0,
"completions/mean_length": 565.06640625,
"completions/mean_terminated_length": 574.0357666015625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 204.0,
"epoch": 0.16746666666666668,
"grad_norm": 0.012160200625658035,
"learning_rate": 1.1944444444444446e-06,
"loss": -0.0206,
"num_tokens": 35991471.0,
"reward": 1.6435627937316895,
"reward_std": 0.2203359603881836,
"rewards/accuracy_reward_step": 0.7421875,
"rewards/brier_reward_group": 0.8106563091278076,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.8026572465896606,
"step": 157
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5133074162679426,
"calib/avg_num_step_conf": 4.54296875,
"calib/ece": 0.2919047619047621,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.00026614832535920563,
"calib/mean_conf": 0.9903174603174605,
"calib/mu_c": 0.9903977272727272,
"calib/mu_w": 0.990131578947368,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.2919047619047621,
"calib/std_conf": 0.0017532319074900432,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 2490.0,
"completions/max_terminated_length": 2490.0,
"completions/mean_length": 572.046875,
"completions/mean_terminated_length": 583.4422607421875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 229.0,
"epoch": 0.16853333333333334,
"grad_norm": 0.014418968930840492,
"learning_rate": 1.1666666666666668e-06,
"loss": 0.0067,
"num_tokens": 36243155.0,
"reward": 1.5606688261032104,
"reward_std": 0.3102324903011322,
"rewards/accuracy_reward_step": 0.6875,
"rewards/brier_reward_group": 0.786474883556366,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.745262861251831,
"step": 158
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.49085878397310173,
"calib/avg_num_step_conf": 4.14453125,
"calib/ece": 0.31560784313725465,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -0.00018282432053784436,
"calib/mean_conf": 0.9901176470588233,
"calib/mu_c": 0.9900581395348836,
"calib/mu_w": 0.9902409638554215,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.31560784313725465,
"calib/std_conf": 0.0010782531046954929,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1756.0,
"completions/max_terminated_length": 1756.0,
"completions/mean_length": 545.7421875,
"completions/mean_terminated_length": 554.40478515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 209.0,
"epoch": 0.1696,
"grad_norm": 0.010768786072731018,
"learning_rate": 1.138888888888889e-06,
"loss": -0.0014,
"num_tokens": 36487649.0,
"reward": 1.557401418685913,
"reward_std": 0.24081464111804962,
"rewards/accuracy_reward_step": 0.671875,
"rewards/brier_reward_group": 0.7535665035247803,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.8041641712188721,
"step": 159
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 4.03515625,
"calib/ece": 0.31539682539682545,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 2.220446049250313e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.9900000000000001,
"calib/mu_w": 0.9899999999999999,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.31539682539682545,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 2281.0,
"completions/max_terminated_length": 2281.0,
"completions/mean_length": 534.82421875,
"completions/mean_terminated_length": 545.4780883789062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 222.0,
"epoch": 0.17066666666666666,
"grad_norm": 0.008867080323398113,
"learning_rate": 1.111111111111111e-06,
"loss": -0.0248,
"num_tokens": 36729404.0,
"reward": 1.5163869857788086,
"reward_std": 0.2396489679813385,
"rewards/accuracy_reward_step": 0.6640625,
"rewards/brier_reward_group": 0.727623701095581,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.7129243612289429,
"step": 160
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.4668008048289739,
"calib/avg_num_step_conf": 3.90625,
"calib/ece": 0.15486156862745082,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -0.0006601945003353871,
"calib/mean_conf": 0.9901556862745097,
"calib/mu_c": 0.9900469483568075,
"calib/mu_w": 0.9907071428571429,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.15486156862745082,
"calib/std_conf": 0.0012332791533970614,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2860.0,
"completions/max_terminated_length": 2860.0,
"completions/mean_length": 511.58203125,
"completions/mean_terminated_length": 517.6482543945312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 213.0,
"epoch": 0.17173333333333332,
"grad_norm": 0.00983340572565794,
"learning_rate": 1.0833333333333335e-06,
"loss": 0.0084,
"num_tokens": 36964289.0,
"reward": 1.7533671855926514,
"reward_std": 0.14535510540008545,
"rewards/accuracy_reward_step": 0.83203125,
"rewards/brier_reward_group": 0.8738914728164673,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.8270772695541382,
"step": 161
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 3.86328125,
"calib/ece": 0.182156862745098,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -1.1102230246251565e-16,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9900000000000001,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.182156862745098,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1309.0,
"completions/max_terminated_length": 1309.0,
"completions/mean_length": 478.39453125,
"completions/mean_terminated_length": 485.9881286621094,
"completions/min_length": 0.0,
"completions/min_terminated_length": 175.0,
"epoch": 0.1728,
"grad_norm": 0.009535176679491997,
"learning_rate": 1.0555555555555557e-06,
"loss": 0.0057,
"num_tokens": 37190902.0,
"reward": 1.7314547300338745,
"reward_std": 0.1599610149860382,
"rewards/accuracy_reward_step": 0.8046875,
"rewards/brier_reward_group": 0.858291506767273,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.8565897941589355,
"step": 162
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5059171597633136,
"calib/avg_num_step_conf": 3.625,
"calib/ece": 0.32472440944881886,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.00011834319526637938,
"calib/mean_conf": 0.9900787401574803,
"calib/mu_c": 0.9901183431952661,
"calib/mu_w": 0.9899999999999998,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.32472440944881886,
"calib/std_conf": 0.0008838560756158924,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2110.0,
"completions/max_terminated_length": 2110.0,
"completions/mean_length": 556.859375,
"completions/mean_terminated_length": 565.6984252929688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 208.0,
"epoch": 0.17386666666666667,
"grad_norm": 0.023701557889580727,
"learning_rate": 1.0277777777777777e-06,
"loss": 0.0024,
"num_tokens": 37438290.0,
"reward": 1.5212602615356445,
"reward_std": 0.2810484766960144,
"rewards/accuracy_reward_step": 0.66015625,
"rewards/brier_reward_group": 0.7524918913841248,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.7075492143630981,
"step": 163
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 3.6484375,
"calib/ece": 0.3220312500000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 2.220446049250313e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9899999999999998,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3220312500000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1736.0,
"completions/max_terminated_length": 1736.0,
"completions/mean_length": 589.0078125,
"completions/mean_terminated_length": 598.357177734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 257.0,
"epoch": 0.17493333333333333,
"grad_norm": 0.01062087807804346,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.0031,
"num_tokens": 37695212.0,
"reward": 1.532461404800415,
"reward_std": 0.198165625333786,
"rewards/accuracy_reward_step": 0.66796875,
"rewards/brier_reward_group": 0.7308089733123779,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.7427867650985718,
"step": 164
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.50920245398773,
"calib/avg_num_step_conf": 3.6796875,
"calib/ece": 0.3509019607843137,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.00018404907975455576,
"calib/mean_conf": 0.9901176470588234,
"calib/mu_c": 0.9901840490797544,
"calib/mu_w": 0.9899999999999999,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3509019607843137,
"calib/std_conf": 0.0010782531046954927,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2239.0,
"completions/max_terminated_length": 2239.0,
"completions/mean_length": 581.88671875,
"completions/mean_terminated_length": 591.123046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 233.0,
"epoch": 0.176,
"grad_norm": 0.011879962868988514,
"learning_rate": 9.722222222222224e-07,
"loss": 0.0201,
"num_tokens": 37949751.0,
"reward": 1.4884181022644043,
"reward_std": 0.24818657338619232,
"rewards/accuracy_reward_step": 0.63671875,
"rewards/brier_reward_group": 0.7207316160202026,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.693878173828125,
"step": 165
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 3.76171875,
"calib/ece": 0.21322709163346612,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.9960159362549801,
"calib/gap": 0.017017543859649154,
"calib/mean_conf": 0.9861354581673307,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9729824561403508,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.98046875,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.21322709163346612,
"calib/std_conf": 0.061739440745079505,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 3036.0,
"completions/max_terminated_length": 3036.0,
"completions/mean_length": 568.79296875,
"completions/mean_terminated_length": 575.53759765625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 252.0,
"epoch": 0.17706666666666668,
"grad_norm": 0.009490316733717918,
"learning_rate": 9.444444444444445e-07,
"loss": -0.0207,
"num_tokens": 38201546.0,
"reward": 1.6525752544403076,
"reward_std": 0.2127598077058792,
"rewards/accuracy_reward_step": 0.7578125,
"rewards/brier_reward_group": 0.8025659322738647,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.8155475854873657,
"step": 166
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5051282051282051,
"calib/avg_num_step_conf": 3.8671875,
"calib/ece": 0.21932806324110676,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.000102564102563929,
"calib/mean_conf": 0.9900790513833992,
"calib/mu_c": 0.990102564102564,
"calib/mu_w": 0.9900000000000001,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.21932806324110676,
"calib/std_conf": 0.0008855872135339169,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2242.0,
"completions/max_terminated_length": 2242.0,
"completions/mean_length": 540.40234375,
"completions/mean_terminated_length": 548.9801635742188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 235.0,
"epoch": 0.17813333333333334,
"grad_norm": 0.007833111099898815,
"learning_rate": 9.166666666666666e-07,
"loss": -0.0158,
"num_tokens": 38445497.0,
"reward": 1.6641058921813965,
"reward_std": 0.24750857055187225,
"rewards/accuracy_reward_step": 0.76171875,
"rewards/brier_reward_group": 0.8293477296829224,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.8036386966705322,
"step": 167
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 3.75,
"calib/ece": 0.2605882352941177,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9899999999999999,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.2605882352941177,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2620.0,
"completions/max_terminated_length": 2620.0,
"completions/mean_length": 604.66015625,
"completions/mean_terminated_length": 611.830078125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 266.0,
"epoch": 0.1792,
"grad_norm": 0.012309842742979527,
"learning_rate": 8.88888888888889e-07,
"loss": -0.0032,
"num_tokens": 38704962.0,
"reward": 1.6134165525436401,
"reward_std": 0.245762437582016,
"rewards/accuracy_reward_step": 0.7265625,
"rewards/brier_reward_group": 0.808096170425415,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7471325397491455,
"step": 168
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.49726402188782487,
"calib/avg_num_step_conf": 3.90625,
"calib/ece": 0.32625000000000015,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -5.471956224323282e-05,
"calib/mean_conf": 0.9903125000000002,
"calib/mu_c": 0.9902941176470588,
"calib/mu_w": 0.990348837209302,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.32625000000000015,
"calib/std_conf": 0.0017399263633843835,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1619.0,
"completions/max_terminated_length": 1619.0,
"completions/mean_length": 545.78515625,
"completions/mean_terminated_length": 554.4484252929688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 202.0,
"epoch": 0.18026666666666666,
"grad_norm": 0.010852995328605175,
"learning_rate": 8.611111111111112e-07,
"loss": -0.0029,
"num_tokens": 38948867.0,
"reward": 1.5293468236923218,
"reward_std": 0.18101638555526733,
"rewards/accuracy_reward_step": 0.6640625,
"rewards/brier_reward_group": 0.7271101474761963,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.7340270280838013,
"step": 169
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5129554655870445,
"calib/avg_num_step_conf": 4.27734375,
"calib/ece": 0.24125490196078422,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.996078431372549,
"calib/gap": 0.01534008097165962,
"calib/mean_conf": 0.9863529411764705,
"calib/mu_c": 0.9902631578947367,
"calib/mu_w": 0.9749230769230771,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.24125490196078422,
"calib/std_conf": 0.06190784534191844,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1213.0,
"completions/max_terminated_length": 1213.0,
"completions/mean_length": 551.42578125,
"completions/mean_terminated_length": 560.1785888671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 222.0,
"epoch": 0.18133333333333335,
"grad_norm": 0.007006386760622263,
"learning_rate": 8.333333333333333e-07,
"loss": -0.0061,
"num_tokens": 39194184.0,
"reward": 1.636326551437378,
"reward_std": 0.1704462468624115,
"rewards/accuracy_reward_step": 0.7421875,
"rewards/brier_reward_group": 0.7999738454818726,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7843947410583496,
"step": 170
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5324571883711668,
"calib/avg_num_step_conf": 4.20703125,
"calib/ece": 0.35533333333333317,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0006491437674235323,
"calib/mean_conf": 0.990627450980392,
"calib/mu_c": 0.9908641975308643,
"calib/mu_w": 0.9902150537634408,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.35533333333333317,
"calib/std_conf": 0.0024250391896063245,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1318.0,
"completions/max_terminated_length": 1318.0,
"completions/mean_length": 539.25,
"completions/mean_terminated_length": 547.8095703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 211.0,
"epoch": 0.1824,
"grad_norm": 0.009774522855877876,
"learning_rate": 8.055555555555557e-07,
"loss": -0.0079,
"num_tokens": 39439128.0,
"reward": 1.473537564277649,
"reward_std": 0.22798626124858856,
"rewards/accuracy_reward_step": 0.6328125,
"rewards/brier_reward_group": 0.7120257616043091,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.6586868762969971,
"step": 171
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5456695240910434,
"calib/avg_num_step_conf": 3.96484375,
"calib/ece": 0.19456000000000004,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.988,
"calib/gap": 0.017995861661247226,
"calib/mean_conf": 0.9825600000000001,
"calib/mu_c": 0.9862311557788944,
"calib/mu_w": 0.9682352941176472,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.984375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.19056000000000003,
"calib/std_conf": 0.08078518676094028,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 2261.0,
"completions/max_terminated_length": 2261.0,
"completions/mean_length": 526.01953125,
"completions/mean_terminated_length": 536.498046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 220.0,
"epoch": 0.18346666666666667,
"grad_norm": 0.013370024040341377,
"learning_rate": 7.777777777777779e-07,
"loss": -0.0149,
"num_tokens": 39677141.0,
"reward": 1.6798417568206787,
"reward_std": 0.22582019865512848,
"rewards/accuracy_reward_step": 0.77734375,
"rewards/brier_reward_group": 0.845207154750824,
"rewards/format_reward_step": 0.9765625,
"rewards/stepwise_brier_reward": 0.8116599321365356,
"step": 172
},
{
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.3976315996537886,
"calib/avg_num_step_conf": 4.04296875,
"calib/ece": 0.2795600000000001,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.996,
"calib/gap": -0.00753875206546506,
"calib/mean_conf": 0.9875600000000001,
"calib/mu_c": 0.9854189944134077,
"calib/mu_w": 0.9929577464788728,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.2755600000000001,
"calib/std_conf": 0.06268689177172529,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.03125,
"completions/max_length": 2675.0,
"completions/max_terminated_length": 2675.0,
"completions/mean_length": 570.3125,
"completions/mean_terminated_length": 588.7096557617188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 164.0,
"epoch": 0.18453333333333333,
"grad_norm": 0.008059374988079071,
"learning_rate": 7.5e-07,
"loss": -0.036,
"num_tokens": 39926301.0,
"reward": 1.5564550161361694,
"reward_std": 0.21988022327423096,
"rewards/accuracy_reward_step": 0.69921875,
"rewards/brier_reward_group": 0.7573325037956238,
"rewards/format_reward_step": 0.97265625,
"rewards/stepwise_brier_reward": 0.7262998819351196,
"step": 173
},
{
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.4575250836120401,
"calib/avg_num_step_conf": 4.109375,
"calib/ece": 0.2517670682730925,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -0.0006872909698998342,
"calib/mean_conf": 0.9907228915662651,
"calib/mu_c": 0.9905434782608695,
"calib/mu_w": 0.9912307692307694,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.9765625,
"calib/nonempty_step_conf_rate": 0.9765625,
"calib/pce": 0.2517670682730925,
"calib/std_conf": 0.003019268454491971,
"calib/step_conf_rate": 0.9765625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.02734375,
"completions/max_length": 2415.0,
"completions/max_terminated_length": 2415.0,
"completions/mean_length": 587.125,
"completions/mean_terminated_length": 603.6304931640625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 243.0,
"epoch": 0.1856,
"grad_norm": 0.027610663324594498,
"learning_rate": 7.222222222222222e-07,
"loss": 0.012,
"num_tokens": 40180837.0,
"reward": 1.593778133392334,
"reward_std": 0.3387395739555359,
"rewards/accuracy_reward_step": 0.71875,
"rewards/brier_reward_group": 0.810280978679657,
"rewards/format_reward_step": 0.97265625,
"rewards/stepwise_brier_reward": 0.7445189952850342,
"step": 174
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5908522087348267,
"calib/avg_num_step_conf": 4.2421875,
"calib/ece": 0.5054150197628459,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.9881422924901185,
"calib/gap": 0.009105243398823792,
"calib/mean_conf": 0.9800395256916996,
"calib/mu_c": 0.9847540983606559,
"calib/mu_w": 0.9756488549618321,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.5016205533596838,
"calib/std_conf": 0.10598119761540901,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2777.0,
"completions/max_terminated_length": 2777.0,
"completions/mean_length": 652.21484375,
"completions/mean_terminated_length": 657.3504028320312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 181.0,
"epoch": 0.18666666666666668,
"grad_norm": 0.009824872948229313,
"learning_rate": 6.944444444444446e-07,
"loss": 0.0016,
"num_tokens": 40453628.0,
"reward": 1.263087272644043,
"reward_std": 0.2979387640953064,
"rewards/accuracy_reward_step": 0.4765625,
"rewards/brier_reward_group": 0.578266978263855,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.5990820527076721,
"step": 175
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5172768553994607,
"calib/avg_num_step_conf": 5.375,
"calib/ece": 0.3167843137254903,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.9921568627450981,
"calib/gap": -0.010815240527884407,
"calib/mean_conf": 0.9836078431372549,
"calib/mu_c": 0.9801724137931032,
"calib/mu_w": 0.9909876543209876,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.30901960784313737,
"calib/std_conf": 0.0866337973336387,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1559.0,
"completions/max_terminated_length": 1559.0,
"completions/mean_length": 554.05859375,
"completions/mean_terminated_length": 562.8532104492188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 219.0,
"epoch": 0.18773333333333334,
"grad_norm": 0.01098695769906044,
"learning_rate": 6.666666666666667e-07,
"loss": -0.0266,
"num_tokens": 40699531.0,
"reward": 1.558232307434082,
"reward_std": 0.19351378083229065,
"rewards/accuracy_reward_step": 0.6796875,
"rewards/brier_reward_group": 0.7470316290855408,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7749598026275635,
"step": 176
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5670391061452514,
"calib/avg_num_step_conf": 5.7734375,
"calib/ece": 0.28622047244094484,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0013407821229052264,
"calib/mean_conf": 0.9909448818897637,
"calib/mu_c": 0.991340782122905,
"calib/mu_w": 0.9899999999999998,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.28622047244094484,
"calib/std_conf": 0.0029250670269301933,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1611.0,
"completions/max_terminated_length": 1611.0,
"completions/mean_length": 586.91796875,
"completions/mean_terminated_length": 596.2341918945312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 252.0,
"epoch": 0.1888,
"grad_norm": 0.009471280500292778,
"learning_rate": 6.388888888888889e-07,
"loss": -0.0103,
"num_tokens": 40953614.0,
"reward": 1.5689765214920044,
"reward_std": 0.17845270037651062,
"rewards/accuracy_reward_step": 0.69921875,
"rewards/brier_reward_group": 0.7458236217498779,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.7566449046134949,
"step": 177
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5126967626967627,
"calib/avg_num_step_conf": 6.26953125,
"calib/ece": 0.2796484375000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.00025393525393524463,
"calib/mean_conf": 0.9905859375000001,
"calib/mu_c": 0.9906593406593406,
"calib/mu_w": 0.9904054054054053,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2796484375000001,
"calib/std_conf": 0.0023486277368058484,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1399.0,
"completions/max_terminated_length": 1399.0,
"completions/mean_length": 608.63671875,
"completions/mean_terminated_length": 618.2976684570312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 241.0,
"epoch": 0.18986666666666666,
"grad_norm": 0.00890075508505106,
"learning_rate": 6.111111111111112e-07,
"loss": 0.0136,
"num_tokens": 41215497.0,
"reward": 1.6032111644744873,
"reward_std": 0.23842912912368774,
"rewards/accuracy_reward_step": 0.7109375,
"rewards/brier_reward_group": 0.7975425720214844,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.7715519666671753,
"step": 178
},
{
"calib/answer_extract_rate": 0.9375,
"calib/auroc": 0.48202378806992247,
"calib/avg_num_step_conf": 11.4453125,
"calib/ece": 0.24780082987551877,
"calib/final_conf_rate": 0.94140625,
"calib/format_rate": 0.9375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -0.00035952423860197147,
"calib/mean_conf": 0.990539419087137,
"calib/mu_c": 0.9904469273743015,
"calib/mu_w": 0.9908064516129035,
"calib/nonempty_final_conf_rate": 0.94140625,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.24780082987551877,
"calib/std_conf": 0.0022590303051977117,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.046875,
"completions/max_length": 3054.0,
"completions/max_terminated_length": 3054.0,
"completions/mean_length": 624.6953125,
"completions/mean_terminated_length": 655.41796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 281.0,
"epoch": 0.19093333333333334,
"grad_norm": 0.01979055441915989,
"learning_rate": 5.833333333333334e-07,
"loss": -0.047,
"num_tokens": 41481683.0,
"reward": 1.5406606197357178,
"reward_std": 0.3186691403388977,
"rewards/accuracy_reward_step": 0.69921875,
"rewards/brier_reward_group": 0.7571793794631958,
"rewards/format_reward_step": 0.9375,
"rewards/stepwise_brier_reward": 0.7335880994796753,
"step": 179
},
{
"calib/answer_extract_rate": 0.953125,
"calib/auroc": 0.49709081710093606,
"calib/avg_num_step_conf": 14.3984375,
"calib/ece": 0.2649180327868852,
"calib/final_conf_rate": 0.953125,
"calib/format_rate": 0.953125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0002462264946456738,
"calib/mean_conf": 0.9903278688524589,
"calib/mu_c": 0.9903954802259888,
"calib/mu_w": 0.9901492537313431,
"calib/nonempty_final_conf_rate": 0.953125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.2649180327868852,
"calib/std_conf": 0.0028441559955569655,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.046875,
"completions/max_length": 2981.0,
"completions/max_terminated_length": 2981.0,
"completions/mean_length": 738.76171875,
"completions/mean_terminated_length": 775.09423828125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 307.0,
"epoch": 0.192,
"grad_norm": 0.008890091441571712,
"learning_rate": 5.555555555555555e-07,
"loss": -0.099,
"num_tokens": 41774662.0,
"reward": 1.5315313339233398,
"reward_std": 0.30622541904449463,
"rewards/accuracy_reward_step": 0.69140625,
"rewards/brier_reward_group": 0.7310481667518616,
"rewards/format_reward_step": 0.953125,
"rewards/stepwise_brier_reward": 0.7232024073600769,
"step": 180
},
{
"calib/answer_extract_rate": 0.86328125,
"calib/auroc": 0.5106624319419237,
"calib/avg_num_step_conf": 24.91015625,
"calib/ece": 0.3341628959276016,
"calib/final_conf_rate": 0.86328125,
"calib/format_rate": 0.86328125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.00021324863883842582,
"calib/mean_conf": 0.9902714932126695,
"calib/mu_c": 0.9903448275862067,
"calib/mu_w": 0.9901315789473683,
"calib/nonempty_final_conf_rate": 0.86328125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3341628959276016,
"calib/std_conf": 0.0016251841625400892,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.125,
"completions/max_length": 3011.0,
"completions/max_terminated_length": 3011.0,
"completions/mean_length": 638.71875,
"completions/mean_terminated_length": 729.9642944335938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 266.0,
"epoch": 0.19306666666666666,
"grad_norm": 0.01944536156952381,
"learning_rate": 5.277777777777779e-07,
"loss": -0.1366,
"num_tokens": 42044438.0,
"reward": 1.3028833866119385,
"reward_std": 0.4944838881492615,
"rewards/accuracy_reward_step": 0.56640625,
"rewards/brier_reward_group": 0.6211562156677246,
"rewards/format_reward_step": 0.86328125,
"rewards/stepwise_brier_reward": 0.5981900095939636,
"step": 181
},
{
"calib/answer_extract_rate": 0.93359375,
"calib/auroc": 0.5105828720286552,
"calib/avg_num_step_conf": 16.91796875,
"calib/ece": 0.29875000000000007,
"calib/final_conf_rate": 0.9375,
"calib/format_rate": 0.93359375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0002116574405728322,
"calib/mean_conf": 0.9904166666666667,
"calib/mu_c": 0.9904819277108431,
"calib/mu_w": 0.9902702702702703,
"calib/nonempty_final_conf_rate": 0.9375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.29875000000000007,
"calib/std_conf": 0.001998263134713635,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.05859375,
"completions/max_length": 2741.0,
"completions/max_terminated_length": 2741.0,
"completions/mean_length": 684.17578125,
"completions/mean_terminated_length": 726.7593994140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 322.0,
"epoch": 0.19413333333333332,
"grad_norm": 0.009764508344233036,
"learning_rate": 5.000000000000001e-07,
"loss": -0.0896,
"num_tokens": 42325747.0,
"reward": 1.45506751537323,
"reward_std": 0.37210720777511597,
"rewards/accuracy_reward_step": 0.6484375,
"rewards/brier_reward_group": 0.6974618434906006,
"rewards/format_reward_step": 0.93359375,
"rewards/stepwise_brier_reward": 0.6618707180023193,
"step": 182
},
{
"calib/answer_extract_rate": 0.90234375,
"calib/auroc": 0.50920245398773,
"calib/avg_num_step_conf": 19.65625,
"calib/ece": 0.28450216450216437,
"calib/final_conf_rate": 0.90234375,
"calib/format_rate": 0.90234375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.00018404907975455576,
"calib/mean_conf": 0.99012987012987,
"calib/mu_c": 0.9901840490797544,
"calib/mu_w": 0.9899999999999999,
"calib/nonempty_final_conf_rate": 0.90234375,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.28450216450216437,
"calib/std_conf": 0.0011321815437767995,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08203125,
"completions/max_length": 3026.0,
"completions/max_terminated_length": 3026.0,
"completions/mean_length": 748.1171875,
"completions/mean_terminated_length": 814.97021484375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 295.0,
"epoch": 0.1952,
"grad_norm": 0.012072579003870487,
"learning_rate": 4.7222222222222226e-07,
"loss": -0.0901,
"num_tokens": 42623945.0,
"reward": 1.4325345754623413,
"reward_std": 0.40637436509132385,
"rewards/accuracy_reward_step": 0.63671875,
"rewards/brier_reward_group": 0.7000696063041687,
"rewards/format_reward_step": 0.90234375,
"rewards/stepwise_brier_reward": 0.678506076335907,
"step": 183
},
{
"calib/answer_extract_rate": 0.90234375,
"calib/auroc": 0.5168248490077654,
"calib/avg_num_step_conf": 17.6015625,
"calib/ece": 0.2529741379310346,
"calib/final_conf_rate": 0.90625,
"calib/format_rate": 0.90234375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0005033074489499967,
"calib/mean_conf": 0.990043103448276,
"calib/mu_c": 0.9901754385964912,
"calib/mu_w": 0.9896721311475412,
"calib/nonempty_final_conf_rate": 0.90625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.2529741379310346,
"calib/std_conf": 0.0017364859550416988,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08203125,
"completions/max_length": 2869.0,
"completions/max_terminated_length": 2869.0,
"completions/mean_length": 645.7421875,
"completions/mean_terminated_length": 703.44677734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 274.0,
"epoch": 0.19626666666666667,
"grad_norm": 0.008652531541883945,
"learning_rate": 4.444444444444445e-07,
"loss": -0.0473,
"num_tokens": 42894535.0,
"reward": 1.4711438417434692,
"reward_std": 0.3274310827255249,
"rewards/accuracy_reward_step": 0.66796875,
"rewards/brier_reward_group": 0.7086145281791687,
"rewards/format_reward_step": 0.90234375,
"rewards/stepwise_brier_reward": 0.6993983387947083,
"step": 184
},
{
"calib/answer_extract_rate": 0.95703125,
"calib/auroc": 0.5026881720430108,
"calib/avg_num_step_conf": 9.4609375,
"calib/ece": 0.23394308943089426,
"calib/final_conf_rate": 0.9609375,
"calib/format_rate": 0.95703125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 5.3763440860055134e-05,
"calib/mean_conf": 0.990040650406504,
"calib/mu_c": 0.9900537634408603,
"calib/mu_w": 0.9900000000000002,
"calib/nonempty_final_conf_rate": 0.9609375,
"calib/nonempty_reasoning_rate": 0.984375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.23394308943089426,
"calib/std_conf": 0.0006362795057926236,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 2519.0,
"completions/max_terminated_length": 2519.0,
"completions/mean_length": 707.58984375,
"completions/mean_terminated_length": 721.685302734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 357.0,
"epoch": 0.19733333333333333,
"grad_norm": 0.01276659406721592,
"learning_rate": 4.1666666666666667e-07,
"loss": 0.0043,
"num_tokens": 43182598.0,
"reward": 1.5871763229370117,
"reward_std": 0.23624706268310547,
"rewards/accuracy_reward_step": 0.7265625,
"rewards/brier_reward_group": 0.770573616027832,
"rewards/format_reward_step": 0.95703125,
"rewards/stepwise_brier_reward": 0.7578195333480835,
"step": 185
},
{
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.5110367892976588,
"calib/avg_num_step_conf": 8.66796875,
"calib/ece": 0.24742971887550208,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.9959839357429718,
"calib/gap": 0.015303511705685668,
"calib/mean_conf": 0.9863855421686748,
"calib/mu_c": 0.9903804347826088,
"calib/mu_w": 0.9750769230769232,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.24742971887550208,
"calib/std_conf": 0.06266334439471093,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0234375,
"completions/max_length": 2887.0,
"completions/max_terminated_length": 2887.0,
"completions/mean_length": 668.296875,
"completions/mean_terminated_length": 684.3360595703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 263.0,
"epoch": 0.1984,
"grad_norm": 0.00998939573764801,
"learning_rate": 3.8888888888888895e-07,
"loss": -0.034,
"num_tokens": 43458722.0,
"reward": 1.5860662460327148,
"reward_std": 0.20728999376296997,
"rewards/accuracy_reward_step": 0.71875,
"rewards/brier_reward_group": 0.7621311545372009,
"rewards/format_reward_step": 0.96875,
"rewards/stepwise_brier_reward": 0.7696335315704346,
"step": 186
},
{
"calib/answer_extract_rate": 0.94921875,
"calib/auroc": 0.4972781065088757,
"calib/avg_num_step_conf": 7.81640625,
"calib/ece": 0.2976639344262294,
"calib/final_conf_rate": 0.953125,
"calib/format_rate": 0.94921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.00022169625246559743,
"calib/mean_conf": 0.9902868852459016,
"calib/mu_c": 0.9903550295857988,
"calib/mu_w": 0.9901333333333332,
"calib/nonempty_final_conf_rate": 0.953125,
"calib/nonempty_reasoning_rate": 0.9765625,
"calib/nonempty_step_conf_rate": 0.9765625,
"calib/pce": 0.2976639344262294,
"calib/std_conf": 0.002775712899704437,
"calib/step_conf_rate": 0.9765625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0390625,
"completions/max_length": 2844.0,
"completions/max_terminated_length": 2844.0,
"completions/mean_length": 687.0625,
"completions/mean_terminated_length": 714.9918212890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 239.0,
"epoch": 0.19946666666666665,
"grad_norm": 0.017793305218219757,
"learning_rate": 3.611111111111111e-07,
"loss": -0.0418,
"num_tokens": 43736154.0,
"reward": 1.5049433708190918,
"reward_std": 0.37476426362991333,
"rewards/accuracy_reward_step": 0.66015625,
"rewards/brier_reward_group": 0.7644026279449463,
"rewards/format_reward_step": 0.94921875,
"rewards/stepwise_brier_reward": 0.71630859375,
"step": 187
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5325,
"calib/avg_num_step_conf": 6.7265625,
"calib/ece": 0.20619607843137255,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0006499999999997064,
"calib/mean_conf": 0.9905098039215686,
"calib/mu_c": 0.99065,
"calib/mu_w": 0.9900000000000003,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.20619607843137255,
"calib/std_conf": 0.0021995770450792423,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1511.0,
"completions/max_terminated_length": 1511.0,
"completions/mean_length": 664.03515625,
"completions/mean_terminated_length": 674.575439453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 245.0,
"epoch": 0.20053333333333334,
"grad_norm": 0.008216816000640392,
"learning_rate": 3.3333333333333335e-07,
"loss": -0.0195,
"num_tokens": 44010219.0,
"reward": 1.695479393005371,
"reward_std": 0.163077250123024,
"rewards/accuracy_reward_step": 0.78125,
"rewards/brier_reward_group": 0.8345831632614136,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.8301471471786499,
"step": 188
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5093599033816425,
"calib/avg_num_step_conf": 6.8515625,
"calib/ece": 0.2715234375000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0001871980676328988,
"calib/mean_conf": 0.9902734375000001,
"calib/mu_c": 0.9903260869565217,
"calib/mu_w": 0.9901388888888888,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2715234375000001,
"calib/std_conf": 0.001630830136339697,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1543.0,
"completions/max_terminated_length": 1543.0,
"completions/mean_length": 620.70703125,
"completions/mean_terminated_length": 630.5595703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 250.0,
"epoch": 0.2016,
"grad_norm": 0.009894832968711853,
"learning_rate": 3.055555555555556e-07,
"loss": -0.0149,
"num_tokens": 44276888.0,
"reward": 1.6106594800949097,
"reward_std": 0.2062380313873291,
"rewards/accuracy_reward_step": 0.71875,
"rewards/brier_reward_group": 0.7946687340736389,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7807815670967102,
"step": 189
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5088156230234029,
"calib/avg_num_step_conf": 6.4296875,
"calib/ece": 0.26196850393700793,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.9960629921259843,
"calib/gap": -0.005039531941808617,
"calib/mean_conf": 0.9864566929133859,
"calib/mu_c": 0.9851075268817205,
"calib/mu_w": 0.9901470588235292,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.25807086614173236,
"calib/std_conf": 0.061414142861346575,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2453.0,
"completions/max_terminated_length": 2453.0,
"completions/mean_length": 666.02734375,
"completions/mean_terminated_length": 676.5992431640625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 260.0,
"epoch": 0.20266666666666666,
"grad_norm": 0.006808450445532799,
"learning_rate": 2.7777777777777776e-07,
"loss": -0.0015,
"num_tokens": 44552999.0,
"reward": 1.604135274887085,
"reward_std": 0.16429871320724487,
"rewards/accuracy_reward_step": 0.7265625,
"rewards/brier_reward_group": 0.7767007350921631,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.7492154836654663,
"step": 190
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.49062078272604587,
"calib/avg_num_step_conf": 7.20703125,
"calib/ece": 0.3686852589641435,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -0.00018758434547894165,
"calib/mean_conf": 0.9901992031872511,
"calib/mu_c": 0.990128205128205,
"calib/mu_w": 0.9903157894736839,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3686852589641435,
"calib/std_conf": 0.001397265172649418,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.03125,
"completions/max_length": 2606.0,
"completions/max_terminated_length": 2606.0,
"completions/mean_length": 602.3828125,
"completions/mean_terminated_length": 621.8145141601562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 239.0,
"epoch": 0.20373333333333332,
"grad_norm": 0.008911381475627422,
"learning_rate": 2.5000000000000004e-07,
"loss": -0.0611,
"num_tokens": 44811377.0,
"reward": 1.429365634918213,
"reward_std": 0.17169693112373352,
"rewards/accuracy_reward_step": 0.609375,
"rewards/brier_reward_group": 0.6546695232391357,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.6643551588058472,
"step": 191
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.4935262148337596,
"calib/avg_num_step_conf": 6.38671875,
"calib/ece": 0.26404761904761914,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.996031746031746,
"calib/gap": -0.005402813299232423,
"calib/mean_conf": 0.9863492063492063,
"calib/mu_c": 0.9848913043478262,
"calib/mu_w": 0.9902941176470587,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.984375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.2601190476190477,
"calib/std_conf": 0.06164546835908856,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 2522.0,
"completions/max_terminated_length": 2522.0,
"completions/mean_length": 610.15625,
"completions/mean_terminated_length": 622.310791015625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 219.0,
"epoch": 0.2048,
"grad_norm": 0.010867852717638016,
"learning_rate": 2.2222222222222224e-07,
"loss": 0.0018,
"num_tokens": 45072553.0,
"reward": 1.5917949676513672,
"reward_std": 0.21750152111053467,
"rewards/accuracy_reward_step": 0.71875,
"rewards/brier_reward_group": 0.7597798109054565,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.7714629173278809,
"step": 192
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5056060606060606,
"calib/avg_num_step_conf": 6.703125,
"calib/ece": 0.28501992031872525,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.9960159362549801,
"calib/gap": 0.013046969696969835,
"calib/mean_conf": 0.9862151394422312,
"calib/mu_c": 0.9901136363636364,
"calib/mu_w": 0.9770666666666665,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.984375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.28501992031872525,
"calib/std_conf": 0.06175082890454899,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0234375,
"completions/max_length": 2279.0,
"completions/max_terminated_length": 2279.0,
"completions/mean_length": 577.17578125,
"completions/mean_terminated_length": 591.0280151367188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 240.0,
"epoch": 0.20586666666666667,
"grad_norm": 0.010334227234125137,
"learning_rate": 1.9444444444444447e-07,
"loss": -0.0436,
"num_tokens": 45326022.0,
"reward": 1.5489764213562012,
"reward_std": 0.2321387529373169,
"rewards/accuracy_reward_step": 0.6875,
"rewards/brier_reward_group": 0.7586432695388794,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.7263247966766357,
"step": 193
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5377777777777778,
"calib/avg_num_step_conf": 6.609375,
"calib/ece": 0.29894531249999995,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.9921875,
"calib/gap": 0.024844444444444447,
"calib/mean_conf": 0.9825390625000001,
"calib/mu_c": 0.9904,
"calib/mu_w": 0.9655555555555555,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.29894531249999995,
"calib/std_conf": 0.08632355580385398,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1368.0,
"completions/max_terminated_length": 1368.0,
"completions/mean_length": 586.3515625,
"completions/mean_terminated_length": 595.6587524414062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 255.0,
"epoch": 0.20693333333333333,
"grad_norm": 0.031296394765377045,
"learning_rate": 1.6666666666666668e-07,
"loss": -0.0056,
"num_tokens": 45582072.0,
"reward": 1.5580151081085205,
"reward_std": 0.22170788049697876,
"rewards/accuracy_reward_step": 0.68359375,
"rewards/brier_reward_group": 0.7631152868270874,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7423826456069946,
"step": 194
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5213414634146342,
"calib/avg_num_step_conf": 6.30078125,
"calib/ece": 0.339484126984127,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.00042682926829240575,
"calib/mean_conf": 0.9902777777777778,
"calib/mu_c": 0.9904268292682925,
"calib/mu_w": 0.9900000000000001,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.339484126984127,
"calib/std_conf": 0.0016433554953054499,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.02734375,
"completions/max_length": 1475.0,
"completions/max_terminated_length": 1475.0,
"completions/mean_length": 600.27734375,
"completions/mean_terminated_length": 617.152587890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 231.0,
"epoch": 0.208,
"grad_norm": 0.010016894899308681,
"learning_rate": 1.3888888888888888e-07,
"loss": -0.0405,
"num_tokens": 45841727.0,
"reward": 1.479560136795044,
"reward_std": 0.24249720573425293,
"rewards/accuracy_reward_step": 0.640625,
"rewards/brier_reward_group": 0.7105613946914673,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.6842419505119324,
"step": 195
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5053896249719291,
"calib/avg_num_step_conf": 6.16015625,
"calib/ece": 0.2715624999999998,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.99609375,
"calib/gap": 0.012986001946253545,
"calib/mean_conf": 0.9864062500000002,
"calib/mu_c": 0.9901092896174863,
"calib/mu_w": 0.9771232876712328,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2715624999999998,
"calib/std_conf": 0.059276027708825944,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1276.0,
"completions/max_terminated_length": 1276.0,
"completions/mean_length": 522.046875,
"completions/mean_terminated_length": 530.3333740234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 259.0,
"epoch": 0.20906666666666668,
"grad_norm": 0.014148207381367683,
"learning_rate": 1.1111111111111112e-07,
"loss": -0.0091,
"num_tokens": 46077915.0,
"reward": 1.6046156883239746,
"reward_std": 0.1983228623867035,
"rewards/accuracy_reward_step": 0.71484375,
"rewards/brier_reward_group": 0.7881472706794739,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.770940899848938,
"step": 196
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4980625161456988,
"calib/avg_num_step_conf": 6.1171875,
"calib/ece": 0.37289062500000014,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -3.87496770861695e-05,
"calib/mean_conf": 0.9900781250000001,
"calib/mu_c": 0.9900632911392404,
"calib/mu_w": 0.9901020408163266,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.37289062500000014,
"calib/std_conf": 0.0008804240366863013,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1183.0,
"completions/max_terminated_length": 1183.0,
"completions/mean_length": 580.67578125,
"completions/mean_terminated_length": 589.8928833007812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 251.0,
"epoch": 0.21013333333333334,
"grad_norm": 0.01780375838279724,
"learning_rate": 8.333333333333334e-08,
"loss": 0.0089,
"num_tokens": 46331624.0,
"reward": 1.4571163654327393,
"reward_std": 0.21886040270328522,
"rewards/accuracy_reward_step": 0.6171875,
"rewards/brier_reward_group": 0.6930257678031921,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.6666896343231201,
"step": 197
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.49707622694048037,
"calib/avg_num_step_conf": 6.0625,
"calib/ece": 0.324724409448819,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -5.847546119019942e-05,
"calib/mean_conf": 0.9900787401574804,
"calib/mu_c": 0.9900591715976331,
"calib/mu_w": 0.9901176470588233,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.324724409448819,
"calib/std_conf": 0.0008838560756158927,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1807.0,
"completions/max_terminated_length": 1807.0,
"completions/mean_length": 554.56640625,
"completions/mean_terminated_length": 563.3690795898438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 234.0,
"epoch": 0.2112,
"grad_norm": 0.008209271356463432,
"learning_rate": 5.555555555555556e-08,
"loss": -0.0367,
"num_tokens": 46578977.0,
"reward": 1.529144287109375,
"reward_std": 0.24424126744270325,
"rewards/accuracy_reward_step": 0.66015625,
"rewards/brier_reward_group": 0.739728569984436,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.7596610188484192,
"step": 198
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.49287217757367013,
"calib/avg_num_step_conf": 5.9375,
"calib/ece": 0.19561264822134394,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -0.00014255644852667615,
"calib/mean_conf": 0.9900790513833992,
"calib/mu_c": 0.9900497512437811,
"calib/mu_w": 0.9901923076923078,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.19561264822134394,
"calib/std_conf": 0.0008855872135339169,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 2442.0,
"completions/max_terminated_length": 2442.0,
"completions/mean_length": 619.68359375,
"completions/mean_terminated_length": 632.0278930664062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 259.0,
"epoch": 0.21226666666666666,
"grad_norm": 0.02195962518453598,
"learning_rate": 2.777777777777778e-08,
"loss": 0.0067,
"num_tokens": 46841816.0,
"reward": 1.6957043409347534,
"reward_std": 0.17761465907096863,
"rewards/accuracy_reward_step": 0.78515625,
"rewards/brier_reward_group": 0.8301656246185303,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.8354641795158386,
"step": 199
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5145348837209303,
"calib/avg_num_step_conf": 6.203125,
"calib/ece": 0.3156862745098039,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.00029069767441858296,
"calib/mean_conf": 0.9901960784313726,
"calib/mu_c": 0.9902906976744185,
"calib/mu_w": 0.9899999999999999,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3156862745098039,
"calib/std_conf": 0.001386483884679506,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1513.0,
"completions/max_terminated_length": 1513.0,
"completions/mean_length": 619.43359375,
"completions/mean_terminated_length": 629.2659301757812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 211.0,
"epoch": 0.21333333333333335,
"grad_norm": 0.008899247273802757,
"learning_rate": 0.0,
"loss": -0.005,
"num_tokens": 47108439.0,
"reward": 1.5375908613204956,
"reward_std": 0.1821175068616867,
"rewards/accuracy_reward_step": 0.671875,
"rewards/brier_reward_group": 0.7398461699485779,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7308297157287598,
"step": 200
},
{
"epoch": 0.21333333333333335,
"step": 200,
"total_flos": 0.0,
"train_loss": -0.005905325598432682,
"train_runtime": 13072.8321,
"train_samples_per_second": 3.917,
"train_steps_per_second": 0.015
}
],
"logging_steps": 1,
"max_steps": 200,
"num_input_tokens_seen": 47108439,
"num_train_epochs": 1,
"save_steps": 20,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}