Files
PureRL-1.5B-v7-stage1-B-ana…/trainer_state.json
ModelHub XC 1212d7a5d3 初始化项目,由ModelHub XC社区提供模型
Model: zhaohq/PureRL-1.5B-v7-stage1-B-analysis
Source: Original Platform
2026-06-06 21:21:23 +08:00

9039 lines
344 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.21333333333333335,
"eval_steps": 500,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"calib/answer_extract_rate": 0.11328125,
"calib/avg_num_step_conf": 0.33984375,
"calib/ece": 0.4849999999999999,
"calib/final_conf_rate": 0.0703125,
"calib/format_rate": 0.05859375,
"calib/frac_conf_gt_0.9": 0.7222222222222222,
"calib/gap": -0.04555555555555568,
"calib/mean_conf": 0.9349999999999999,
"calib/mu_c": 0.9122222222222222,
"calib/mu_w": 0.9577777777777778,
"calib/nonempty_final_conf_rate": 0.0703125,
"calib/nonempty_reasoning_rate": 0.12890625,
"calib/nonempty_step_conf_rate": 0.078125,
"calib/pce": 0.45999999999999985,
"calib/std_conf": 0.046338129248192805,
"calib/step_conf_rate": 0.078125,
"calib/step_q_c": 0.8305405405405405,
"calib/step_q_c_n": 37.0,
"calib/step_q_gap": 0.11922054054054032,
"calib/step_q_w": 0.7113200000000002,
"calib/step_q_w_n": 50.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0703125,
"completions/max_length": 3006.0,
"completions/max_terminated_length": 3006.0,
"completions/mean_length": 705.6796875,
"completions/mean_terminated_length": 759.0504760742188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0010666666666666667,
"grad_norm": 0.10124761611223221,
"learning_rate": 0.0,
"loss": 0.006,
"num_tokens": 312046.0,
"reward": 0.068359375,
"reward_std": 0.08782906830310822,
"rewards/accuracy_reward_step": 0.0390625,
"rewards/format_reward_step": 0.05859375,
"step": 1
},
{
"calib/answer_extract_rate": 0.1171875,
"calib/avg_num_step_conf": 0.296875,
"calib/ece": 0.8683333333333332,
"calib/final_conf_rate": 0.0703125,
"calib/format_rate": 0.05859375,
"calib/frac_conf_gt_0.9": 0.6666666666666666,
"calib/gap": -0.20176470588235307,
"calib/mean_conf": 0.8905555555555554,
"calib/mu_c": 0.7,
"calib/mu_w": 0.901764705882353,
"calib/nonempty_final_conf_rate": 0.0703125,
"calib/nonempty_reasoning_rate": 0.1328125,
"calib/nonempty_step_conf_rate": 0.07421875,
"calib/pce": 0.8516666666666666,
"calib/std_conf": 0.1136501562897797,
"calib/step_conf_rate": 0.07421875,
"calib/step_q_c": 0.2,
"calib/step_q_c_n": 2.0,
"calib/step_q_gap": -0.5893243243243242,
"calib/step_q_w": 0.7893243243243243,
"calib/step_q_w_n": 74.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.06640625,
"completions/max_length": 3007.0,
"completions/max_terminated_length": 3007.0,
"completions/mean_length": 739.44921875,
"completions/mean_terminated_length": 792.0460205078125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 9.0,
"epoch": 0.0021333333333333334,
"grad_norm": 0.1276310682296753,
"learning_rate": 2.5000000000000004e-07,
"loss": 0.0175,
"num_tokens": 628441.0,
"reward": 0.033203125,
"reward_std": 0.056683022528886795,
"rewards/accuracy_reward_step": 0.00390625,
"rewards/format_reward_step": 0.05859375,
"step": 2
},
{
"calib/answer_extract_rate": 0.05078125,
"calib/avg_num_step_conf": 0.1015625,
"calib/ece": 0.6842857142857142,
"calib/final_conf_rate": 0.02734375,
"calib/format_rate": 0.01171875,
"calib/frac_conf_gt_0.9": 0.8571428571428571,
"calib/gap": 0.020999999999999908,
"calib/mean_conf": 0.9699999999999999,
"calib/mu_c": 0.985,
"calib/mu_w": 0.9640000000000001,
"calib/nonempty_final_conf_rate": 0.02734375,
"calib/nonempty_reasoning_rate": 0.0546875,
"calib/nonempty_step_conf_rate": 0.01953125,
"calib/pce": 0.6842857142857142,
"calib/std_conf": 0.03464101615137754,
"calib/step_conf_rate": 0.01953125,
"calib/step_q_c": 0.8636363636363636,
"calib/step_q_c_n": 11.0,
"calib/step_q_gap": -0.01703030303030295,
"calib/step_q_w": 0.8806666666666666,
"calib/step_q_w_n": 15.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.109375,
"completions/max_length": 2912.0,
"completions/max_terminated_length": 2912.0,
"completions/mean_length": 658.28515625,
"completions/mean_terminated_length": 739.127197265625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.0032,
"grad_norm": 0.06745757162570953,
"learning_rate": 5.000000000000001e-07,
"loss": 0.0039,
"num_tokens": 926026.0,
"reward": 0.017578125,
"reward_std": 0.03477538749575615,
"rewards/accuracy_reward_step": 0.01171875,
"rewards/format_reward_step": 0.01171875,
"step": 3
},
{
"calib/answer_extract_rate": 0.078125,
"calib/avg_num_step_conf": 0.16015625,
"calib/ece": 0.6170000000000001,
"calib/final_conf_rate": 0.0390625,
"calib/format_rate": 0.0234375,
"calib/frac_conf_gt_0.9": 0.7,
"calib/gap": 0.08047619047619026,
"calib/mean_conf": 0.9169999999999998,
"calib/mu_c": 0.9733333333333333,
"calib/mu_w": 0.892857142857143,
"calib/nonempty_final_conf_rate": 0.0390625,
"calib/nonempty_reasoning_rate": 0.0859375,
"calib/nonempty_step_conf_rate": 0.0390625,
"calib/pce": 0.6170000000000001,
"calib/std_conf": 0.07975587752636164,
"calib/step_conf_rate": 0.0390625,
"calib/step_q_c": 0.8742857142857143,
"calib/step_q_c_n": 7.0,
"calib/step_q_gap": 0.11869747899159677,
"calib/step_q_w": 0.7555882352941176,
"calib/step_q_w_n": 34.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.13671875,
"completions/max_length": 2983.0,
"completions/max_terminated_length": 2983.0,
"completions/mean_length": 675.53515625,
"completions/mean_terminated_length": 782.5203857421875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.004266666666666667,
"grad_norm": 0.11923281848430634,
"learning_rate": 7.5e-07,
"loss": 0.0024,
"num_tokens": 1228939.0,
"reward": 0.0234375,
"reward_std": 0.06247568503022194,
"rewards/accuracy_reward_step": 0.01171875,
"rewards/format_reward_step": 0.0234375,
"step": 4
},
{
"calib/answer_extract_rate": 0.0546875,
"calib/avg_num_step_conf": 0.1328125,
"calib/ece": 0.576,
"calib/final_conf_rate": 0.01953125,
"calib/format_rate": 0.015625,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -0.018333333333333424,
"calib/mean_conf": 0.976,
"calib/mu_c": 0.965,
"calib/mu_w": 0.9833333333333334,
"calib/nonempty_final_conf_rate": 0.01953125,
"calib/nonempty_reasoning_rate": 0.0625,
"calib/nonempty_step_conf_rate": 0.02734375,
"calib/pce": 0.576,
"calib/std_conf": 0.01019803902718558,
"calib/step_conf_rate": 0.02734375,
"calib/step_q_c": 0.857,
"calib/step_q_c_n": 10.0,
"calib/step_q_gap": 0.029916666666666702,
"calib/step_q_w": 0.8270833333333333,
"calib/step_q_w_n": 24.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1015625,
"completions/max_length": 2821.0,
"completions/max_terminated_length": 2821.0,
"completions/mean_length": 673.78125,
"completions/mean_terminated_length": 749.9478149414062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.005333333333333333,
"grad_norm": 0.08307817578315735,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.0135,
"num_tokens": 1531923.0,
"reward": 0.015625,
"reward_std": 0.04037860035896301,
"rewards/accuracy_reward_step": 0.0078125,
"rewards/format_reward_step": 0.015625,
"step": 5
},
{
"calib/answer_extract_rate": 0.109375,
"calib/avg_num_step_conf": 0.29296875,
"calib/ece": 0.7416666666666665,
"calib/final_conf_rate": 0.0703125,
"calib/format_rate": 0.04296875,
"calib/frac_conf_gt_0.9": 0.8888888888888888,
"calib/gap": 0.011071428571428399,
"calib/mean_conf": 0.9638888888888888,
"calib/mu_c": 0.9725,
"calib/mu_w": 0.9614285714285716,
"calib/nonempty_final_conf_rate": 0.0703125,
"calib/nonempty_reasoning_rate": 0.1328125,
"calib/nonempty_step_conf_rate": 0.06640625,
"calib/pce": 0.7416666666666665,
"calib/std_conf": 0.03093701287183523,
"calib/step_conf_rate": 0.06640625,
"calib/step_q_c": 0.8311764705882353,
"calib/step_q_c_n": 17.0,
"calib/step_q_gap": -0.02761663286004057,
"calib/step_q_w": 0.8587931034482759,
"calib/step_q_w_n": 58.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08203125,
"completions/max_length": 3032.0,
"completions/max_terminated_length": 3032.0,
"completions/mean_length": 675.99609375,
"completions/mean_terminated_length": 736.4042358398438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0064,
"grad_norm": 0.12601716816425323,
"learning_rate": 1.25e-06,
"loss": 0.0076,
"num_tokens": 1834738.0,
"reward": 0.037109375,
"reward_std": 0.0820886641740799,
"rewards/accuracy_reward_step": 0.015625,
"rewards/format_reward_step": 0.04296875,
"step": 6
},
{
"calib/answer_extract_rate": 0.08984375,
"calib/avg_num_step_conf": 0.2578125,
"calib/ece": 0.5982352941176472,
"calib/final_conf_rate": 0.06640625,
"calib/format_rate": 0.05078125,
"calib/frac_conf_gt_0.9": 0.8235294117647058,
"calib/gap": -0.008030303030303054,
"calib/mean_conf": 0.9335294117647059,
"calib/mu_c": 0.9283333333333333,
"calib/mu_w": 0.9363636363636364,
"calib/nonempty_final_conf_rate": 0.06640625,
"calib/nonempty_reasoning_rate": 0.10546875,
"calib/nonempty_step_conf_rate": 0.06640625,
"calib/pce": 0.5894117647058824,
"calib/std_conf": 0.06944665125290214,
"calib/step_conf_rate": 0.06640625,
"calib/step_q_c": 0.854074074074074,
"calib/step_q_c_n": 27.0,
"calib/step_q_gap": 0.007407407407407196,
"calib/step_q_w": 0.8466666666666668,
"calib/step_q_w_n": 39.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.11328125,
"completions/max_length": 3039.0,
"completions/max_terminated_length": 3039.0,
"completions/mean_length": 718.94140625,
"completions/mean_terminated_length": 810.7885131835938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.007466666666666667,
"grad_norm": 0.14266818761825562,
"learning_rate": 1.5e-06,
"loss": 0.0091,
"num_tokens": 2150019.0,
"reward": 0.048828125,
"reward_std": 0.11278069764375687,
"rewards/accuracy_reward_step": 0.0234375,
"rewards/format_reward_step": 0.05078125,
"step": 7
},
{
"calib/answer_extract_rate": 0.11328125,
"calib/avg_num_step_conf": 0.25390625,
"calib/ece": 0.6886666666666665,
"calib/final_conf_rate": 0.05859375,
"calib/format_rate": 0.03515625,
"calib/frac_conf_gt_0.9": 0.8666666666666667,
"calib/gap": -0.00045454545454548523,
"calib/mean_conf": 0.9553333333333334,
"calib/mu_c": 0.955,
"calib/mu_w": 0.9554545454545454,
"calib/nonempty_final_conf_rate": 0.05859375,
"calib/nonempty_reasoning_rate": 0.125,
"calib/nonempty_step_conf_rate": 0.0546875,
"calib/pce": 0.6886666666666665,
"calib/std_conf": 0.03480740661921763,
"calib/step_conf_rate": 0.0546875,
"calib/step_q_c": 0.855,
"calib/step_q_c_n": 16.0,
"calib/step_q_gap": 0.064795918367347,
"calib/step_q_w": 0.790204081632653,
"calib/step_q_w_n": 49.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08203125,
"completions/max_length": 3066.0,
"completions/max_terminated_length": 3066.0,
"completions/mean_length": 697.16796875,
"completions/mean_terminated_length": 759.4680786132812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.008533333333333334,
"grad_norm": 0.13499559462070465,
"learning_rate": 1.75e-06,
"loss": 0.0125,
"num_tokens": 2458814.0,
"reward": 0.033203125,
"reward_std": 0.08851936459541321,
"rewards/accuracy_reward_step": 0.015625,
"rewards/format_reward_step": 0.03515625,
"step": 8
},
{
"calib/answer_extract_rate": 0.07421875,
"calib/avg_num_step_conf": 0.17578125,
"calib/ece": 0.9137500000000001,
"calib/final_conf_rate": 0.03125,
"calib/format_rate": 0.03125,
"calib/frac_conf_gt_0.9": 0.625,
"calib/mean_conf": 0.9137500000000001,
"calib/mu_c": NaN,
"calib/mu_w": 0.9137500000000001,
"calib/nonempty_final_conf_rate": 0.03125,
"calib/nonempty_reasoning_rate": 0.078125,
"calib/nonempty_step_conf_rate": 0.05078125,
"calib/pce": 0.9137500000000001,
"calib/std_conf": 0.0823008961069076,
"calib/step_conf_rate": 0.05078125,
"calib/step_q_c": 0.9,
"calib/step_q_c_n": 1.0,
"calib/step_q_gap": 0.1890909090909092,
"calib/step_q_w": 0.7109090909090908,
"calib/step_q_w_n": 44.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.07421875,
"completions/max_length": 3072.0,
"completions/max_terminated_length": 3072.0,
"completions/mean_length": 760.875,
"completions/mean_terminated_length": 821.8733520507812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.0096,
"grad_norm": 0.0900043398141861,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.0274,
"num_tokens": 2784942.0,
"reward": 0.01953125,
"reward_std": 0.04761157184839249,
"rewards/accuracy_reward_step": 0.00390625,
"rewards/format_reward_step": 0.03125,
"step": 9
},
{
"calib/answer_extract_rate": 0.09765625,
"calib/avg_num_step_conf": 0.3125,
"calib/ece": 0.8388235294117645,
"calib/final_conf_rate": 0.06640625,
"calib/format_rate": 0.03125,
"calib/frac_conf_gt_0.9": 0.8235294117647058,
"calib/mean_conf": 0.8388235294117646,
"calib/mu_c": NaN,
"calib/mu_w": 0.8388235294117646,
"calib/nonempty_final_conf_rate": 0.06640625,
"calib/nonempty_reasoning_rate": 0.12890625,
"calib/nonempty_step_conf_rate": 0.08203125,
"calib/pce": 0.8388235294117645,
"calib/std_conf": 0.27493062981609134,
"calib/step_conf_rate": 0.08203125,
"calib/step_q_w": 0.7589999999999999,
"calib/step_q_w_n": 80.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1015625,
"completions/max_length": 3047.0,
"completions/max_terminated_length": 3047.0,
"completions/mean_length": 694.37890625,
"completions/mean_terminated_length": 772.8739013671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.010666666666666666,
"grad_norm": 0.13634684681892395,
"learning_rate": 2.25e-06,
"loss": 0.0209,
"num_tokens": 3093311.0,
"reward": 0.015625,
"reward_std": 0.036563027650117874,
"rewards/accuracy_reward_step": 0.0,
"rewards/format_reward_step": 0.03125,
"step": 10
},
{
"calib/answer_extract_rate": 0.140625,
"calib/avg_num_step_conf": 0.328125,
"calib/ece": 0.8406666666666667,
"calib/final_conf_rate": 0.05859375,
"calib/format_rate": 0.04296875,
"calib/frac_conf_gt_0.9": 0.7333333333333333,
"calib/gap": 0.056428571428571384,
"calib/mean_conf": 0.9073333333333333,
"calib/mu_c": 0.96,
"calib/mu_w": 0.9035714285714286,
"calib/nonempty_final_conf_rate": 0.05859375,
"calib/nonempty_reasoning_rate": 0.1640625,
"calib/nonempty_step_conf_rate": 0.078125,
"calib/pce": 0.8406666666666667,
"calib/std_conf": 0.1165313500975405,
"calib/step_conf_rate": 0.078125,
"calib/step_q_c": 0.8475,
"calib/step_q_c_n": 8.0,
"calib/step_q_gap": 0.07078947368421051,
"calib/step_q_w": 0.7767105263157895,
"calib/step_q_w_n": 76.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0859375,
"completions/max_length": 2891.0,
"completions/max_terminated_length": 2891.0,
"completions/mean_length": 671.57421875,
"completions/mean_terminated_length": 734.7136840820312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.011733333333333333,
"grad_norm": 0.11572752147912979,
"learning_rate": 2.5e-06,
"loss": 0.0444,
"num_tokens": 3393522.0,
"reward": 0.033203125,
"reward_std": 0.08088821172714233,
"rewards/accuracy_reward_step": 0.01171875,
"rewards/format_reward_step": 0.04296875,
"step": 11
},
{
"calib/answer_extract_rate": 0.140625,
"calib/avg_num_step_conf": 0.4609375,
"calib/ece": 0.6818750000000001,
"calib/final_conf_rate": 0.09375,
"calib/format_rate": 0.0703125,
"calib/frac_conf_gt_0.9": 0.875,
"calib/gap": -0.037941176470588034,
"calib/mean_conf": 0.936875,
"calib/mu_c": 0.9100000000000001,
"calib/mu_w": 0.9479411764705882,
"calib/nonempty_final_conf_rate": 0.09375,
"calib/nonempty_reasoning_rate": 0.1796875,
"calib/nonempty_step_conf_rate": 0.1171875,
"calib/pce": 0.6635416666666668,
"calib/std_conf": 0.0882918043101019,
"calib/step_conf_rate": 0.1171875,
"calib/step_q_c": 0.7531914893617022,
"calib/step_q_c_n": 47.0,
"calib/step_q_gap": -0.07110428528618495,
"calib/step_q_w": 0.8242957746478872,
"calib/step_q_w_n": 71.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.07421875,
"completions/max_length": 3070.0,
"completions/max_terminated_length": 3070.0,
"completions/mean_length": 731.23828125,
"completions/mean_terminated_length": 789.8607177734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.0128,
"grad_norm": 0.21758244931697845,
"learning_rate": 2.7500000000000004e-06,
"loss": 0.0009,
"num_tokens": 3708703.0,
"reward": 0.0703125,
"reward_std": 0.1610427349805832,
"rewards/accuracy_reward_step": 0.03515625,
"rewards/format_reward_step": 0.0703125,
"step": 12
},
{
"calib/answer_extract_rate": 0.17578125,
"calib/avg_num_step_conf": 0.6015625,
"calib/ece": 0.765,
"calib/final_conf_rate": 0.1015625,
"calib/format_rate": 0.07421875,
"calib/frac_conf_gt_0.9": 0.6923076923076923,
"calib/gap": 0.03386363636363643,
"calib/mean_conf": 0.9188461538461538,
"calib/mu_c": 0.9475,
"calib/mu_w": 0.9136363636363636,
"calib/nonempty_final_conf_rate": 0.1015625,
"calib/nonempty_reasoning_rate": 0.19921875,
"calib/nonempty_step_conf_rate": 0.12109375,
"calib/pce": 0.765,
"calib/std_conf": 0.09608997878899045,
"calib/step_conf_rate": 0.12109375,
"calib/step_q_c": 0.8533333333333333,
"calib/step_q_c_n": 18.0,
"calib/step_q_gap": 0.06563039215686273,
"calib/step_q_w": 0.7877029411764706,
"calib/step_q_w_n": 136.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.10546875,
"completions/max_length": 3039.0,
"completions/max_terminated_length": 3039.0,
"completions/mean_length": 633.6171875,
"completions/mean_terminated_length": 708.3231811523438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.013866666666666666,
"grad_norm": 0.15466050803661346,
"learning_rate": 3e-06,
"loss": 0.0635,
"num_tokens": 3999309.0,
"reward": 0.052734375,
"reward_std": 0.13007746636867523,
"rewards/accuracy_reward_step": 0.015625,
"rewards/format_reward_step": 0.07421875,
"step": 13
},
{
"calib/answer_extract_rate": 0.2578125,
"calib/avg_num_step_conf": 1.00390625,
"calib/ece": 0.7146153846153847,
"calib/final_conf_rate": 0.203125,
"calib/format_rate": 0.1875,
"calib/frac_conf_gt_0.9": 0.75,
"calib/gap": -0.048248337028824695,
"calib/mean_conf": 0.8907692307692308,
"calib/mu_c": 0.8527272727272728,
"calib/mu_w": 0.9009756097560975,
"calib/nonempty_final_conf_rate": 0.203125,
"calib/nonempty_reasoning_rate": 0.29296875,
"calib/nonempty_step_conf_rate": 0.234375,
"calib/pce": 0.696923076923077,
"calib/std_conf": 0.20182671693469884,
"calib/step_conf_rate": 0.234375,
"calib/step_q_c": 0.7833962264150944,
"calib/step_q_c_n": 53.0,
"calib/step_q_gap": -0.018564557898630873,
"calib/step_q_w": 0.8019607843137253,
"calib/step_q_w_n": 204.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0859375,
"completions/max_length": 3043.0,
"completions/max_terminated_length": 3043.0,
"completions/mean_length": 588.078125,
"completions/mean_terminated_length": 643.3675537109375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 7.0,
"epoch": 0.014933333333333333,
"grad_norm": 0.2625541388988495,
"learning_rate": 3.2500000000000002e-06,
"loss": 0.0223,
"num_tokens": 4279065.0,
"reward": 0.1484375,
"reward_std": 0.19539156556129456,
"rewards/accuracy_reward_step": 0.0546875,
"rewards/format_reward_step": 0.1875,
"step": 14
},
{
"calib/answer_extract_rate": 0.33984375,
"calib/avg_num_step_conf": 1.3515625,
"calib/ece": 0.6461194029850745,
"calib/final_conf_rate": 0.26171875,
"calib/format_rate": 0.23046875,
"calib/frac_conf_gt_0.9": 0.746268656716418,
"calib/gap": 0.04512941176470597,
"calib/mean_conf": 0.8998507462686566,
"calib/mu_c": 0.9335294117647058,
"calib/mu_w": 0.8883999999999999,
"calib/nonempty_final_conf_rate": 0.26171875,
"calib/nonempty_reasoning_rate": 0.37109375,
"calib/nonempty_step_conf_rate": 0.296875,
"calib/pce": 0.6461194029850745,
"calib/std_conf": 0.16033074525048963,
"calib/step_conf_rate": 0.296875,
"calib/step_q_c": 0.8161428571428572,
"calib/step_q_c_n": 70.0,
"calib/step_q_gap": 0.07853416149068326,
"calib/step_q_w": 0.7376086956521739,
"calib/step_q_w_n": 276.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.05859375,
"completions/max_length": 2966.0,
"completions/max_terminated_length": 2966.0,
"completions/mean_length": 566.6953125,
"completions/mean_terminated_length": 601.9668579101562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 5.0,
"epoch": 0.016,
"grad_norm": 0.31918662786483765,
"learning_rate": 3.5e-06,
"loss": 0.0979,
"num_tokens": 4555827.0,
"reward": 0.181640625,
"reward_std": 0.31031230092048645,
"rewards/accuracy_reward_step": 0.06640625,
"rewards/format_reward_step": 0.23046875,
"step": 15
},
{
"calib/answer_extract_rate": 0.4296875,
"calib/avg_num_step_conf": 1.87890625,
"calib/ece": 0.6019709677419354,
"calib/final_conf_rate": 0.36328125,
"calib/format_rate": 0.3046875,
"calib/frac_conf_gt_0.9": 0.7956989247311828,
"calib/gap": 0.0006426229508195158,
"calib/mean_conf": 0.9320784946236558,
"calib/mu_c": 0.9325,
"calib/mu_w": 0.9318573770491805,
"calib/nonempty_final_conf_rate": 0.36328125,
"calib/nonempty_reasoning_rate": 0.4921875,
"calib/nonempty_step_conf_rate": 0.390625,
"calib/pce": 0.5949817204301074,
"calib/std_conf": 0.10571686252150013,
"calib/step_conf_rate": 0.390625,
"calib/step_q_c": 0.7988636363636363,
"calib/step_q_c_n": 132.0,
"calib/step_q_gap": 0.038531544673092055,
"calib/step_q_w": 0.7603320916905443,
"calib/step_q_w_n": 349.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.05859375,
"completions/max_length": 2949.0,
"completions/max_terminated_length": 2949.0,
"completions/mean_length": 532.25,
"completions/mean_terminated_length": 565.3776245117188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.017066666666666667,
"grad_norm": 0.3399321436882019,
"learning_rate": 3.7500000000000005e-06,
"loss": 0.1148,
"num_tokens": 4824739.0,
"reward": 0.28125,
"reward_std": 0.3410623073577881,
"rewards/accuracy_reward_step": 0.12890625,
"rewards/format_reward_step": 0.3046875,
"step": 16
},
{
"calib/answer_extract_rate": 0.56640625,
"calib/avg_num_step_conf": 2.45703125,
"calib/ece": 0.6452459016393444,
"calib/final_conf_rate": 0.4765625,
"calib/format_rate": 0.4375,
"calib/frac_conf_gt_0.9": 0.7213114754098361,
"calib/gap": -0.03744832041343693,
"calib/mean_conf": 0.9127868852459018,
"calib/mu_c": 0.8863888888888889,
"calib/mu_w": 0.9238372093023258,
"calib/nonempty_final_conf_rate": 0.4765625,
"calib/nonempty_reasoning_rate": 0.63671875,
"calib/nonempty_step_conf_rate": 0.56640625,
"calib/pce": 0.6314754098360658,
"calib/std_conf": 0.1096511778851909,
"calib/step_conf_rate": 0.56640625,
"calib/step_q_c": 0.7571951219512195,
"calib/step_q_c_n": 164.0,
"calib/step_q_gap": -0.023579071597167545,
"calib/step_q_w": 0.7807741935483871,
"calib/step_q_w_n": 465.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.03515625,
"completions/max_length": 3004.0,
"completions/max_terminated_length": 3004.0,
"completions/mean_length": 443.328125,
"completions/mean_terminated_length": 459.4817810058594,
"completions/min_length": 0.0,
"completions/min_terminated_length": 6.0,
"epoch": 0.018133333333333335,
"grad_norm": 0.40701761841773987,
"learning_rate": 4.000000000000001e-06,
"loss": 0.0584,
"num_tokens": 5065567.0,
"reward": 0.3828125,
"reward_std": 0.4032036066055298,
"rewards/accuracy_reward_step": 0.1640625,
"rewards/format_reward_step": 0.4375,
"step": 17
},
{
"calib/answer_extract_rate": 0.63671875,
"calib/avg_num_step_conf": 2.8515625,
"calib/ece": 0.6883430656934306,
"calib/final_conf_rate": 0.53515625,
"calib/format_rate": 0.47265625,
"calib/frac_conf_gt_0.9": 0.8467153284671532,
"calib/gap": 0.0022530253025297986,
"calib/mean_conf": 0.9511167883211679,
"calib/mu_c": 0.9527777777777775,
"calib/mu_w": 0.9505247524752477,
"calib/nonempty_final_conf_rate": 0.53515625,
"calib/nonempty_reasoning_rate": 0.71484375,
"calib/nonempty_step_conf_rate": 0.63671875,
"calib/pce": 0.6883430656934306,
"calib/std_conf": 0.06206166767066995,
"calib/step_conf_rate": 0.63671875,
"calib/step_q_c": 0.787375,
"calib/step_q_c_n": 160.0,
"calib/step_q_gap": -0.01965078947368404,
"calib/step_q_w": 0.8070257894736841,
"calib/step_q_w_n": 570.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 3052.0,
"completions/max_terminated_length": 3052.0,
"completions/mean_length": 494.27734375,
"completions/mean_terminated_length": 504.12353515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 10.0,
"epoch": 0.0192,
"grad_norm": 0.28891655802726746,
"learning_rate": 4.25e-06,
"loss": 0.0822,
"num_tokens": 5326630.0,
"reward": 0.400390625,
"reward_std": 0.41276422142982483,
"rewards/accuracy_reward_step": 0.1640625,
"rewards/format_reward_step": 0.47265625,
"step": 18
},
{
"calib/answer_extract_rate": 0.91015625,
"calib/avg_num_step_conf": 4.27734375,
"calib/ece": 0.7442465116279071,
"calib/final_conf_rate": 0.83984375,
"calib/format_rate": 0.80859375,
"calib/frac_conf_gt_0.9": 0.7534883720930232,
"calib/gap": 0.005089999999999928,
"calib/mean_conf": 0.9221069767441861,
"calib/mu_c": 0.9262499999999999,
"calib/mu_w": 0.92116,
"calib/nonempty_final_conf_rate": 0.83984375,
"calib/nonempty_reasoning_rate": 0.95703125,
"calib/nonempty_step_conf_rate": 0.93359375,
"calib/pce": 0.7401534883720932,
"calib/std_conf": 0.11782830511472514,
"calib/step_conf_rate": 0.93359375,
"calib/step_q_c": 0.7984269662921348,
"calib/step_q_c_n": 178.0,
"calib/step_q_gap": 0.009195450479703049,
"calib/step_q_w": 0.7892315158124318,
"calib/step_q_w_n": 917.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2930.0,
"completions/max_terminated_length": 2930.0,
"completions/mean_length": 351.328125,
"completions/mean_terminated_length": 354.094482421875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.020266666666666665,
"grad_norm": 0.33571699261665344,
"learning_rate": 4.5e-06,
"loss": 0.0736,
"num_tokens": 5545138.0,
"reward": 0.576171875,
"reward_std": 0.34173643589019775,
"rewards/accuracy_reward_step": 0.171875,
"rewards/format_reward_step": 0.80859375,
"step": 19
},
{
"calib/answer_extract_rate": 0.921875,
"calib/avg_num_step_conf": 3.984375,
"calib/ece": 0.6350000000000002,
"calib/final_conf_rate": 0.8828125,
"calib/format_rate": 0.8359375,
"calib/frac_conf_gt_0.9": 0.8230088495575221,
"calib/gap": -0.021809177646524458,
"calib/mean_conf": 0.9403097345132743,
"calib/mu_c": 0.9253521126760562,
"calib/mu_w": 0.9471612903225807,
"calib/nonempty_final_conf_rate": 0.8828125,
"calib/nonempty_reasoning_rate": 0.9765625,
"calib/nonempty_step_conf_rate": 0.93359375,
"calib/pce": 0.6305752212389383,
"calib/std_conf": 0.0730401247037796,
"calib/step_conf_rate": 0.93359375,
"calib/step_q_c": 0.7642567567567567,
"calib/step_q_c_n": 296.0,
"calib/step_q_gap": -0.03545180677915494,
"calib/step_q_w": 0.7997085635359117,
"calib/step_q_w_n": 724.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 3042.0,
"completions/max_terminated_length": 3042.0,
"completions/mean_length": 331.35546875,
"completions/mean_terminated_length": 332.6549072265625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 37.0,
"epoch": 0.021333333333333333,
"grad_norm": 0.3089500963687897,
"learning_rate": 4.75e-06,
"loss": 0.1082,
"num_tokens": 5758645.0,
"reward": 0.7109375,
"reward_std": 0.38752901554107666,
"rewards/accuracy_reward_step": 0.29296875,
"rewards/format_reward_step": 0.8359375,
"step": 20
},
{
"calib/answer_extract_rate": 0.984375,
"calib/avg_num_step_conf": 4.43359375,
"calib/ece": 0.6724793388429753,
"calib/final_conf_rate": 0.9453125,
"calib/format_rate": 0.921875,
"calib/frac_conf_gt_0.9": 0.743801652892562,
"calib/gap": 0.02329861425595514,
"calib/mean_conf": 0.9140495867768595,
"calib/mu_c": 0.9314754098360656,
"calib/mu_w": 0.9081767955801104,
"calib/nonempty_final_conf_rate": 0.9453125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9765625,
"calib/pce": 0.6672314049586777,
"calib/std_conf": 0.14302850962638514,
"calib/step_conf_rate": 0.9765625,
"calib/step_q_c": 0.7598765432098764,
"calib/step_q_c_n": 243.0,
"calib/step_q_gap": -0.023733322260975576,
"calib/step_q_w": 0.783609865470852,
"calib/step_q_w_n": 892.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2673.0,
"completions/max_terminated_length": 2673.0,
"completions/mean_length": 321.828125,
"completions/mean_terminated_length": 323.0902099609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 91.0,
"epoch": 0.0224,
"grad_norm": 0.22014741599559784,
"learning_rate": 5e-06,
"loss": 0.0298,
"num_tokens": 5967801.0,
"reward": 0.70703125,
"reward_std": 0.3434026837348938,
"rewards/accuracy_reward_step": 0.24609375,
"rewards/format_reward_step": 0.921875,
"step": 21
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/avg_num_step_conf": 4.41796875,
"calib/ece": 0.6465617529880476,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.8047808764940239,
"calib/gap": 0.027354009542866042,
"calib/mean_conf": 0.9318207171314741,
"calib/mu_c": 0.9512191780821919,
"calib/mu_w": 0.9238651685393259,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.6437729083665338,
"calib/std_conf": 0.11622469192487259,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.7953690058479533,
"calib/step_q_c_n": 342.0,
"calib/step_q_gap": 0.016090171880906445,
"calib/step_q_w": 0.7792788339670469,
"calib/step_q_w_n": 789.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1216.0,
"completions/max_terminated_length": 1216.0,
"completions/mean_length": 292.609375,
"completions/mean_terminated_length": 292.609375,
"completions/min_length": 80.0,
"completions/min_terminated_length": 80.0,
"epoch": 0.023466666666666667,
"grad_norm": 0.25837308168411255,
"learning_rate": 4.9722222222222224e-06,
"loss": 0.0051,
"num_tokens": 6168333.0,
"reward": 0.76953125,
"reward_std": 0.37649795413017273,
"rewards/accuracy_reward_step": 0.2890625,
"rewards/format_reward_step": 0.9609375,
"step": 22
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/avg_num_step_conf": 4.2734375,
"calib/ece": 0.6694779116465864,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.9375,
"calib/frac_conf_gt_0.9": 0.8433734939759037,
"calib/gap": -0.0037214684756584626,
"calib/mean_conf": 0.9473895582329317,
"calib/mu_c": 0.9447142857142856,
"calib/mu_w": 0.9484357541899441,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.6678714859437752,
"calib/std_conf": 0.05626255193649903,
"calib/step_conf_rate": 0.98046875,
"calib/step_q_c": 0.7870394736842105,
"calib/step_q_c_n": 304.0,
"calib/step_q_gap": 0.0010901065956029354,
"calib/step_q_w": 0.7859493670886075,
"calib/step_q_w_n": 790.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1397.0,
"completions/max_terminated_length": 1397.0,
"completions/mean_length": 302.87890625,
"completions/mean_terminated_length": 304.0666809082031,
"completions/min_length": 0.0,
"completions/min_terminated_length": 64.0,
"epoch": 0.024533333333333334,
"grad_norm": 0.26379406452178955,
"learning_rate": 4.944444444444445e-06,
"loss": -0.0043,
"num_tokens": 6373614.0,
"reward": 0.74609375,
"reward_std": 0.3697567582130432,
"rewards/accuracy_reward_step": 0.27734375,
"rewards/format_reward_step": 0.9375,
"step": 23
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 4.546875,
"calib/ece": 0.7166532258064517,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.95703125,
"calib/frac_conf_gt_0.9": 0.7862903225806451,
"calib/gap": 0.0024179970972422193,
"calib/mean_conf": 0.9303629032258065,
"calib/mu_c": 0.9322641509433962,
"calib/mu_w": 0.929846153846154,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.7166532258064517,
"calib/std_conf": 0.10919181366850009,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.8040654205607476,
"calib/step_q_c_n": 214.0,
"calib/step_q_gap": 0.00751805213969492,
"calib/step_q_w": 0.7965473684210527,
"calib/step_q_w_n": 950.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1059.0,
"completions/max_terminated_length": 1059.0,
"completions/mean_length": 286.80078125,
"completions/mean_terminated_length": 286.80078125,
"completions/min_length": 88.0,
"completions/min_terminated_length": 88.0,
"epoch": 0.0256,
"grad_norm": 0.256256639957428,
"learning_rate": 4.9166666666666665e-06,
"loss": 0.025,
"num_tokens": 6575355.0,
"reward": 0.685546875,
"reward_std": 0.34957581758499146,
"rewards/accuracy_reward_step": 0.20703125,
"rewards/format_reward_step": 0.95703125,
"step": 24
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/avg_num_step_conf": 4.6953125,
"calib/ece": 0.6879785809906291,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.8433734939759037,
"calib/gap": -0.0027747747747748353,
"calib/mean_conf": 0.9353949129852746,
"calib/mu_c": 0.9333333333333333,
"calib/mu_w": 0.9361081081081082,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.6831726907630522,
"calib/std_conf": 0.11454053393592831,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.819756838905775,
"calib/step_q_c_n": 329.0,
"calib/step_q_gap": 0.02302946204437739,
"calib/step_q_w": 0.7967273768613976,
"calib/step_q_w_n": 873.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 871.0,
"completions/max_terminated_length": 871.0,
"completions/mean_length": 288.19140625,
"completions/mean_terminated_length": 289.32159423828125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 66.0,
"epoch": 0.02666666666666667,
"grad_norm": 0.26059645414352417,
"learning_rate": 4.888888888888889e-06,
"loss": -0.0084,
"num_tokens": 6776164.0,
"reward": 0.73828125,
"reward_std": 0.35654735565185547,
"rewards/accuracy_reward_step": 0.2578125,
"rewards/format_reward_step": 0.9609375,
"step": 25
},
{
"calib/answer_extract_rate": 0.97265625,
"calib/avg_num_step_conf": 4.8671875,
"calib/ece": 0.7299335989375831,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.8884462151394422,
"calib/gap": 0.007348290598290563,
"calib/mean_conf": 0.9530411686586986,
"calib/mu_c": 0.95875,
"calib/mu_w": 0.9514017094017094,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.7299335989375831,
"calib/std_conf": 0.08047558716130868,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.8019241877256318,
"calib/step_q_c_n": 277.0,
"calib/step_q_gap": -0.005229303846435718,
"calib/step_q_w": 0.8071534915720675,
"calib/step_q_w_n": 969.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1940.0,
"completions/max_terminated_length": 1940.0,
"completions/mean_length": 332.44140625,
"completions/mean_terminated_length": 332.44140625,
"completions/min_length": 113.0,
"completions/min_terminated_length": 113.0,
"epoch": 0.027733333333333332,
"grad_norm": 0.19963575899600983,
"learning_rate": 4.861111111111111e-06,
"loss": 0.0327,
"num_tokens": 6990317.0,
"reward": 0.701171875,
"reward_std": 0.2856733798980713,
"rewards/accuracy_reward_step": 0.21875,
"rewards/format_reward_step": 0.96484375,
"step": 26
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 4.76171875,
"calib/ece": 0.7093019607843137,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.9137254901960784,
"calib/gap": 0.004200892857142646,
"calib/mean_conf": 0.9563607843137255,
"calib/mu_c": 0.9595238095238094,
"calib/mu_w": 0.9553229166666668,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.7093019607843137,
"calib/std_conf": 0.04585770009723973,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.7931908424908425,
"calib/step_q_c_n": 273.0,
"calib/step_q_gap": -0.01610196934848085,
"calib/step_q_w": 0.8092928118393233,
"calib/step_q_w_n": 946.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 677.0,
"completions/max_terminated_length": 677.0,
"completions/mean_length": 297.78125,
"completions/mean_terminated_length": 298.94903564453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 123.0,
"epoch": 0.0288,
"grad_norm": 0.255687952041626,
"learning_rate": 4.833333333333333e-06,
"loss": 0.0145,
"num_tokens": 7195573.0,
"reward": 0.734375,
"reward_std": 0.3340994119644165,
"rewards/accuracy_reward_step": 0.24609375,
"rewards/format_reward_step": 0.9765625,
"step": 27
},
{
"calib/answer_extract_rate": 1.0,
"calib/avg_num_step_conf": 5.0390625,
"calib/ece": 0.5988373015873018,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.8690476190476191,
"calib/gap": 0.004648148148148401,
"calib/mean_conf": 0.9520119047619048,
"calib/mu_c": 0.9550000000000002,
"calib/mu_w": 0.9503518518518518,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.5968531746031748,
"calib/std_conf": 0.08081894727864886,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.8013708920187794,
"calib/step_q_c_n": 426.0,
"calib/step_q_gap": -0.028470774647887276,
"calib/step_q_w": 0.8298416666666667,
"calib/step_q_w_n": 864.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1098.0,
"completions/max_terminated_length": 1098.0,
"completions/mean_length": 325.05859375,
"completions/mean_terminated_length": 326.3333435058594,
"completions/min_length": 0.0,
"completions/min_terminated_length": 122.0,
"epoch": 0.029866666666666666,
"grad_norm": 0.22692817449569702,
"learning_rate": 4.805555555555556e-06,
"loss": 0.0005,
"num_tokens": 7409540.0,
"reward": 0.83984375,
"reward_std": 0.3686498999595642,
"rewards/accuracy_reward_step": 0.3515625,
"rewards/format_reward_step": 0.9765625,
"step": 28
},
{
"calib/answer_extract_rate": 0.984375,
"calib/avg_num_step_conf": 5.4765625,
"calib/ece": 0.7113740157480315,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.905511811023622,
"calib/gap": -0.004400149588631441,
"calib/mean_conf": 0.9575944881889763,
"calib/mu_c": 0.9542857142857143,
"calib/mu_w": 0.9586858638743457,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.7104685039370079,
"calib/std_conf": 0.044223449808071566,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.8052698412698414,
"calib/step_q_c_n": 315.0,
"calib/step_q_gap": 0.00023488266818538772,
"calib/step_q_w": 0.805034958601656,
"calib/step_q_w_n": 1087.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1968.0,
"completions/max_terminated_length": 1968.0,
"completions/mean_length": 364.38671875,
"completions/mean_terminated_length": 364.38671875,
"completions/min_length": 149.0,
"completions/min_terminated_length": 149.0,
"epoch": 0.030933333333333334,
"grad_norm": 0.17971405386924744,
"learning_rate": 4.777777777777778e-06,
"loss": 0.0278,
"num_tokens": 7633759.0,
"reward": 0.734375,
"reward_std": 0.31729739904403687,
"rewards/accuracy_reward_step": 0.24609375,
"rewards/format_reward_step": 0.9765625,
"step": 29
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/avg_num_step_conf": 5.78515625,
"calib/ece": 0.6281944444444444,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.9047619047619048,
"calib/gap": 0.0041780138304698955,
"calib/mean_conf": 0.9575595238095238,
"calib/mu_c": 0.9603614457831327,
"calib/mu_w": 0.9561834319526628,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.6281944444444444,
"calib/std_conf": 0.044819926617470765,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.7859325842696628,
"calib/step_q_c_n": 445.0,
"calib/step_q_gap": -0.007205157043078447,
"calib/step_q_w": 0.7931377413127413,
"calib/step_q_w_n": 1036.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1096.0,
"completions/max_terminated_length": 1096.0,
"completions/mean_length": 383.33984375,
"completions/mean_terminated_length": 384.8431701660156,
"completions/min_length": 0.0,
"completions/min_terminated_length": 29.0,
"epoch": 0.032,
"grad_norm": 0.2327258139848709,
"learning_rate": 4.75e-06,
"loss": -0.0201,
"num_tokens": 7862686.0,
"reward": 0.806640625,
"reward_std": 0.43618446588516235,
"rewards/accuracy_reward_step": 0.32421875,
"rewards/format_reward_step": 0.96484375,
"step": 30
},
{
"calib/answer_extract_rate": 0.984375,
"calib/avg_num_step_conf": 6.1171875,
"calib/ece": 0.68082,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.908,
"calib/gap": 0.01231669521246248,
"calib/mean_conf": 0.9488199999999999,
"calib/mu_c": 0.9578358208955225,
"calib/mu_w": 0.9455191256830601,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.68082,
"calib/std_conf": 0.09194078311609054,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.8134969325153374,
"calib/step_q_c_n": 326.0,
"calib/step_q_gap": -0.0012135513556303934,
"calib/step_q_w": 0.8147104838709678,
"calib/step_q_w_n": 1240.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 975.0,
"completions/max_terminated_length": 975.0,
"completions/mean_length": 384.53515625,
"completions/mean_terminated_length": 386.04315185546875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 134.0,
"epoch": 0.03306666666666667,
"grad_norm": 0.19871972501277924,
"learning_rate": 4.722222222222222e-06,
"loss": 0.0345,
"num_tokens": 8090847.0,
"reward": 0.74609375,
"reward_std": 0.2939828038215637,
"rewards/accuracy_reward_step": 0.26171875,
"rewards/format_reward_step": 0.96875,
"step": 31
},
{
"calib/answer_extract_rate": 0.984375,
"calib/avg_num_step_conf": 5.6171875,
"calib/ece": 0.61597609561753,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.8924302788844621,
"calib/gap": 0.0003054571226080993,
"calib/mean_conf": 0.9546215139442231,
"calib/mu_c": 0.9548235294117648,
"calib/mu_w": 0.9545180722891567,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.61597609561753,
"calib/std_conf": 0.04114104801741104,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7840430107526881,
"calib/step_q_c_n": 465.0,
"calib/step_q_gap": 0.006000873034291421,
"calib/step_q_w": 0.7780421377183967,
"calib/step_q_w_n": 973.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1993.0,
"completions/max_terminated_length": 1993.0,
"completions/mean_length": 395.8515625,
"completions/mean_terminated_length": 395.8515625,
"completions/min_length": 147.0,
"completions/min_terminated_length": 147.0,
"epoch": 0.034133333333333335,
"grad_norm": 0.19935005903244019,
"learning_rate": 4.694444444444445e-06,
"loss": 0.065,
"num_tokens": 8322697.0,
"reward": 0.82421875,
"reward_std": 0.32162073254585266,
"rewards/accuracy_reward_step": 0.3359375,
"rewards/format_reward_step": 0.9765625,
"step": 32
},
{
"calib/answer_extract_rate": 0.97265625,
"calib/avg_num_step_conf": 6.15234375,
"calib/ece": 0.6119277108433736,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.9317269076305221,
"calib/gap": 0.013934002869440354,
"calib/mean_conf": 0.9532931726907631,
"calib/mu_c": 0.9624705882352942,
"calib/mu_w": 0.9485365853658538,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.6119277108433736,
"calib/std_conf": 0.09626560476211199,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7924902723735409,
"calib/step_q_c_n": 514.0,
"calib/step_q_gap": -0.004799360048702295,
"calib/step_q_w": 0.7972896324222432,
"calib/step_q_w_n": 1061.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2053.0,
"completions/max_terminated_length": 2053.0,
"completions/mean_length": 441.0703125,
"completions/mean_terminated_length": 441.0703125,
"completions/min_length": 81.0,
"completions/min_terminated_length": 81.0,
"epoch": 0.0352,
"grad_norm": 0.17285677790641785,
"learning_rate": 4.666666666666667e-06,
"loss": 0.0726,
"num_tokens": 8566291.0,
"reward": 0.814453125,
"reward_std": 0.29825982451438904,
"rewards/accuracy_reward_step": 0.33203125,
"rewards/format_reward_step": 0.96484375,
"step": 33
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/avg_num_step_conf": 6.35546875,
"calib/ece": 0.6627509881422926,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.924901185770751,
"calib/gap": -0.00461513108614231,
"calib/mean_conf": 0.9591936758893281,
"calib/mu_c": 0.9559466666666668,
"calib/mu_w": 0.9605617977528091,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.6627509881422926,
"calib/std_conf": 0.03106359089630838,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.794635103926097,
"calib/step_q_c_n": 433.0,
"calib/step_q_gap": 0.00898895652241194,
"calib/step_q_w": 0.7856461474036851,
"calib/step_q_w_n": 1194.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2071.0,
"completions/max_terminated_length": 2071.0,
"completions/mean_length": 403.58984375,
"completions/mean_terminated_length": 403.58984375,
"completions/min_length": 177.0,
"completions/min_terminated_length": 177.0,
"epoch": 0.03626666666666667,
"grad_norm": 0.20357801020145416,
"learning_rate": 4.638888888888889e-06,
"loss": 0.0515,
"num_tokens": 8798530.0,
"reward": 0.78125,
"reward_std": 0.33316582441329956,
"rewards/accuracy_reward_step": 0.29296875,
"rewards/format_reward_step": 0.9765625,
"step": 34
},
{
"calib/answer_extract_rate": 0.96484375,
"calib/avg_num_step_conf": 6.57421875,
"calib/ece": 0.6264516129032259,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.95703125,
"calib/frac_conf_gt_0.9": 0.9354838709677419,
"calib/gap": 0.011807228915662549,
"calib/mean_conf": 0.9570967741935484,
"calib/mu_c": 0.965,
"calib/mu_w": 0.9531927710843374,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.6264516129032259,
"calib/std_conf": 0.07159219404138058,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7909735349716447,
"calib/step_q_c_n": 529.0,
"calib/step_q_gap": 0.006346151956046708,
"calib/step_q_w": 0.784627383015598,
"calib/step_q_w_n": 1154.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2484.0,
"completions/max_terminated_length": 2484.0,
"completions/mean_length": 503.55859375,
"completions/mean_terminated_length": 505.5333557128906,
"completions/min_length": 0.0,
"completions/min_terminated_length": 166.0,
"epoch": 0.037333333333333336,
"grad_norm": 0.21932625770568848,
"learning_rate": 4.611111111111112e-06,
"loss": 0.0716,
"num_tokens": 9060505.0,
"reward": 0.798828125,
"reward_std": 0.366168349981308,
"rewards/accuracy_reward_step": 0.3203125,
"rewards/format_reward_step": 0.95703125,
"step": 35
},
{
"calib/answer_extract_rate": 0.984375,
"calib/avg_num_step_conf": 6.2265625,
"calib/ece": 0.4248616600790514,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.8972332015810277,
"calib/gap": -0.004706842435655889,
"calib/mean_conf": 0.9554545454545454,
"calib/mu_c": 0.9532592592592593,
"calib/mu_w": 0.9579661016949151,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.4233596837944664,
"calib/std_conf": 0.03562389876379772,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7755802469135803,
"calib/step_q_c_n": 810.0,
"calib/step_q_gap": -0.006090671453766516,
"calib/step_q_w": 0.7816709183673468,
"calib/step_q_w_n": 784.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2079.0,
"completions/max_terminated_length": 2079.0,
"completions/mean_length": 412.609375,
"completions/mean_terminated_length": 412.609375,
"completions/min_length": 136.0,
"completions/min_terminated_length": 136.0,
"epoch": 0.0384,
"grad_norm": 0.21121746301651,
"learning_rate": 4.583333333333333e-06,
"loss": 0.0364,
"num_tokens": 9292653.0,
"reward": 1.01953125,
"reward_std": 0.3650910258293152,
"rewards/accuracy_reward_step": 0.52734375,
"rewards/format_reward_step": 0.984375,
"step": 36
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/avg_num_step_conf": 5.96875,
"calib/ece": 0.5700793650793651,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.8809523809523809,
"calib/gap": 0.0042899900232789,
"calib/mean_conf": 0.9526190476190476,
"calib/mu_c": 0.955257731958763,
"calib/mu_w": 0.9509677419354841,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.5688888888888889,
"calib/std_conf": 0.04374671999366445,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7560966542750929,
"calib/step_q_c_n": 538.0,
"calib/step_q_gap": 0.016074432052870624,
"calib/step_q_w": 0.7400222222222222,
"calib/step_q_w_n": 990.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2471.0,
"completions/max_terminated_length": 2471.0,
"completions/mean_length": 427.5390625,
"completions/mean_terminated_length": 427.5390625,
"completions/min_length": 141.0,
"completions/min_terminated_length": 141.0,
"epoch": 0.039466666666666664,
"grad_norm": 0.2129954993724823,
"learning_rate": 4.555555555555556e-06,
"loss": 0.0473,
"num_tokens": 9533007.0,
"reward": 0.869140625,
"reward_std": 0.3198983669281006,
"rewards/accuracy_reward_step": 0.37890625,
"rewards/format_reward_step": 0.98046875,
"step": 37
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/avg_num_step_conf": 5.9453125,
"calib/ece": 0.5087698412698413,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.8611111111111112,
"calib/gap": 0.006982940387195491,
"calib/mean_conf": 0.9492460317460316,
"calib/mu_c": 0.9531531531531531,
"calib/mu_w": 0.9461702127659576,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.5087698412698413,
"calib/std_conf": 0.06292338330694436,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7546644844517184,
"calib/step_q_c_n": 611.0,
"calib/step_q_gap": 0.006673266010445156,
"calib/step_q_w": 0.7479912184412733,
"calib/step_q_w_n": 911.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2220.0,
"completions/max_terminated_length": 2220.0,
"completions/mean_length": 427.37890625,
"completions/mean_terminated_length": 430.74407958984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 136.0,
"epoch": 0.04053333333333333,
"grad_norm": 0.20735085010528564,
"learning_rate": 4.527777777777778e-06,
"loss": 0.0576,
"num_tokens": 9773112.0,
"reward": 0.919921875,
"reward_std": 0.3206837773323059,
"rewards/accuracy_reward_step": 0.43359375,
"rewards/format_reward_step": 0.97265625,
"step": 38
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 5.85546875,
"calib/ece": 0.5436758893280633,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.8893280632411067,
"calib/gap": 0.021607843137254834,
"calib/mean_conf": 0.9389328063241107,
"calib/mu_c": 0.9520000000000001,
"calib/mu_w": 0.9303921568627452,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.5436758893280633,
"calib/std_conf": 0.12431332112497273,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7363853211009175,
"calib/step_q_c_n": 545.0,
"calib/step_q_gap": 0.03206666281999504,
"calib/step_q_w": 0.7043186582809224,
"calib/step_q_w_n": 954.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2379.0,
"completions/max_terminated_length": 2379.0,
"completions/mean_length": 444.26171875,
"completions/mean_terminated_length": 444.26171875,
"completions/min_length": 147.0,
"completions/min_terminated_length": 147.0,
"epoch": 0.0416,
"grad_norm": 0.18263089656829834,
"learning_rate": 4.5e-06,
"loss": 0.0384,
"num_tokens": 10016739.0,
"reward": 0.884765625,
"reward_std": 0.2555467486381531,
"rewards/accuracy_reward_step": 0.390625,
"rewards/format_reward_step": 0.98828125,
"step": 39
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 5.55859375,
"calib/ece": 0.5652283464566932,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.9094488188976378,
"calib/gap": -0.0035459107201043505,
"calib/mean_conf": 0.954992125984252,
"calib/mu_c": 0.9528282828282827,
"calib/mu_w": 0.956374193548387,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.5652283464566932,
"calib/std_conf": 0.028724456934620224,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7275092936802975,
"calib/step_q_c_n": 538.0,
"calib/step_q_gap": -0.03869409615021091,
"calib/step_q_w": 0.7662033898305084,
"calib/step_q_w_n": 885.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2410.0,
"completions/max_terminated_length": 2410.0,
"completions/mean_length": 449.60546875,
"completions/mean_terminated_length": 449.60546875,
"completions/min_length": 139.0,
"completions/min_terminated_length": 139.0,
"epoch": 0.042666666666666665,
"grad_norm": 0.19915395975112915,
"learning_rate": 4.472222222222223e-06,
"loss": 0.0304,
"num_tokens": 10262406.0,
"reward": 0.8828125,
"reward_std": 0.34560567140579224,
"rewards/accuracy_reward_step": 0.390625,
"rewards/format_reward_step": 0.984375,
"step": 40
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 5.28125,
"calib/ece": 0.3246456692913384,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.8228346456692913,
"calib/gap": 0.008961849103683917,
"calib/mean_conf": 0.942755905511811,
"calib/mu_c": 0.9461783439490447,
"calib/mu_w": 0.9372164948453607,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3246456692913384,
"calib/std_conf": 0.040657531972191956,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7432474226804123,
"calib/step_q_c_n": 776.0,
"calib/step_q_gap": 0.03934117268041237,
"calib/step_q_w": 0.70390625,
"calib/step_q_w_n": 576.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1955.0,
"completions/max_terminated_length": 1955.0,
"completions/mean_length": 390.625,
"completions/mean_terminated_length": 390.625,
"completions/min_length": 120.0,
"completions/min_terminated_length": 120.0,
"epoch": 0.04373333333333333,
"grad_norm": 0.22646979987621307,
"learning_rate": 4.444444444444444e-06,
"loss": 0.0179,
"num_tokens": 10493462.0,
"reward": 1.107421875,
"reward_std": 0.3214534521102905,
"rewards/accuracy_reward_step": 0.61328125,
"rewards/format_reward_step": 0.98828125,
"step": 41
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 5.15234375,
"calib/ece": 0.4643700787401575,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.8346456692913385,
"calib/gap": 0.0007810849437642764,
"calib/mean_conf": 0.940748031496063,
"calib/mu_c": 0.9411570247933884,
"calib/mu_w": 0.9403759398496241,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.4643700787401575,
"calib/std_conf": 0.041665988793130104,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.6892431561996779,
"calib/step_q_c_n": 621.0,
"calib/step_q_gap": -0.0056120013934453095,
"calib/step_q_w": 0.6948551575931232,
"calib/step_q_w_n": 698.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1862.0,
"completions/max_terminated_length": 1862.0,
"completions/mean_length": 364.67578125,
"completions/mean_terminated_length": 364.67578125,
"completions/min_length": 169.0,
"completions/min_terminated_length": 169.0,
"epoch": 0.0448,
"grad_norm": 0.2288183718919754,
"learning_rate": 4.416666666666667e-06,
"loss": 0.0256,
"num_tokens": 10714995.0,
"reward": 0.96875,
"reward_std": 0.3018547296524048,
"rewards/accuracy_reward_step": 0.47265625,
"rewards/format_reward_step": 0.9921875,
"step": 42
},
{
"calib/answer_extract_rate": 1.0,
"calib/avg_num_step_conf": 5.0234375,
"calib/ece": 0.45517647058823535,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.8392156862745098,
"calib/gap": 0.012088771238611296,
"calib/mean_conf": 0.9414509803921568,
"calib/mu_c": 0.9476612903225807,
"calib/mu_w": 0.9355725190839694,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.45517647058823535,
"calib/std_conf": 0.05712604355265649,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7093910256410256,
"calib/step_q_c_n": 624.0,
"calib/step_q_gap": 0.01144540630567814,
"calib/step_q_w": 0.6979456193353475,
"calib/step_q_w_n": 662.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1429.0,
"completions/max_terminated_length": 1429.0,
"completions/mean_length": 407.31640625,
"completions/mean_terminated_length": 408.91375732421875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 147.0,
"epoch": 0.04586666666666667,
"grad_norm": 0.2891763150691986,
"learning_rate": 4.388888888888889e-06,
"loss": 0.0006,
"num_tokens": 10948300.0,
"reward": 0.982421875,
"reward_std": 0.3541671335697174,
"rewards/accuracy_reward_step": 0.484375,
"rewards/format_reward_step": 0.99609375,
"step": 43
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 5.171875,
"calib/ece": 0.5323320158102767,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.8102766798418972,
"calib/gap": 0.017405531749123737,
"calib/mean_conf": 0.935494071146245,
"calib/mu_c": 0.9458823529411766,
"calib/mu_w": 0.9284768211920529,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.5323320158102767,
"calib/std_conf": 0.06607766543989756,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.6990192307692307,
"calib/step_q_c_n": 520.0,
"calib/step_q_gap": -0.01525937619594342,
"calib/step_q_w": 0.7142786069651741,
"calib/step_q_w_n": 804.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1834.0,
"completions/max_terminated_length": 1834.0,
"completions/mean_length": 428.85546875,
"completions/mean_terminated_length": 428.85546875,
"completions/min_length": 164.0,
"completions/min_terminated_length": 164.0,
"epoch": 0.046933333333333334,
"grad_norm": 0.21532444655895233,
"learning_rate": 4.361111111111112e-06,
"loss": 0.014,
"num_tokens": 11188215.0,
"reward": 0.890625,
"reward_std": 0.27451932430267334,
"rewards/accuracy_reward_step": 0.3984375,
"rewards/format_reward_step": 0.984375,
"step": 44
},
{
"calib/answer_extract_rate": 0.984375,
"calib/avg_num_step_conf": 4.734375,
"calib/ece": 0.4757936507936507,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.7658730158730159,
"calib/gap": 0.021655225019069246,
"calib/mean_conf": 0.9218253968253968,
"calib/mu_c": 0.9336842105263157,
"calib/mu_w": 0.9120289855072464,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.47261904761904755,
"calib/std_conf": 0.11485256797683835,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.7284021543985638,
"calib/step_q_c_n": 557.0,
"calib/step_q_gap": 0.03238688722299132,
"calib/step_q_w": 0.6960152671755725,
"calib/step_q_w_n": 655.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2585.0,
"completions/max_terminated_length": 2585.0,
"completions/mean_length": 417.9921875,
"completions/mean_terminated_length": 417.9921875,
"completions/min_length": 110.0,
"completions/min_terminated_length": 110.0,
"epoch": 0.048,
"grad_norm": 0.22201699018478394,
"learning_rate": 4.333333333333334e-06,
"loss": 0.0488,
"num_tokens": 11424077.0,
"reward": 0.9375,
"reward_std": 0.2813979387283325,
"rewards/accuracy_reward_step": 0.4453125,
"rewards/format_reward_step": 0.984375,
"step": 45
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 4.99609375,
"calib/ece": 0.49800796812748993,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.7768924302788844,
"calib/gap": -0.005764388489208683,
"calib/mean_conf": 0.9394422310756972,
"calib/mu_c": 0.9362499999999999,
"calib/mu_w": 0.9420143884892086,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.49561752988047797,
"calib/std_conf": 0.04473569625394344,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7082641509433962,
"calib/step_q_c_n": 530.0,
"calib/step_q_gap": 0.020360279114290725,
"calib/step_q_w": 0.6879038718291055,
"calib/step_q_w_n": 749.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2897.0,
"completions/max_terminated_length": 2897.0,
"completions/mean_length": 442.06640625,
"completions/mean_terminated_length": 442.06640625,
"completions/min_length": 151.0,
"completions/min_terminated_length": 151.0,
"epoch": 0.04906666666666667,
"grad_norm": 0.22355523705482483,
"learning_rate": 4.305555555555556e-06,
"loss": 0.0527,
"num_tokens": 11665822.0,
"reward": 0.927734375,
"reward_std": 0.2827729284763336,
"rewards/accuracy_reward_step": 0.4375,
"rewards/format_reward_step": 0.98046875,
"step": 46
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 5.0234375,
"calib/ece": 0.4428346456692914,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.7244094488188977,
"calib/gap": 0.009217984496123988,
"calib/mean_conf": 0.9325984251968504,
"calib/mu_c": 0.93728,
"calib/mu_w": 0.928062015503876,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.4416535433070867,
"calib/std_conf": 0.0665305873462583,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.6843163097199341,
"calib/step_q_c_n": 607.0,
"calib/step_q_gap": 0.04003059543421972,
"calib/step_q_w": 0.6442857142857144,
"calib/step_q_w_n": 679.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2423.0,
"completions/max_terminated_length": 2423.0,
"completions/mean_length": 424.06640625,
"completions/mean_terminated_length": 424.06640625,
"completions/min_length": 198.0,
"completions/min_terminated_length": 198.0,
"epoch": 0.050133333333333335,
"grad_norm": 0.2427399754524231,
"learning_rate": 4.277777777777778e-06,
"loss": -0.0224,
"num_tokens": 11904167.0,
"reward": 0.982421875,
"reward_std": 0.30143460631370544,
"rewards/accuracy_reward_step": 0.48828125,
"rewards/format_reward_step": 0.98828125,
"step": 47
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 4.3828125,
"calib/ece": 0.5288188976377952,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.7559055118110236,
"calib/gap": 0.0007895582845753868,
"calib/mean_conf": 0.9311811023622047,
"calib/mu_c": 0.9316504854368934,
"calib/mu_w": 0.930860927152318,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.5272440944881889,
"calib/std_conf": 0.05975822907426125,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.6786977886977887,
"calib/step_q_c_n": 407.0,
"calib/step_q_gap": 0.0012012852012852626,
"calib/step_q_w": 0.6774965034965035,
"calib/step_q_w_n": 715.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1633.0,
"completions/max_terminated_length": 1633.0,
"completions/mean_length": 395.47265625,
"completions/mean_terminated_length": 397.0235595703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 157.0,
"epoch": 0.0512,
"grad_norm": 0.22006112337112427,
"learning_rate": 4.25e-06,
"loss": -0.0051,
"num_tokens": 12132904.0,
"reward": 0.90234375,
"reward_std": 0.2499421089887619,
"rewards/accuracy_reward_step": 0.40625,
"rewards/format_reward_step": 0.9921875,
"step": 48
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/avg_num_step_conf": 4.94921875,
"calib/ece": 0.40646586345381525,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.7309236947791165,
"calib/gap": -0.0025699300699302263,
"calib/mean_conf": 0.9365863453815261,
"calib/mu_c": 0.9353787878787879,
"calib/mu_w": 0.9379487179487181,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.40646586345381525,
"calib/std_conf": 0.03338941428420325,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7109045226130652,
"calib/step_q_c_n": 597.0,
"calib/step_q_gap": 0.05996422410560254,
"calib/step_q_w": 0.6509402985074627,
"calib/step_q_w_n": 670.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2863.0,
"completions/max_terminated_length": 2863.0,
"completions/mean_length": 429.953125,
"completions/mean_terminated_length": 435.0513916015625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 138.0,
"epoch": 0.05226666666666667,
"grad_norm": 0.1909162700176239,
"learning_rate": 4.222222222222223e-06,
"loss": -0.0018,
"num_tokens": 12371316.0,
"reward": 1.001953125,
"reward_std": 0.24757641553878784,
"rewards/accuracy_reward_step": 0.515625,
"rewards/format_reward_step": 0.97265625,
"step": 49
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 5.0234375,
"calib/ece": 0.36492125984251966,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.7913385826771654,
"calib/gap": -0.00724505327245073,
"calib/mean_conf": 0.939724409448819,
"calib/mu_c": 0.9366438356164384,
"calib/mu_w": 0.9438888888888891,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.36492125984251966,
"calib/std_conf": 0.035052306733343724,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7165669515669515,
"calib/step_q_c_n": 702.0,
"calib/step_q_gap": 0.0019094173203761944,
"calib/step_q_w": 0.7146575342465753,
"calib/step_q_w_n": 584.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2441.0,
"completions/max_terminated_length": 2441.0,
"completions/mean_length": 438.9140625,
"completions/mean_terminated_length": 440.63531494140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 124.0,
"epoch": 0.05333333333333334,
"grad_norm": 0.2343200445175171,
"learning_rate": 4.194444444444445e-06,
"loss": -0.0039,
"num_tokens": 12612846.0,
"reward": 1.064453125,
"reward_std": 0.2998589277267456,
"rewards/accuracy_reward_step": 0.5703125,
"rewards/format_reward_step": 0.98828125,
"step": 50
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 4.984375,
"calib/ece": 0.3821343873517785,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.7430830039525692,
"calib/gap": -0.0132604298356509,
"calib/mean_conf": 0.935494071146245,
"calib/mu_c": 0.9295714285714287,
"calib/mu_w": 0.9428318584070796,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3821343873517785,
"calib/std_conf": 0.03893154228400845,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.6802950310559006,
"calib/step_q_c_n": 644.0,
"calib/step_q_gap": -0.01609737400739042,
"calib/step_q_w": 0.696392405063291,
"calib/step_q_w_n": 632.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2526.0,
"completions/max_terminated_length": 2526.0,
"completions/mean_length": 451.89453125,
"completions/mean_terminated_length": 453.66668701171875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 104.0,
"epoch": 0.0544,
"grad_norm": 0.18496379256248474,
"learning_rate": 4.166666666666667e-06,
"loss": 0.0231,
"num_tokens": 12861635.0,
"reward": 1.041015625,
"reward_std": 0.210560142993927,
"rewards/accuracy_reward_step": 0.546875,
"rewards/format_reward_step": 0.98828125,
"step": 51
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 4.37109375,
"calib/ece": 0.2927559055118109,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.6259842519685039,
"calib/gap": 0.018605053191489085,
"calib/mean_conf": 0.9226771653543308,
"calib/mu_c": 0.9295625,
"calib/mu_w": 0.9109574468085109,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.2927559055118109,
"calib/std_conf": 0.07465351797668675,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7196594005449591,
"calib/step_q_c_n": 734.0,
"calib/step_q_gap": 0.037399660285219016,
"calib/step_q_w": 0.6822597402597401,
"calib/step_q_w_n": 385.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1329.0,
"completions/max_terminated_length": 1329.0,
"completions/mean_length": 424.953125,
"completions/mean_terminated_length": 426.61962890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 139.0,
"epoch": 0.055466666666666664,
"grad_norm": 0.2344915121793747,
"learning_rate": 4.138888888888889e-06,
"loss": 0.0139,
"num_tokens": 13102183.0,
"reward": 1.125,
"reward_std": 0.29510045051574707,
"rewards/accuracy_reward_step": 0.62890625,
"rewards/format_reward_step": 0.9921875,
"step": 52
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 4.37109375,
"calib/ece": 0.34669291338582686,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.6614173228346457,
"calib/gap": 0.0040999490056095045,
"calib/mean_conf": 0.9293700787401574,
"calib/mu_c": 0.9310810810810811,
"calib/mu_w": 0.9269811320754716,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.34669291338582686,
"calib/std_conf": 0.037850681174162897,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.750516129032258,
"calib/step_q_c_n": 620.0,
"calib/step_q_gap": 0.0504359687116166,
"calib/step_q_w": 0.7000801603206414,
"calib/step_q_w_n": 499.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2408.0,
"completions/max_terminated_length": 2408.0,
"completions/mean_length": 460.73828125,
"completions/mean_terminated_length": 460.73828125,
"completions/min_length": 146.0,
"completions/min_terminated_length": 146.0,
"epoch": 0.05653333333333333,
"grad_norm": 0.19638237357139587,
"learning_rate": 4.111111111111111e-06,
"loss": 0.0294,
"num_tokens": 13349764.0,
"reward": 1.07421875,
"reward_std": 0.2590813636779785,
"rewards/accuracy_reward_step": 0.578125,
"rewards/format_reward_step": 0.9921875,
"step": 53
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 4.08984375,
"calib/ece": 0.20062745098039214,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.6431372549019608,
"calib/gap": -0.010999999999999788,
"calib/mean_conf": 0.9258039215686273,
"calib/mu_c": 0.9230000000000004,
"calib/mu_w": 0.9340000000000002,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.19066666666666665,
"calib/std_conf": 0.07409612717985112,
"calib/step_conf_rate": 0.984375,
"calib/step_q_c": 0.7322883597883597,
"calib/step_q_c_n": 756.0,
"calib/step_q_gap": -0.009876588665248542,
"calib/step_q_w": 0.7421649484536083,
"calib/step_q_w_n": 291.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1341.0,
"completions/max_terminated_length": 1341.0,
"completions/mean_length": 415.76953125,
"completions/mean_terminated_length": 415.76953125,
"completions/min_length": 111.0,
"completions/min_terminated_length": 111.0,
"epoch": 0.0576,
"grad_norm": 0.21822695434093475,
"learning_rate": 4.083333333333334e-06,
"loss": 0.0361,
"num_tokens": 13586241.0,
"reward": 1.234375,
"reward_std": 0.22762244939804077,
"rewards/accuracy_reward_step": 0.7421875,
"rewards/format_reward_step": 0.984375,
"step": 54
},
{
"calib/answer_extract_rate": 1.0,
"calib/avg_num_step_conf": 3.60546875,
"calib/ece": 0.4306692913385826,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.5787401574803149,
"calib/gap": 0.021449329359165326,
"calib/mean_conf": 0.9109842519685039,
"calib/mu_c": 0.9221311475409836,
"calib/mu_w": 0.9006818181818183,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.4306692913385826,
"calib/std_conf": 0.09818939470388063,
"calib/step_conf_rate": 0.984375,
"calib/step_q_c": 0.7010098522167488,
"calib/step_q_c_n": 406.0,
"calib/step_q_gap": 0.010410239063944227,
"calib/step_q_w": 0.6905996131528046,
"calib/step_q_w_n": 517.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1533.0,
"completions/max_terminated_length": 1533.0,
"completions/mean_length": 425.453125,
"completions/mean_terminated_length": 427.12158203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 116.0,
"epoch": 0.058666666666666666,
"grad_norm": 0.24807067215442657,
"learning_rate": 4.055555555555556e-06,
"loss": 0.0025,
"num_tokens": 13826789.0,
"reward": 0.96484375,
"reward_std": 0.27189987897872925,
"rewards/accuracy_reward_step": 0.4765625,
"rewards/format_reward_step": 0.9765625,
"step": 55
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 4.3125,
"calib/ece": 0.4680079681274901,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.6454183266932271,
"calib/gap": -0.001918263090677108,
"calib/mean_conf": 0.9279282868525895,
"calib/mu_c": 0.9268965517241378,
"calib/mu_w": 0.9288148148148149,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.97265625,
"calib/pce": 0.4668924302788846,
"calib/std_conf": 0.04074616346730289,
"calib/step_conf_rate": 0.97265625,
"calib/step_q_c": 0.7365107212475633,
"calib/step_q_c_n": 513.0,
"calib/step_q_gap": 0.02109616964011818,
"calib/step_q_w": 0.7154145516074452,
"calib/step_q_w_n": 591.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2393.0,
"completions/max_terminated_length": 2393.0,
"completions/mean_length": 470.51171875,
"completions/mean_terminated_length": 472.3569030761719,
"completions/min_length": 0.0,
"completions/min_terminated_length": 149.0,
"epoch": 0.05973333333333333,
"grad_norm": 0.24617721140384674,
"learning_rate": 4.027777777777779e-06,
"loss": 0.0402,
"num_tokens": 14077888.0,
"reward": 0.93359375,
"reward_std": 0.3343261778354645,
"rewards/accuracy_reward_step": 0.453125,
"rewards/format_reward_step": 0.9609375,
"step": 56
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/avg_num_step_conf": 4.0859375,
"calib/ece": 0.31776000000000015,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.592,
"calib/gap": -0.014190386680988043,
"calib/mean_conf": 0.92576,
"calib/mu_c": 0.9201973684210527,
"calib/mu_w": 0.9343877551020408,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.31776000000000015,
"calib/std_conf": 0.03500317699866683,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.7070627062706271,
"calib/step_q_c_n": 606.0,
"calib/step_q_gap": -0.025369111911191067,
"calib/step_q_w": 0.7324318181818181,
"calib/step_q_w_n": 440.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2594.0,
"completions/max_terminated_length": 2594.0,
"completions/mean_length": 465.53515625,
"completions/mean_terminated_length": 467.3608093261719,
"completions/min_length": 0.0,
"completions/min_terminated_length": 176.0,
"epoch": 0.0608,
"grad_norm": 0.18185299634933472,
"learning_rate": 4.000000000000001e-06,
"loss": 0.0489,
"num_tokens": 14327665.0,
"reward": 1.078125,
"reward_std": 0.233200803399086,
"rewards/accuracy_reward_step": 0.59375,
"rewards/format_reward_step": 0.96875,
"step": 57
},
{
"calib/answer_extract_rate": 0.96484375,
"calib/avg_num_step_conf": 4.80859375,
"calib/ece": 0.3945564516129034,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.953125,
"calib/frac_conf_gt_0.9": 0.5645161290322581,
"calib/gap": 0.0200873533246414,
"calib/mean_conf": 0.91875,
"calib/mu_c": 0.9283076923076924,
"calib/mu_w": 0.908220338983051,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.3945564516129034,
"calib/std_conf": 0.07247114275529304,
"calib/step_conf_rate": 0.98046875,
"calib/step_q_c": 0.7396908809891808,
"calib/step_q_c_n": 647.0,
"calib/step_q_gap": 0.04722512756452324,
"calib/step_q_w": 0.6924657534246575,
"calib/step_q_w_n": 584.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2566.0,
"completions/max_terminated_length": 2566.0,
"completions/mean_length": 498.38671875,
"completions/mean_terminated_length": 504.29644775390625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 162.0,
"epoch": 0.06186666666666667,
"grad_norm": 0.22465580701828003,
"learning_rate": 3.972222222222223e-06,
"loss": 0.0199,
"num_tokens": 14585380.0,
"reward": 0.984375,
"reward_std": 0.3242889642715454,
"rewards/accuracy_reward_step": 0.5078125,
"rewards/format_reward_step": 0.953125,
"step": 58
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 4.14453125,
"calib/ece": 0.37674509803921574,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.6352941176470588,
"calib/gap": -0.003002986188876511,
"calib/mean_conf": 0.9277254901960784,
"calib/mu_c": 0.9263829787234042,
"calib/mu_w": 0.9293859649122808,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.375764705882353,
"calib/std_conf": 0.04088629783007876,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.7276872964169381,
"calib/step_q_c_n": 614.0,
"calib/step_q_gap": 0.006479242725662915,
"calib/step_q_w": 0.7212080536912752,
"calib/step_q_w_n": 447.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1857.0,
"completions/max_terminated_length": 1857.0,
"completions/mean_length": 465.66796875,
"completions/mean_terminated_length": 465.66796875,
"completions/min_length": 101.0,
"completions/min_terminated_length": 101.0,
"epoch": 0.06293333333333333,
"grad_norm": 0.2273540198802948,
"learning_rate": 3.944444444444445e-06,
"loss": 0.0299,
"num_tokens": 14834647.0,
"reward": 1.044921875,
"reward_std": 0.32542872428894043,
"rewards/accuracy_reward_step": 0.55078125,
"rewards/format_reward_step": 0.98828125,
"step": 59
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 4.07421875,
"calib/ece": 0.3562549800796812,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.5617529880478087,
"calib/gap": -0.00657536907536882,
"calib/mean_conf": 0.9259760956175299,
"calib/mu_c": 0.9231468531468535,
"calib/mu_w": 0.9297222222222223,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3562549800796812,
"calib/std_conf": 0.03610718089585278,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7030560271646858,
"calib/step_q_c_n": 589.0,
"calib/step_q_gap": 0.00028069676821007583,
"calib/step_q_w": 0.7027753303964758,
"calib/step_q_w_n": 454.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2666.0,
"completions/max_terminated_length": 2666.0,
"completions/mean_length": 479.28125,
"completions/mean_terminated_length": 479.28125,
"completions/min_length": 134.0,
"completions/min_terminated_length": 134.0,
"epoch": 0.064,
"grad_norm": 0.21452797949314117,
"learning_rate": 3.916666666666667e-06,
"loss": 0.0465,
"num_tokens": 15090007.0,
"reward": 1.048828125,
"reward_std": 0.299457311630249,
"rewards/accuracy_reward_step": 0.55859375,
"rewards/format_reward_step": 0.98046875,
"step": 60
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 4.23046875,
"calib/ece": 0.3366666666666667,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.5450980392156862,
"calib/gap": 0.00040000000000006697,
"calib/mean_conf": 0.9249019607843139,
"calib/mu_c": 0.9250666666666668,
"calib/mu_w": 0.9246666666666667,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3366666666666667,
"calib/std_conf": 0.03846936897190413,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7230077519379845,
"calib/step_q_c_n": 645.0,
"calib/step_q_gap": 0.042984920887756406,
"calib/step_q_w": 0.6800228310502281,
"calib/step_q_w_n": 438.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1991.0,
"completions/max_terminated_length": 1991.0,
"completions/mean_length": 406.60546875,
"completions/mean_terminated_length": 406.60546875,
"completions/min_length": 121.0,
"completions/min_terminated_length": 121.0,
"epoch": 0.06506666666666666,
"grad_norm": 0.16886036098003387,
"learning_rate": 3.88888888888889e-06,
"loss": 0.0132,
"num_tokens": 15321970.0,
"reward": 1.08203125,
"reward_std": 0.16696026921272278,
"rewards/accuracy_reward_step": 0.5859375,
"rewards/format_reward_step": 0.9921875,
"step": 61
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/avg_num_step_conf": 4.37109375,
"calib/ece": 0.4346000000000002,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.552,
"calib/gap": -0.007550732987644748,
"calib/mean_conf": 0.9266,
"calib/mu_c": 0.9227642276422765,
"calib/mu_w": 0.9303149606299213,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.4346000000000002,
"calib/std_conf": 0.035755279330470895,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7012295081967213,
"calib/step_q_c_n": 488.0,
"calib/step_q_gap": -0.02986399418045771,
"calib/step_q_w": 0.731093502377179,
"calib/step_q_w_n": 631.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2762.0,
"completions/max_terminated_length": 2762.0,
"completions/mean_length": 484.98828125,
"completions/mean_terminated_length": 486.8902282714844,
"completions/min_length": 0.0,
"completions/min_terminated_length": 95.0,
"epoch": 0.06613333333333334,
"grad_norm": 0.20834773778915405,
"learning_rate": 3.861111111111112e-06,
"loss": 0.0574,
"num_tokens": 15577015.0,
"reward": 0.970703125,
"reward_std": 0.3153229355812073,
"rewards/accuracy_reward_step": 0.484375,
"rewards/format_reward_step": 0.97265625,
"step": 62
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 4.4453125,
"calib/ece": 0.3520634920634922,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.6190476190476191,
"calib/gap": -0.009914706642543014,
"calib/mean_conf": 0.9294444444444444,
"calib/mu_c": 0.9252739726027399,
"calib/mu_w": 0.9351886792452829,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.3510714285714287,
"calib/std_conf": 0.03926518623524129,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.708941717791411,
"calib/step_q_c_n": 652.0,
"calib/step_q_gap": -0.0004204221262843477,
"calib/step_q_w": 0.7093621399176954,
"calib/step_q_w_n": 486.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2338.0,
"completions/max_terminated_length": 2338.0,
"completions/mean_length": 509.08984375,
"completions/mean_terminated_length": 509.08984375,
"completions/min_length": 128.0,
"completions/min_terminated_length": 128.0,
"epoch": 0.0672,
"grad_norm": 0.2037605196237564,
"learning_rate": 3.833333333333334e-06,
"loss": 0.0678,
"num_tokens": 15839790.0,
"reward": 1.0625,
"reward_std": 0.32739830017089844,
"rewards/accuracy_reward_step": 0.5703125,
"rewards/format_reward_step": 0.984375,
"step": 63
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 4.06640625,
"calib/ece": 0.3255731225296444,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.4624505928853755,
"calib/gap": -0.00740711974110031,
"calib/mean_conf": 0.9164822134387353,
"calib/mu_c": 0.9134666666666669,
"calib/mu_w": 0.9208737864077672,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.32458498023715426,
"calib/std_conf": 0.04057731457775138,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.6860690789473685,
"calib/step_q_c_n": 608.0,
"calib/step_q_gap": 0.0013346678619180485,
"calib/step_q_w": 0.6847344110854504,
"calib/step_q_w_n": 433.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 1202.0,
"completions/max_terminated_length": 1202.0,
"completions/mean_length": 413.8125,
"completions/mean_terminated_length": 417.07086181640625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 173.0,
"epoch": 0.06826666666666667,
"grad_norm": 0.23701032996177673,
"learning_rate": 3.8055555555555556e-06,
"loss": -0.0187,
"num_tokens": 16073310.0,
"reward": 1.080078125,
"reward_std": 0.30969738960266113,
"rewards/accuracy_reward_step": 0.5859375,
"rewards/format_reward_step": 0.98828125,
"step": 64
},
{
"calib/answer_extract_rate": 0.984375,
"calib/avg_num_step_conf": 3.77734375,
"calib/ece": 0.3484800000000001,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.46,
"calib/gap": 0.016298904538341374,
"calib/mean_conf": 0.9164800000000001,
"calib/mu_c": 0.9235211267605636,
"calib/mu_w": 0.9072222222222223,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.3484800000000001,
"calib/std_conf": 0.04960251606521587,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.6870819112627986,
"calib/step_q_c_n": 586.0,
"calib/step_q_gap": 0.014850940134189639,
"calib/step_q_w": 0.672230971128609,
"calib/step_q_w_n": 381.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 3055.0,
"completions/max_terminated_length": 3055.0,
"completions/mean_length": 395.453125,
"completions/mean_terminated_length": 397.0039367675781,
"completions/min_length": 0.0,
"completions/min_terminated_length": 135.0,
"epoch": 0.06933333333333333,
"grad_norm": 0.1639893501996994,
"learning_rate": 3.777777777777778e-06,
"loss": 0.0455,
"num_tokens": 16303378.0,
"reward": 1.044921875,
"reward_std": 0.1631406545639038,
"rewards/accuracy_reward_step": 0.55859375,
"rewards/format_reward_step": 0.97265625,
"step": 65
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/avg_num_step_conf": 4.37109375,
"calib/ece": 0.43380952380952376,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.5753968253968254,
"calib/gap": -0.00822832661290318,
"calib/mean_conf": 0.9258730158730158,
"calib/mu_c": 0.9216935483870968,
"calib/mu_w": 0.929921875,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.43380952380952376,
"calib/std_conf": 0.039866241462731676,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.6984412955465586,
"calib/step_q_c_n": 494.0,
"calib/step_q_gap": -0.0016107044534413584,
"calib/step_q_w": 0.700052,
"calib/step_q_w_n": 625.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2367.0,
"completions/max_terminated_length": 2367.0,
"completions/mean_length": 479.359375,
"completions/mean_terminated_length": 481.2392578125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 156.0,
"epoch": 0.0704,
"grad_norm": 0.19545036554336548,
"learning_rate": 3.7500000000000005e-06,
"loss": 0.0222,
"num_tokens": 16556254.0,
"reward": 0.97265625,
"reward_std": 0.2654076814651489,
"rewards/accuracy_reward_step": 0.484375,
"rewards/format_reward_step": 0.9765625,
"step": 66
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 4.171875,
"calib/ece": 0.3474803149606299,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.547244094488189,
"calib/gap": -0.0065306122448981485,
"calib/mean_conf": 0.9262204724409449,
"calib/mu_c": 0.923469387755102,
"calib/mu_w": 0.9300000000000002,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3474803149606299,
"calib/std_conf": 0.03906241994225783,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.690810372771475,
"calib/step_q_c_n": 617.0,
"calib/step_q_gap": -0.01570847423517696,
"calib/step_q_w": 0.7065188470066519,
"calib/step_q_w_n": 451.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2231.0,
"completions/max_terminated_length": 2231.0,
"completions/mean_length": 451.296875,
"completions/mean_terminated_length": 451.296875,
"completions/min_length": 161.0,
"completions/min_terminated_length": 161.0,
"epoch": 0.07146666666666666,
"grad_norm": 0.1560167670249939,
"learning_rate": 3.7222222222222225e-06,
"loss": 0.0265,
"num_tokens": 16800602.0,
"reward": 1.0703125,
"reward_std": 0.15099012851715088,
"rewards/accuracy_reward_step": 0.57421875,
"rewards/format_reward_step": 0.9921875,
"step": 67
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 4.17578125,
"calib/ece": 0.3788627450980392,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.4745098039215686,
"calib/gap": -0.016532846715328398,
"calib/mean_conf": 0.9161176470588235,
"calib/mu_c": 0.9084671532846716,
"calib/mu_w": 0.925,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3788627450980392,
"calib/std_conf": 0.043496967241056575,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7010873440285204,
"calib/step_q_c_n": 561.0,
"calib/step_q_gap": 0.011736950327732809,
"calib/step_q_w": 0.6893503937007875,
"calib/step_q_w_n": 508.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2867.0,
"completions/max_terminated_length": 2867.0,
"completions/mean_length": 419.78125,
"completions/mean_terminated_length": 419.78125,
"completions/min_length": 148.0,
"completions/min_terminated_length": 148.0,
"epoch": 0.07253333333333334,
"grad_norm": 0.1673060655593872,
"learning_rate": 3.694444444444445e-06,
"loss": 0.0282,
"num_tokens": 17035962.0,
"reward": 1.03125,
"reward_std": 0.15729427337646484,
"rewards/accuracy_reward_step": 0.53515625,
"rewards/format_reward_step": 0.9921875,
"step": 68
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/avg_num_step_conf": 4.34765625,
"calib/ece": 0.45079681274900396,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.5896414342629482,
"calib/gap": 0.0021867838044310473,
"calib/mean_conf": 0.9249003984063744,
"calib/mu_c": 0.9260504201680674,
"calib/mu_w": 0.9238636363636363,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.45079681274900396,
"calib/std_conf": 0.041967190275287995,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.7208226452905812,
"calib/step_q_c_n": 499.0,
"calib/step_q_gap": 0.03166955082804035,
"calib/step_q_w": 0.6891530944625408,
"calib/step_q_w_n": 614.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2934.0,
"completions/max_terminated_length": 2934.0,
"completions/mean_length": 522.390625,
"completions/mean_terminated_length": 524.4392700195312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 186.0,
"epoch": 0.0736,
"grad_norm": 0.21035178005695343,
"learning_rate": 3.6666666666666666e-06,
"loss": 0.0529,
"num_tokens": 17297998.0,
"reward": 0.955078125,
"reward_std": 0.3176359534263611,
"rewards/accuracy_reward_step": 0.46484375,
"rewards/format_reward_step": 0.98046875,
"step": 69
},
{
"calib/answer_extract_rate": 0.9453125,
"calib/avg_num_step_conf": 4.58984375,
"calib/ece": 0.44493775933609964,
"calib/final_conf_rate": 0.94140625,
"calib/format_rate": 0.93359375,
"calib/frac_conf_gt_0.9": 0.6058091286307054,
"calib/gap": -0.016115936035290823,
"calib/mean_conf": 0.9270954356846473,
"calib/mu_c": 0.9188034188034188,
"calib/mu_w": 0.9349193548387096,
"calib/nonempty_final_conf_rate": 0.94140625,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.44327800829875524,
"calib/std_conf": 0.04887815339783165,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.6921166306695464,
"calib/step_q_c_n": 463.0,
"calib/step_q_gap": -0.043557526633824284,
"calib/step_q_w": 0.7356741573033707,
"calib/step_q_w_n": 712.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2650.0,
"completions/max_terminated_length": 2650.0,
"completions/mean_length": 571.875,
"completions/mean_terminated_length": 574.11767578125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 148.0,
"epoch": 0.07466666666666667,
"grad_norm": 0.21151414513587952,
"learning_rate": 3.638888888888889e-06,
"loss": 0.1262,
"num_tokens": 17575198.0,
"reward": 0.923828125,
"reward_std": 0.35245469212532043,
"rewards/accuracy_reward_step": 0.45703125,
"rewards/format_reward_step": 0.93359375,
"step": 70
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 5.1484375,
"calib/ece": 0.40937007874015746,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.7086614173228346,
"calib/gap": -0.0026392779333955785,
"calib/mean_conf": 0.9408661417322836,
"calib/mu_c": 0.9396296296296297,
"calib/mu_w": 0.9422689075630253,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.40937007874015746,
"calib/std_conf": 0.03520521664206253,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7219156804733727,
"calib/step_q_c_n": 676.0,
"calib/step_q_gap": -0.0016941326107394739,
"calib/step_q_w": 0.7236098130841122,
"calib/step_q_w_n": 642.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1966.0,
"completions/max_terminated_length": 1966.0,
"completions/mean_length": 498.20703125,
"completions/mean_terminated_length": 500.16082763671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 180.0,
"epoch": 0.07573333333333333,
"grad_norm": 0.1976437270641327,
"learning_rate": 3.6111111111111115e-06,
"loss": 0.0358,
"num_tokens": 17830955.0,
"reward": 1.021484375,
"reward_std": 0.31583717465400696,
"rewards/accuracy_reward_step": 0.52734375,
"rewards/format_reward_step": 0.98828125,
"step": 71
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 4.44921875,
"calib/ece": 0.4215139442231076,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.6772908366533864,
"calib/gap": -0.011867454568560087,
"calib/mean_conf": 0.9354581673306773,
"calib/mu_c": 0.9296899224806202,
"calib/mu_w": 0.9415573770491803,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.4215139442231076,
"calib/std_conf": 0.033353252991088246,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.7225493716337522,
"calib/step_q_c_n": 557.0,
"calib/step_q_gap": 0.000298512527223016,
"calib/step_q_w": 0.7222508591065292,
"calib/step_q_w_n": 582.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2346.0,
"completions/max_terminated_length": 2346.0,
"completions/mean_length": 466.0625,
"completions/mean_terminated_length": 467.8902282714844,
"completions/min_length": 0.0,
"completions/min_terminated_length": 176.0,
"epoch": 0.0768,
"grad_norm": 0.23634567856788635,
"learning_rate": 3.5833333333333335e-06,
"loss": 0.0312,
"num_tokens": 18078483.0,
"reward": 0.994140625,
"reward_std": 0.2701350450515747,
"rewards/accuracy_reward_step": 0.50390625,
"rewards/format_reward_step": 0.98046875,
"step": 72
},
{
"calib/answer_extract_rate": 0.984375,
"calib/avg_num_step_conf": 4.57421875,
"calib/ece": 0.3489682539682539,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.75,
"calib/gap": -0.004851762559457962,
"calib/mean_conf": 0.9402380952380952,
"calib/mu_c": 0.9382550335570471,
"calib/mu_w": 0.943106796116505,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3489682539682539,
"calib/std_conf": 0.031859420517908986,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7216890881913303,
"calib/step_q_c_n": 669.0,
"calib/step_q_gap": -0.010103740493928703,
"calib/step_q_w": 0.731792828685259,
"calib/step_q_w_n": 502.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2751.0,
"completions/max_terminated_length": 2751.0,
"completions/mean_length": 482.48046875,
"completions/mean_terminated_length": 482.48046875,
"completions/min_length": 182.0,
"completions/min_terminated_length": 182.0,
"epoch": 0.07786666666666667,
"grad_norm": 0.21767885982990265,
"learning_rate": 3.555555555555556e-06,
"loss": 0.049,
"num_tokens": 18332838.0,
"reward": 1.072265625,
"reward_std": 0.3525664806365967,
"rewards/accuracy_reward_step": 0.58203125,
"rewards/format_reward_step": 0.98046875,
"step": 73
},
{
"calib/answer_extract_rate": 0.96484375,
"calib/avg_num_step_conf": 4.765625,
"calib/ece": 0.40681632653061217,
"calib/final_conf_rate": 0.95703125,
"calib/format_rate": 0.953125,
"calib/frac_conf_gt_0.9": 0.710204081632653,
"calib/gap": -0.00812374581939801,
"calib/mean_conf": 0.9374285714285714,
"calib/mu_c": 0.9336153846153846,
"calib/mu_w": 0.9417391304347826,
"calib/nonempty_final_conf_rate": 0.95703125,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.40681632653061217,
"calib/std_conf": 0.034409301068170486,
"calib/step_conf_rate": 0.984375,
"calib/step_q_c": 0.7151140939597316,
"calib/step_q_c_n": 596.0,
"calib/step_q_gap": -0.023107059886422254,
"calib/step_q_w": 0.7382211538461538,
"calib/step_q_w_n": 624.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2698.0,
"completions/max_terminated_length": 2698.0,
"completions/mean_length": 518.82421875,
"completions/mean_terminated_length": 518.82421875,
"completions/min_length": 156.0,
"completions/min_terminated_length": 156.0,
"epoch": 0.07893333333333333,
"grad_norm": 0.19470734894275665,
"learning_rate": 3.5277777777777784e-06,
"loss": 0.0318,
"num_tokens": 18593393.0,
"reward": 0.98828125,
"reward_std": 0.26092615723609924,
"rewards/accuracy_reward_step": 0.51171875,
"rewards/format_reward_step": 0.953125,
"step": 74
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 5.109375,
"calib/ece": 0.265,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.7559055118110236,
"calib/gap": -0.011919540229885284,
"calib/mean_conf": 0.9378346456692913,
"calib/mu_c": 0.9340804597701148,
"calib/mu_w": 0.9460000000000001,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2588976377952756,
"calib/std_conf": 0.037799150884002854,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7347410817031069,
"calib/step_q_c_n": 869.0,
"calib/step_q_gap": -0.017901287317394154,
"calib/step_q_w": 0.7526423690205011,
"calib/step_q_w_n": 439.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 3040.0,
"completions/max_terminated_length": 3040.0,
"completions/mean_length": 478.08203125,
"completions/mean_terminated_length": 478.08203125,
"completions/min_length": 170.0,
"completions/min_terminated_length": 170.0,
"epoch": 0.08,
"grad_norm": 0.18703636527061462,
"learning_rate": 3.5e-06,
"loss": 0.0082,
"num_tokens": 18844342.0,
"reward": 1.17578125,
"reward_std": 0.20858918130397797,
"rewards/accuracy_reward_step": 0.6796875,
"rewards/format_reward_step": 0.9921875,
"step": 75
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 4.3125,
"calib/ece": 0.30292490118577065,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.6798418972332015,
"calib/gap": -0.0008457112270842115,
"calib/mean_conf": 0.9313833992094861,
"calib/mu_c": 0.9310691823899372,
"calib/mu_w": 0.9319148936170214,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.30292490118577065,
"calib/std_conf": 0.038066709194776505,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.7095245398773007,
"calib/step_q_c_n": 652.0,
"calib/step_q_gap": 0.014502415983495442,
"calib/step_q_w": 0.6950221238938052,
"calib/step_q_w_n": 452.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2105.0,
"completions/max_terminated_length": 2105.0,
"completions/mean_length": 468.44921875,
"completions/mean_terminated_length": 468.44921875,
"completions/min_length": 183.0,
"completions/min_terminated_length": 183.0,
"epoch": 0.08106666666666666,
"grad_norm": 0.22163182497024536,
"learning_rate": 3.4722222222222224e-06,
"loss": 0.048,
"num_tokens": 19091129.0,
"reward": 1.111328125,
"reward_std": 0.2719145119190216,
"rewards/accuracy_reward_step": 0.62109375,
"rewards/format_reward_step": 0.98046875,
"step": 76
},
{
"calib/answer_extract_rate": 0.984375,
"calib/avg_num_step_conf": 5.0625,
"calib/ece": 0.3633596837944664,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.6996047430830039,
"calib/gap": -0.0018869731800768008,
"calib/mean_conf": 0.9325296442687747,
"calib/mu_c": 0.9317241379310346,
"calib/mu_w": 0.9336111111111114,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.36138339920948614,
"calib/std_conf": 0.04607868453686756,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7270705725699067,
"calib/step_q_c_n": 751.0,
"calib/step_q_gap": -0.0004707118337630156,
"calib/step_q_w": 0.7275412844036697,
"calib/step_q_w_n": 545.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1839.0,
"completions/max_terminated_length": 1839.0,
"completions/mean_length": 476.38671875,
"completions/mean_terminated_length": 478.25494384765625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 147.0,
"epoch": 0.08213333333333334,
"grad_norm": 0.21457961201667786,
"learning_rate": 3.444444444444445e-06,
"loss": 0.021,
"num_tokens": 19341556.0,
"reward": 1.056640625,
"reward_std": 0.28331270813941956,
"rewards/accuracy_reward_step": 0.56640625,
"rewards/format_reward_step": 0.98046875,
"step": 77
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 5.3671875,
"calib/ece": 0.3709523809523809,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.7936507936507936,
"calib/gap": -4.629629629648857e-05,
"calib/mean_conf": 0.9423809523809523,
"calib/mu_c": 0.942361111111111,
"calib/mu_w": 0.9424074074074075,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3709523809523809,
"calib/std_conf": 0.030156432661026095,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7423627075351213,
"calib/step_q_c_n": 783.0,
"calib/step_q_gap": 0.0015843657415511858,
"calib/step_q_w": 0.7407783417935702,
"calib/step_q_w_n": 591.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2333.0,
"completions/max_terminated_length": 2333.0,
"completions/mean_length": 538.58203125,
"completions/mean_terminated_length": 540.6941528320312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 231.0,
"epoch": 0.0832,
"grad_norm": 0.20668213069438934,
"learning_rate": 3.416666666666667e-06,
"loss": 0.0296,
"num_tokens": 19611265.0,
"reward": 1.05859375,
"reward_std": 0.28108295798301697,
"rewards/accuracy_reward_step": 0.56640625,
"rewards/format_reward_step": 0.984375,
"step": 78
},
{
"calib/answer_extract_rate": 1.0,
"calib/avg_num_step_conf": 5.41796875,
"calib/ece": 0.3499218749999999,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.765625,
"calib/gap": -0.004756859035004668,
"calib/mean_conf": 0.9397656249999999,
"calib/mu_c": 0.937814569536424,
"calib/mu_w": 0.9425714285714286,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3499218749999999,
"calib/std_conf": 0.0363890617955365,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7532355658198613,
"calib/step_q_c_n": 866.0,
"calib/step_q_gap": 0.029415988084736444,
"calib/step_q_w": 0.7238195777351248,
"calib/step_q_w_n": 521.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1599.0,
"completions/max_terminated_length": 1599.0,
"completions/mean_length": 517.94140625,
"completions/mean_terminated_length": 519.9725952148438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 191.0,
"epoch": 0.08426666666666667,
"grad_norm": 0.18805666267871857,
"learning_rate": 3.3888888888888893e-06,
"loss": 0.0299,
"num_tokens": 19874042.0,
"reward": 1.08984375,
"reward_std": 0.25407272577285767,
"rewards/accuracy_reward_step": 0.58984375,
"rewards/format_reward_step": 1.0,
"step": 79
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 5.68359375,
"calib/ece": 0.2543529411764706,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.7372549019607844,
"calib/gap": -0.004646658152405281,
"calib/mean_conf": 0.9367058823529413,
"calib/mu_c": 0.9352298850574714,
"calib/mu_w": 0.9398765432098767,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2543529411764706,
"calib/std_conf": 0.034210016947022547,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7527200791295746,
"calib/step_q_c_n": 1011.0,
"calib/step_q_gap": 0.007157016066511646,
"calib/step_q_w": 0.745563063063063,
"calib/step_q_w_n": 444.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2502.0,
"completions/max_terminated_length": 2502.0,
"completions/mean_length": 486.953125,
"completions/mean_terminated_length": 486.953125,
"completions/min_length": 179.0,
"completions/min_terminated_length": 179.0,
"epoch": 0.08533333333333333,
"grad_norm": 0.20187772810459137,
"learning_rate": 3.3611111111111117e-06,
"loss": 0.0085,
"num_tokens": 20124670.0,
"reward": 1.177734375,
"reward_std": 0.29367175698280334,
"rewards/accuracy_reward_step": 0.6796875,
"rewards/format_reward_step": 0.99609375,
"step": 80
},
{
"calib/answer_extract_rate": 0.984375,
"calib/avg_num_step_conf": 5.328125,
"calib/ece": 0.3355158730158729,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.6626984126984127,
"calib/gap": -0.01723707664884122,
"calib/mean_conf": 0.9311507936507937,
"calib/mu_c": 0.9243790849673204,
"calib/mu_w": 0.9416161616161616,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.32976190476190465,
"calib/std_conf": 0.03569971042973518,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7176338028169014,
"calib/step_q_c_n": 710.0,
"calib/step_q_gap": -0.04826833785588158,
"calib/step_q_w": 0.765902140672783,
"calib/step_q_w_n": 654.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 3030.0,
"completions/max_terminated_length": 3030.0,
"completions/mean_length": 513.9453125,
"completions/mean_terminated_length": 515.9608154296875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 192.0,
"epoch": 0.0864,
"grad_norm": 0.19790305197238922,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.0468,
"num_tokens": 20386296.0,
"reward": 1.0859375,
"reward_std": 0.2682061493396759,
"rewards/accuracy_reward_step": 0.59765625,
"rewards/format_reward_step": 0.9765625,
"step": 81
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 4.8515625,
"calib/ece": 0.31055555555555553,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.6150793650793651,
"calib/gap": 0.00011040904556058884,
"calib/mean_conf": 0.9256349206349206,
"calib/mu_c": 0.9256774193548388,
"calib/mu_w": 0.9255670103092782,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.31055555555555553,
"calib/std_conf": 0.04877867136924964,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7049134199134199,
"calib/step_q_c_n": 693.0,
"calib/step_q_gap": -0.014102973529202978,
"calib/step_q_w": 0.7190163934426229,
"calib/step_q_w_n": 549.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2969.0,
"completions/max_terminated_length": 2969.0,
"completions/mean_length": 438.8359375,
"completions/mean_terminated_length": 442.2913513183594,
"completions/min_length": 0.0,
"completions/min_terminated_length": 201.0,
"epoch": 0.08746666666666666,
"grad_norm": 0.23972539603710175,
"learning_rate": 3.3055555555555558e-06,
"loss": -0.0094,
"num_tokens": 20627998.0,
"reward": 1.09765625,
"reward_std": 0.2942523658275604,
"rewards/accuracy_reward_step": 0.60546875,
"rewards/format_reward_step": 0.984375,
"step": 82
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 5.234375,
"calib/ece": 0.34218253968253964,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.7420634920634921,
"calib/gap": -0.008121568627451059,
"calib/mean_conf": 0.937420634920635,
"calib/mu_c": 0.9341333333333334,
"calib/mu_w": 0.9422549019607844,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.34218253968253964,
"calib/std_conf": 0.032429619318158576,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7267061923583662,
"calib/step_q_c_n": 759.0,
"calib/step_q_gap": -0.005428058932511637,
"calib/step_q_w": 0.7321342512908778,
"calib/step_q_w_n": 581.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2298.0,
"completions/max_terminated_length": 2298.0,
"completions/mean_length": 548.875,
"completions/mean_terminated_length": 548.875,
"completions/min_length": 181.0,
"completions/min_terminated_length": 181.0,
"epoch": 0.08853333333333334,
"grad_norm": 0.1901319921016693,
"learning_rate": 3.277777777777778e-06,
"loss": 0.0438,
"num_tokens": 20899582.0,
"reward": 1.080078125,
"reward_std": 0.1934780478477478,
"rewards/accuracy_reward_step": 0.58984375,
"rewards/format_reward_step": 0.98046875,
"step": 83
},
{
"calib/answer_extract_rate": 1.0,
"calib/avg_num_step_conf": 4.2578125,
"calib/ece": 0.3473725490196079,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.6392156862745098,
"calib/gap": 0.00971844293272861,
"calib/mean_conf": 0.9238431372549019,
"calib/mu_c": 0.9279591836734694,
"calib/mu_w": 0.9182407407407408,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3473725490196079,
"calib/std_conf": 0.054500718667331985,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7106328124999999,
"calib/step_q_c_n": 640.0,
"calib/step_q_gap": 0.007455034722222087,
"calib/step_q_w": 0.7031777777777778,
"calib/step_q_w_n": 450.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2025.0,
"completions/max_terminated_length": 2025.0,
"completions/mean_length": 456.90625,
"completions/mean_terminated_length": 456.90625,
"completions/min_length": 159.0,
"completions/min_terminated_length": 159.0,
"epoch": 0.0896,
"grad_norm": 0.26204726099967957,
"learning_rate": 3.2500000000000002e-06,
"loss": 0.0192,
"num_tokens": 21146278.0,
"reward": 1.072265625,
"reward_std": 0.25500500202178955,
"rewards/accuracy_reward_step": 0.57421875,
"rewards/format_reward_step": 0.99609375,
"step": 84
},
{
"calib/answer_extract_rate": 0.97265625,
"calib/avg_num_step_conf": 4.65625,
"calib/ece": 0.40574297188755,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.7269076305220884,
"calib/gap": -0.0045426316470432715,
"calib/mean_conf": 0.931847389558233,
"calib/mu_c": 0.9296946564885497,
"calib/mu_w": 0.9342372881355929,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.40574297188755,
"calib/std_conf": 0.03905254419426512,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7091607142857143,
"calib/step_q_c_n": 560.0,
"calib/step_q_gap": -0.007326627486437642,
"calib/step_q_w": 0.7164873417721519,
"calib/step_q_w_n": 632.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2913.0,
"completions/max_terminated_length": 2913.0,
"completions/mean_length": 532.96875,
"completions/mean_terminated_length": 537.1653442382812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 161.0,
"epoch": 0.09066666666666667,
"grad_norm": 0.16692368686199188,
"learning_rate": 3.2222222222222227e-06,
"loss": 0.038,
"num_tokens": 21414350.0,
"reward": 0.998046875,
"reward_std": 0.1843021810054779,
"rewards/accuracy_reward_step": 0.51171875,
"rewards/format_reward_step": 0.97265625,
"step": 85
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 4.765625,
"calib/ece": 0.397755905511811,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.6259842519685039,
"calib/gap": 0.004126361655773514,
"calib/mean_conf": 0.929251968503937,
"calib/mu_c": 0.9311851851851853,
"calib/mu_w": 0.9270588235294118,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.397755905511811,
"calib/std_conf": 0.03826749376170533,
"calib/step_conf_rate": 0.984375,
"calib/step_q_c": 0.7281309904153355,
"calib/step_q_c_n": 626.0,
"calib/step_q_gap": 0.017033347317692238,
"calib/step_q_w": 0.7110976430976432,
"calib/step_q_w_n": 594.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1937.0,
"completions/max_terminated_length": 1937.0,
"completions/mean_length": 487.19921875,
"completions/mean_terminated_length": 489.1098327636719,
"completions/min_length": 0.0,
"completions/min_terminated_length": 170.0,
"epoch": 0.09173333333333333,
"grad_norm": 0.22901059687137604,
"learning_rate": 3.1944444444444443e-06,
"loss": 0.0125,
"num_tokens": 21668393.0,
"reward": 1.015625,
"reward_std": 0.24991528689861298,
"rewards/accuracy_reward_step": 0.52734375,
"rewards/format_reward_step": 0.9765625,
"step": 86
},
{
"calib/answer_extract_rate": 0.984375,
"calib/avg_num_step_conf": 4.125,
"calib/ece": 0.1978571428571427,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.6706349206349206,
"calib/gap": -0.008534086325131418,
"calib/mean_conf": 0.9288095238095239,
"calib/mu_c": 0.9265405405405405,
"calib/mu_w": 0.9350746268656719,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.19626984126984112,
"calib/std_conf": 0.04113676652449365,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7102567567567568,
"calib/step_q_c_n": 740.0,
"calib/step_q_gap": 0.016775744098529,
"calib/step_q_w": 0.6934810126582278,
"calib/step_q_w_n": 316.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2655.0,
"completions/max_terminated_length": 2655.0,
"completions/mean_length": 445.6796875,
"completions/mean_terminated_length": 450.9644470214844,
"completions/min_length": 0.0,
"completions/min_terminated_length": 130.0,
"epoch": 0.0928,
"grad_norm": 0.20625683665275574,
"learning_rate": 3.1666666666666667e-06,
"loss": 0.0095,
"num_tokens": 21911791.0,
"reward": 1.21484375,
"reward_std": 0.21918149292469025,
"rewards/accuracy_reward_step": 0.72265625,
"rewards/format_reward_step": 0.984375,
"step": 87
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 4.58203125,
"calib/ece": 0.35972549019607836,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.7137254901960784,
"calib/gap": 0.0007678773407061579,
"calib/mean_conf": 0.9322745098039215,
"calib/mu_c": 0.9326027397260274,
"calib/mu_w": 0.9318348623853212,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.35972549019607836,
"calib/std_conf": 0.03309408953294543,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7127668539325842,
"calib/step_q_c_n": 712.0,
"calib/step_q_gap": 0.04421088863974254,
"calib/step_q_w": 0.6685559652928417,
"calib/step_q_w_n": 461.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1309.0,
"completions/max_terminated_length": 1309.0,
"completions/mean_length": 485.171875,
"completions/mean_terminated_length": 487.07452392578125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 195.0,
"epoch": 0.09386666666666667,
"grad_norm": 0.2110147923231125,
"learning_rate": 3.138888888888889e-06,
"loss": -0.0216,
"num_tokens": 22169651.0,
"reward": 1.068359375,
"reward_std": 0.27117839455604553,
"rewards/accuracy_reward_step": 0.5703125,
"rewards/format_reward_step": 0.99609375,
"step": 88
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 4.86328125,
"calib/ece": 0.43505928853754944,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.6798418972332015,
"calib/gap": -0.004131874999999785,
"calib/mean_conf": 0.9291304347826087,
"calib/mu_c": 0.9270400000000001,
"calib/mu_w": 0.9311718749999999,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.43505928853754944,
"calib/std_conf": 0.040030062598860244,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.6985401459854015,
"calib/step_q_c_n": 548.0,
"calib/step_q_gap": 0.037421064206348276,
"calib/step_q_w": 0.6611190817790532,
"calib/step_q_w_n": 697.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2563.0,
"completions/max_terminated_length": 2563.0,
"completions/mean_length": 505.09765625,
"completions/mean_terminated_length": 507.0784606933594,
"completions/min_length": 0.0,
"completions/min_terminated_length": 170.0,
"epoch": 0.09493333333333333,
"grad_norm": 0.19138966500759125,
"learning_rate": 3.1111111111111116e-06,
"loss": 0.0163,
"num_tokens": 22431652.0,
"reward": 0.98046875,
"reward_std": 0.22767284512519836,
"rewards/accuracy_reward_step": 0.48828125,
"rewards/format_reward_step": 0.984375,
"step": 89
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 4.921875,
"calib/ece": 0.3004724409448818,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.6259842519685039,
"calib/gap": -0.0036425024826215457,
"calib/mean_conf": 0.9264566929133858,
"calib/mu_c": 0.9250943396226418,
"calib/mu_w": 0.9287368421052633,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3004724409448818,
"calib/std_conf": 0.03864898688825327,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.6829776021080369,
"calib/step_q_c_n": 759.0,
"calib/step_q_gap": -0.029058326035675708,
"calib/step_q_w": 0.7120359281437126,
"calib/step_q_w_n": 501.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2971.0,
"completions/max_terminated_length": 2971.0,
"completions/mean_length": 485.05859375,
"completions/mean_terminated_length": 486.9608154296875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 135.0,
"epoch": 0.096,
"grad_norm": 0.18919505178928375,
"learning_rate": 3.0833333333333336e-06,
"loss": 0.021,
"num_tokens": 22682955.0,
"reward": 1.1171875,
"reward_std": 0.24751797318458557,
"rewards/accuracy_reward_step": 0.62109375,
"rewards/format_reward_step": 0.9921875,
"step": 90
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 5.29296875,
"calib/ece": 0.35011811023622047,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.65748031496063,
"calib/gap": 0.0019028545997843427,
"calib/mean_conf": 0.9288582677165355,
"calib/mu_c": 0.9296598639455784,
"calib/mu_w": 0.9277570093457941,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.35011811023622047,
"calib/std_conf": 0.03799963207643035,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7166071428571429,
"calib/step_q_c_n": 728.0,
"calib/step_q_gap": 0.009137711703501195,
"calib/step_q_w": 0.7074694311536417,
"calib/step_q_w_n": 627.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2022.0,
"completions/max_terminated_length": 2022.0,
"completions/mean_length": 513.4921875,
"completions/mean_terminated_length": 515.5059204101562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 206.0,
"epoch": 0.09706666666666666,
"grad_norm": 0.18537934124469757,
"learning_rate": 3.055555555555556e-06,
"loss": -0.0098,
"num_tokens": 22945929.0,
"reward": 1.0703125,
"reward_std": 0.254516065120697,
"rewards/accuracy_reward_step": 0.57421875,
"rewards/format_reward_step": 0.9921875,
"step": 91
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 4.83203125,
"calib/ece": 0.32298039215686275,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.6196078431372549,
"calib/gap": 0.002903433200462824,
"calib/mean_conf": 0.9269019607843137,
"calib/mu_c": 0.9280519480519481,
"calib/mu_w": 0.9251485148514853,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.32298039215686275,
"calib/std_conf": 0.041171689731792756,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7056141304347826,
"calib/step_q_c_n": 736.0,
"calib/step_q_gap": 0.025239878937776572,
"calib/step_q_w": 0.680374251497006,
"calib/step_q_w_n": 501.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 3016.0,
"completions/max_terminated_length": 3016.0,
"completions/mean_length": 480.91796875,
"completions/mean_terminated_length": 480.91796875,
"completions/min_length": 162.0,
"completions/min_terminated_length": 162.0,
"epoch": 0.09813333333333334,
"grad_norm": 0.28069862723350525,
"learning_rate": 3.0277777777777776e-06,
"loss": 0.0081,
"num_tokens": 23199572.0,
"reward": 1.099609375,
"reward_std": 0.282889187335968,
"rewards/accuracy_reward_step": 0.6015625,
"rewards/format_reward_step": 0.99609375,
"step": 92
},
{
"calib/answer_extract_rate": 1.0,
"calib/avg_num_step_conf": 5.48046875,
"calib/ece": 0.43621093749999984,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.67578125,
"calib/gap": 0.005967032967033048,
"calib/mean_conf": 0.9283984374999998,
"calib/mu_c": 0.9314285714285715,
"calib/mu_w": 0.9254615384615384,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.43621093749999984,
"calib/std_conf": 0.04112876879458698,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.69,
"calib/step_q_c_n": 653.0,
"calib/step_q_gap": -0.03217333333333339,
"calib/step_q_w": 0.7221733333333333,
"calib/step_q_w_n": 750.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1345.0,
"completions/max_terminated_length": 1345.0,
"completions/mean_length": 509.3671875,
"completions/mean_terminated_length": 511.36474609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 150.0,
"epoch": 0.0992,
"grad_norm": 0.2235431671142578,
"learning_rate": 3e-06,
"loss": 0.0208,
"num_tokens": 23459554.0,
"reward": 0.9921875,
"reward_std": 0.29287609457969666,
"rewards/accuracy_reward_step": 0.4921875,
"rewards/format_reward_step": 1.0,
"step": 93
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 4.96875,
"calib/ece": 0.3397254901960783,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.7686274509803922,
"calib/gap": 0.007563041263372372,
"calib/mean_conf": 0.9318823529411765,
"calib/mu_c": 0.9349668874172186,
"calib/mu_w": 0.9274038461538462,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3397254901960783,
"calib/std_conf": 0.047522810646241954,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7172486772486772,
"calib/step_q_c_n": 756.0,
"calib/step_q_gap": 0.11901224314015013,
"calib/step_q_w": 0.5982364341085271,
"calib/step_q_w_n": 516.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2091.0,
"completions/max_terminated_length": 2091.0,
"completions/mean_length": 462.046875,
"completions/mean_terminated_length": 463.8588562011719,
"completions/min_length": 0.0,
"completions/min_terminated_length": 149.0,
"epoch": 0.10026666666666667,
"grad_norm": 0.18933187425136566,
"learning_rate": 2.9722222222222225e-06,
"loss": 0.0087,
"num_tokens": 23710326.0,
"reward": 1.087890625,
"reward_std": 0.2208297997713089,
"rewards/accuracy_reward_step": 0.58984375,
"rewards/format_reward_step": 0.99609375,
"step": 94
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 5.28125,
"calib/ece": 0.354584980237154,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.6996047430830039,
"calib/gap": 0.017085244648318065,
"calib/mean_conf": 0.9174308300395257,
"calib/mu_c": 0.9247916666666668,
"calib/mu_w": 0.9077064220183487,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3514229249011856,
"calib/std_conf": 0.09339088747697985,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.6632329842931937,
"calib/step_q_c_n": 764.0,
"calib/step_q_gap": 0.07512073939523445,
"calib/step_q_w": 0.5881122448979592,
"calib/step_q_w_n": 588.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2246.0,
"completions/max_terminated_length": 2246.0,
"completions/mean_length": 486.41796875,
"completions/mean_terminated_length": 488.3255310058594,
"completions/min_length": 0.0,
"completions/min_terminated_length": 121.0,
"epoch": 0.10133333333333333,
"grad_norm": 0.21531540155410767,
"learning_rate": 2.944444444444445e-06,
"loss": 0.0167,
"num_tokens": 23964785.0,
"reward": 1.056640625,
"reward_std": 0.2692474126815796,
"rewards/accuracy_reward_step": 0.5625,
"rewards/format_reward_step": 0.98828125,
"step": 95
},
{
"calib/answer_extract_rate": 1.0,
"calib/avg_num_step_conf": 4.953125,
"calib/ece": 0.25441406250000004,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.73828125,
"calib/gap": -0.003654331370900321,
"calib/mean_conf": 0.9341015624999999,
"calib/mu_c": 0.9329310344827585,
"calib/mu_w": 0.9365853658536588,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.25441406250000004,
"calib/std_conf": 0.037634828418083584,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.6484816132858838,
"calib/step_q_c_n": 843.0,
"calib/step_q_gap": -0.0004242690670573923,
"calib/step_q_w": 0.6489058823529412,
"calib/step_q_w_n": 425.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 999.0,
"completions/max_terminated_length": 999.0,
"completions/mean_length": 445.0234375,
"completions/mean_terminated_length": 446.7686462402344,
"completions/min_length": 0.0,
"completions/min_terminated_length": 136.0,
"epoch": 0.1024,
"grad_norm": 0.21615636348724365,
"learning_rate": 2.916666666666667e-06,
"loss": 0.0125,
"num_tokens": 24208335.0,
"reward": 1.1796875,
"reward_std": 0.22514329850673676,
"rewards/accuracy_reward_step": 0.6796875,
"rewards/format_reward_step": 1.0,
"step": 96
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 4.95703125,
"calib/ece": 0.3588976377952755,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.6338582677165354,
"calib/gap": 0.022059875729618095,
"calib/mean_conf": 0.914015748031496,
"calib/mu_c": 0.9238297872340427,
"calib/mu_w": 0.9017699115044246,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3588976377952755,
"calib/std_conf": 0.09175678088628476,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.6680576631259484,
"calib/step_q_c_n": 659.0,
"calib/step_q_gap": 0.00863143361775165,
"calib/step_q_w": 0.6594262295081967,
"calib/step_q_w_n": 610.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2596.0,
"completions/max_terminated_length": 2596.0,
"completions/mean_length": 471.91796875,
"completions/mean_terminated_length": 471.91796875,
"completions/min_length": 156.0,
"completions/min_terminated_length": 156.0,
"epoch": 0.10346666666666667,
"grad_norm": 0.19775576889514923,
"learning_rate": 2.888888888888889e-06,
"loss": 0.0527,
"num_tokens": 24458026.0,
"reward": 1.046875,
"reward_std": 0.2200184315443039,
"rewards/accuracy_reward_step": 0.55078125,
"rewards/format_reward_step": 0.9921875,
"step": 97
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 4.921875,
"calib/ece": 0.38138339920948616,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.6047430830039525,
"calib/gap": 0.034307036247334666,
"calib/mean_conf": 0.9110276679841898,
"calib/mu_c": 0.9271641791044777,
"calib/mu_w": 0.892857142857143,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.38138339920948616,
"calib/std_conf": 0.11173325106066419,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.646115569823435,
"calib/step_q_c_n": 623.0,
"calib/step_q_gap": 0.06539343481558568,
"calib/step_q_w": 0.5807221350078493,
"calib/step_q_w_n": 637.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2462.0,
"completions/max_terminated_length": 2462.0,
"completions/mean_length": 514.59375,
"completions/mean_terminated_length": 514.59375,
"completions/min_length": 153.0,
"completions/min_terminated_length": 153.0,
"epoch": 0.10453333333333334,
"grad_norm": 0.21482080221176147,
"learning_rate": 2.861111111111111e-06,
"loss": 0.0683,
"num_tokens": 24719754.0,
"reward": 1.015625,
"reward_std": 0.30873024463653564,
"rewards/accuracy_reward_step": 0.5234375,
"rewards/format_reward_step": 0.984375,
"step": 98
},
{
"calib/answer_extract_rate": 1.0,
"calib/avg_num_step_conf": 5.4453125,
"calib/ece": 0.5184313725490195,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.5568627450980392,
"calib/gap": 0.004172560113154256,
"calib/mean_conf": 0.9145098039215687,
"calib/mu_c": 0.9170297029702972,
"calib/mu_w": 0.9128571428571429,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.5184313725490195,
"calib/std_conf": 0.05420056166014588,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.6449285714285714,
"calib/step_q_c_n": 560.0,
"calib/step_q_gap": 0.05164319972593345,
"calib/step_q_w": 0.593285371702638,
"calib/step_q_w_n": 834.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1263.0,
"completions/max_terminated_length": 1263.0,
"completions/mean_length": 526.65234375,
"completions/mean_terminated_length": 528.7176513671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 141.0,
"epoch": 0.1056,
"grad_norm": 0.17127177119255066,
"learning_rate": 2.8333333333333335e-06,
"loss": -0.0171,
"num_tokens": 24984185.0,
"reward": 0.892578125,
"reward_std": 0.20884855091571808,
"rewards/accuracy_reward_step": 0.39453125,
"rewards/format_reward_step": 0.99609375,
"step": 99
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 5.08203125,
"calib/ece": 0.3804365079365078,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.5317460317460317,
"calib/gap": 0.014262108262108386,
"calib/mean_conf": 0.9090079365079363,
"calib/mu_c": 0.9156296296296298,
"calib/mu_w": 0.9013675213675214,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3768650793650792,
"calib/std_conf": 0.07703977492918072,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.6183102493074792,
"calib/step_q_c_n": 722.0,
"calib/step_q_gap": 0.059225620637358256,
"calib/step_q_w": 0.559084628670121,
"calib/step_q_w_n": 579.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 3019.0,
"completions/max_terminated_length": 3019.0,
"completions/mean_length": 535.65234375,
"completions/mean_terminated_length": 535.65234375,
"completions/min_length": 162.0,
"completions/min_terminated_length": 162.0,
"epoch": 0.10666666666666667,
"grad_norm": 0.21172572672367096,
"learning_rate": 2.805555555555556e-06,
"loss": 0.041,
"num_tokens": 25252528.0,
"reward": 1.0234375,
"reward_std": 0.2752569019794464,
"rewards/accuracy_reward_step": 0.53125,
"rewards/format_reward_step": 0.984375,
"step": 100
},
{
"calib/answer_extract_rate": 1.0,
"calib/avg_num_step_conf": 6.66015625,
"calib/ece": 0.41351562500000005,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.49609375,
"calib/gap": 0.03322748091603056,
"calib/mean_conf": 0.9017968749999999,
"calib/mu_c": 0.9188,
"calib/mu_w": 0.8855725190839694,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.41351562500000005,
"calib/std_conf": 0.11340217145290639,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.549247572815534,
"calib/step_q_c_n": 824.0,
"calib/step_q_gap": -0.03996922627640698,
"calib/step_q_w": 0.589216799091941,
"calib/step_q_w_n": 881.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1932.0,
"completions/max_terminated_length": 1932.0,
"completions/mean_length": 546.21484375,
"completions/mean_terminated_length": 548.3568725585938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 160.0,
"epoch": 0.10773333333333333,
"grad_norm": 0.1781286895275116,
"learning_rate": 2.7777777777777783e-06,
"loss": 0.0106,
"num_tokens": 25523159.0,
"reward": 0.98828125,
"reward_std": 0.25395649671554565,
"rewards/accuracy_reward_step": 0.48828125,
"rewards/format_reward_step": 1.0,
"step": 101
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 5.74609375,
"calib/ece": 0.3009486166007904,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.48221343873517786,
"calib/gap": 0.017669552669552724,
"calib/mean_conf": 0.9096442687747036,
"calib/mu_c": 0.9165584415584418,
"calib/mu_w": 0.8988888888888891,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3009486166007904,
"calib/std_conf": 0.05771224735548806,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.6087775061124695,
"calib/step_q_c_n": 818.0,
"calib/step_q_gap": -0.011360319308663658,
"calib/step_q_w": 0.6201378254211332,
"calib/step_q_w_n": 653.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2739.0,
"completions/max_terminated_length": 2739.0,
"completions/mean_length": 467.109375,
"completions/mean_terminated_length": 470.78741455078125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 173.0,
"epoch": 0.1088,
"grad_norm": 0.21871761977672577,
"learning_rate": 2.7500000000000004e-06,
"loss": 0.0158,
"num_tokens": 25773243.0,
"reward": 1.095703125,
"reward_std": 0.2334584891796112,
"rewards/accuracy_reward_step": 0.6015625,
"rewards/format_reward_step": 0.98828125,
"step": 102
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 6.2265625,
"calib/ece": 0.3555686274509805,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.4470588235294118,
"calib/gap": 0.013400520962540297,
"calib/mean_conf": 0.9006666666666667,
"calib/mu_c": 0.9067625899280577,
"calib/mu_w": 0.8933620689655174,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3555686274509805,
"calib/std_conf": 0.0825948454610336,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.6030913978494623,
"calib/step_q_c_n": 744.0,
"calib/step_q_gap": 0.0594561037318152,
"calib/step_q_w": 0.5436352941176471,
"calib/step_q_w_n": 850.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2392.0,
"completions/max_terminated_length": 2392.0,
"completions/mean_length": 568.30078125,
"completions/mean_terminated_length": 570.5294189453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 157.0,
"epoch": 0.10986666666666667,
"grad_norm": 0.20121116936206818,
"learning_rate": 2.7222222222222224e-06,
"loss": -0.0099,
"num_tokens": 26047088.0,
"reward": 1.041015625,
"reward_std": 0.25462427735328674,
"rewards/accuracy_reward_step": 0.54296875,
"rewards/format_reward_step": 0.99609375,
"step": 103
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 6.35546875,
"calib/ece": 0.4462450592885375,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.4268774703557312,
"calib/gap": 0.01959420289855074,
"calib/mean_conf": 0.9007905138339921,
"calib/mu_c": 0.9114782608695653,
"calib/mu_w": 0.8918840579710146,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.4462450592885375,
"calib/std_conf": 0.07168888363264464,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.6090327169274539,
"calib/step_q_c_n": 703.0,
"calib/step_q_gap": 0.06891366930840614,
"calib/step_q_w": 0.5401190476190477,
"calib/step_q_w_n": 924.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2812.0,
"completions/max_terminated_length": 2812.0,
"completions/mean_length": 527.26171875,
"completions/mean_terminated_length": 529.3294677734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 120.0,
"epoch": 0.11093333333333333,
"grad_norm": 0.1557941883802414,
"learning_rate": 2.6944444444444444e-06,
"loss": 0.0554,
"num_tokens": 26312555.0,
"reward": 0.943359375,
"reward_std": 0.20018061995506287,
"rewards/accuracy_reward_step": 0.44921875,
"rewards/format_reward_step": 0.98828125,
"step": 104
},
{
"calib/answer_extract_rate": 1.0,
"calib/avg_num_step_conf": 6.23046875,
"calib/ece": 0.37460937499999997,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.39453125,
"calib/gap": 0.043318670576735,
"calib/mean_conf": 0.880078125,
"calib/mu_c": 0.9010606060606062,
"calib/mu_w": 0.8577419354838712,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.36953125,
"calib/std_conf": 0.14698796769288422,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.6244238410596027,
"calib/step_q_c_n": 755.0,
"calib/step_q_gap": 0.0893762220119837,
"calib/step_q_w": 0.535047619047619,
"calib/step_q_w_n": 840.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1821.0,
"completions/max_terminated_length": 1821.0,
"completions/mean_length": 526.8203125,
"completions/mean_terminated_length": 528.8862915039062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 172.0,
"epoch": 0.112,
"grad_norm": 0.23184296488761902,
"learning_rate": 2.666666666666667e-06,
"loss": 0.012,
"num_tokens": 26576989.0,
"reward": 1.015625,
"reward_std": 0.3553318381309509,
"rewards/accuracy_reward_step": 0.515625,
"rewards/format_reward_step": 1.0,
"step": 105
},
{
"calib/answer_extract_rate": 1.0,
"calib/avg_num_step_conf": 5.6640625,
"calib/ece": 0.33258823529411763,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.3254901960784314,
"calib/gap": 0.03748251748251763,
"calib/mean_conf": 0.8910196078431373,
"calib/mu_c": 0.9074825174825177,
"calib/mu_w": 0.8700000000000001,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.33141176470588235,
"calib/std_conf": 0.09624997890421533,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.6440481927710843,
"calib/step_q_c_n": 830.0,
"calib/step_q_gap": 0.11122561212592297,
"calib/step_q_w": 0.5328225806451613,
"calib/step_q_w_n": 620.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2346.0,
"completions/max_terminated_length": 2346.0,
"completions/mean_length": 487.78125,
"completions/mean_terminated_length": 487.78125,
"completions/min_length": 197.0,
"completions/min_terminated_length": 197.0,
"epoch": 0.11306666666666666,
"grad_norm": 0.1972045749425888,
"learning_rate": 2.6388888888888893e-06,
"loss": 0.0319,
"num_tokens": 26830253.0,
"reward": 1.056640625,
"reward_std": 0.19517174363136292,
"rewards/accuracy_reward_step": 0.55859375,
"rewards/format_reward_step": 0.99609375,
"step": 106
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 6.48828125,
"calib/ece": 0.30525691699604746,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.32806324110671936,
"calib/gap": 0.0016199194909755077,
"calib/mean_conf": 0.8957707509881423,
"calib/mu_c": 0.8964238410596029,
"calib/mu_w": 0.8948039215686274,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3020948616600791,
"calib/std_conf": 0.06305072628611637,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.5832730732635585,
"calib/step_q_c_n": 1051.0,
"calib/step_q_gap": 0.021404220804542073,
"calib/step_q_w": 0.5618688524590164,
"calib/step_q_w_n": 610.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2030.0,
"completions/max_terminated_length": 2030.0,
"completions/mean_length": 482.44140625,
"completions/mean_terminated_length": 486.2401428222656,
"completions/min_length": 0.0,
"completions/min_terminated_length": 173.0,
"epoch": 0.11413333333333334,
"grad_norm": 0.21706554293632507,
"learning_rate": 2.6111111111111113e-06,
"loss": -0.0156,
"num_tokens": 27082182.0,
"reward": 1.083984375,
"reward_std": 0.29333245754241943,
"rewards/accuracy_reward_step": 0.58984375,
"rewards/format_reward_step": 0.98828125,
"step": 107
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 6.39453125,
"calib/ece": 0.22215139442231088,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.3426294820717131,
"calib/gap": 0.012996824938663498,
"calib/mean_conf": 0.8954581673306773,
"calib/mu_c": 0.8997041420118344,
"calib/mu_w": 0.8867073170731709,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.22215139442231088,
"calib/std_conf": 0.05985448192909104,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.5330503731343285,
"calib/step_q_c_n": 1072.0,
"calib/step_q_gap": 0.02228931118742583,
"calib/step_q_w": 0.5107610619469026,
"calib/step_q_w_n": 565.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2643.0,
"completions/max_terminated_length": 2643.0,
"completions/mean_length": 528.4609375,
"completions/mean_terminated_length": 534.727294921875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 150.0,
"epoch": 0.1152,
"grad_norm": 0.1876976191997528,
"learning_rate": 2.5833333333333337e-06,
"loss": 0.0132,
"num_tokens": 27344508.0,
"reward": 1.150390625,
"reward_std": 0.2519606649875641,
"rewards/accuracy_reward_step": 0.66015625,
"rewards/format_reward_step": 0.98046875,
"step": 108
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 7.30859375,
"calib/ece": 0.4309881422924902,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.32806324110671936,
"calib/gap": 0.007616957230653587,
"calib/mean_conf": 0.9013438735177867,
"calib/mu_c": 0.9053781512605045,
"calib/mu_w": 0.8977611940298509,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.4309881422924902,
"calib/std_conf": 0.047271107881366875,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.6075387384412153,
"calib/step_q_c_n": 757.0,
"calib/step_q_gap": 0.13707195208573963,
"calib/step_q_w": 0.4704667863554757,
"calib/step_q_w_n": 1114.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2763.0,
"completions/max_terminated_length": 2763.0,
"completions/mean_length": 563.3828125,
"completions/mean_terminated_length": 565.5921630859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 171.0,
"epoch": 0.11626666666666667,
"grad_norm": 0.1523021161556244,
"learning_rate": 2.5555555555555557e-06,
"loss": 0.0357,
"num_tokens": 27617142.0,
"reward": 0.958984375,
"reward_std": 0.1532130092382431,
"rewards/accuracy_reward_step": 0.46484375,
"rewards/format_reward_step": 0.98828125,
"step": 109
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 6.15234375,
"calib/ece": 0.45134387351778654,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.3241106719367589,
"calib/gap": -0.0027101449275359624,
"calib/mean_conf": 0.8995652173913045,
"calib/mu_c": 0.8980869565217394,
"calib/mu_w": 0.9007971014492754,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.4481818181818182,
"calib/std_conf": 0.06555485811578877,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.5879010494752623,
"calib/step_q_c_n": 667.0,
"calib/step_q_gap": 0.10748254727261913,
"calib/step_q_w": 0.4804185022026432,
"calib/step_q_w_n": 908.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2461.0,
"completions/max_terminated_length": 2461.0,
"completions/mean_length": 536.0859375,
"completions/mean_terminated_length": 538.1882934570312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 127.0,
"epoch": 0.11733333333333333,
"grad_norm": 0.19876490533351898,
"learning_rate": 2.5277777777777778e-06,
"loss": 0.0254,
"num_tokens": 27883108.0,
"reward": 0.943359375,
"reward_std": 0.29334381222724915,
"rewards/accuracy_reward_step": 0.44921875,
"rewards/format_reward_step": 0.98828125,
"step": 110
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 6.62890625,
"calib/ece": 0.38494071146245074,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.33201581027667987,
"calib/gap": 0.013279549718574035,
"calib/mean_conf": 0.8987747035573124,
"calib/mu_c": 0.9052307692307693,
"calib/mu_w": 0.8919512195121952,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.38494071146245074,
"calib/std_conf": 0.0745611709527481,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.5369295774647888,
"calib/step_q_c_n": 710.0,
"calib/step_q_gap": 0.11332268790045236,
"calib/step_q_w": 0.4236068895643364,
"calib/step_q_w_n": 987.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2428.0,
"completions/max_terminated_length": 2428.0,
"completions/mean_length": 533.6484375,
"completions/mean_terminated_length": 535.7412109375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 156.0,
"epoch": 0.1184,
"grad_norm": 0.21412035822868347,
"learning_rate": 2.5e-06,
"loss": 0.0424,
"num_tokens": 28150938.0,
"reward": 1.001953125,
"reward_std": 0.2785911560058594,
"rewards/accuracy_reward_step": 0.5078125,
"rewards/format_reward_step": 0.98828125,
"step": 111
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/avg_num_step_conf": 6.67578125,
"calib/ece": 0.35592000000000024,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.22,
"calib/gap": 0.012882205513784384,
"calib/mean_conf": 0.8735200000000001,
"calib/mu_c": 0.8795488721804512,
"calib/mu_w": 0.8666666666666668,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.34872000000000025,
"calib/std_conf": 0.12912788080039106,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.4987410071942446,
"calib/step_q_c_n": 834.0,
"calib/step_q_gap": 0.12077529290853029,
"calib/step_q_w": 0.3779657142857143,
"calib/step_q_w_n": 875.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 1860.0,
"completions/max_terminated_length": 1860.0,
"completions/mean_length": 554.765625,
"completions/mean_terminated_length": 565.8167724609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 195.0,
"epoch": 0.11946666666666667,
"grad_norm": 0.2119046002626419,
"learning_rate": 2.4722222222222226e-06,
"loss": -0.0201,
"num_tokens": 28424686.0,
"reward": 1.0078125,
"reward_std": 0.2706044018268585,
"rewards/accuracy_reward_step": 0.51953125,
"rewards/format_reward_step": 0.9765625,
"step": 112
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 6.3359375,
"calib/ece": 0.25003937007874033,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.16141732283464566,
"calib/gap": 0.018676865271068155,
"calib/mean_conf": 0.8862598425196851,
"calib/mu_c": 0.8930246913580248,
"calib/mu_w": 0.8743478260869566,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.24925196850393716,
"calib/std_conf": 0.05934507556382405,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.4699704433497537,
"calib/step_q_c_n": 1015.0,
"calib/step_q_gap": 0.09557176130692008,
"calib/step_q_w": 0.3743986820428336,
"calib/step_q_w_n": 607.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2715.0,
"completions/max_terminated_length": 2715.0,
"completions/mean_length": 524.54296875,
"completions/mean_terminated_length": 524.54296875,
"completions/min_length": 168.0,
"completions/min_terminated_length": 168.0,
"epoch": 0.12053333333333334,
"grad_norm": 0.21917535364627838,
"learning_rate": 2.4444444444444447e-06,
"loss": 0.0458,
"num_tokens": 28687977.0,
"reward": 1.126953125,
"reward_std": 0.25684428215026855,
"rewards/accuracy_reward_step": 0.6328125,
"rewards/format_reward_step": 0.98828125,
"step": 113
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 6.5234375,
"calib/ece": 0.2729644268774706,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.16600790513833993,
"calib/gap": 0.019191118160190257,
"calib/mean_conf": 0.8895652173913045,
"calib/mu_c": 0.896923076923077,
"calib/mu_w": 0.8777319587628868,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2729644268774706,
"calib/std_conf": 0.07450633773909882,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.4834255319148936,
"calib/step_q_c_n": 940.0,
"calib/step_q_gap": 0.09075429903818133,
"calib/step_q_w": 0.3926712328767123,
"calib/step_q_w_n": 730.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2657.0,
"completions/max_terminated_length": 2657.0,
"completions/mean_length": 507.86328125,
"completions/mean_terminated_length": 507.86328125,
"completions/min_length": 156.0,
"completions/min_terminated_length": 156.0,
"epoch": 0.1216,
"grad_norm": 0.2042660415172577,
"learning_rate": 2.4166666666666667e-06,
"loss": 0.0274,
"num_tokens": 28946822.0,
"reward": 1.103515625,
"reward_std": 0.26051056385040283,
"rewards/accuracy_reward_step": 0.609375,
"rewards/format_reward_step": 0.98828125,
"step": 114
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 6.265625,
"calib/ece": 0.3421176470588237,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.17647058823529413,
"calib/gap": 0.016167701863354034,
"calib/mean_conf": 0.8911372549019608,
"calib/mu_c": 0.8984285714285716,
"calib/mu_w": 0.8822608695652175,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3421176470588237,
"calib/std_conf": 0.046827601885498976,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.4858668197474168,
"calib/step_q_c_n": 871.0,
"calib/step_q_gap": 0.08058305439953145,
"calib/step_q_w": 0.40528376534788535,
"calib/step_q_w_n": 733.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2410.0,
"completions/max_terminated_length": 2410.0,
"completions/mean_length": 504.40625,
"completions/mean_terminated_length": 504.40625,
"completions/min_length": 188.0,
"completions/min_terminated_length": 188.0,
"epoch": 0.12266666666666666,
"grad_norm": 0.19513730704784393,
"learning_rate": 2.388888888888889e-06,
"loss": 0.0299,
"num_tokens": 29205022.0,
"reward": 1.044921875,
"reward_std": 0.2489628791809082,
"rewards/accuracy_reward_step": 0.546875,
"rewards/format_reward_step": 0.99609375,
"step": 115
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 7.33984375,
"calib/ece": 0.3352755905511812,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.18503937007874016,
"calib/gap": -0.005189229900207093,
"calib/mean_conf": 0.8903937007874017,
"calib/mu_c": 0.888085106382979,
"calib/mu_w": 0.8932743362831861,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3352755905511812,
"calib/std_conf": 0.07678481615408075,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.4639470782800441,
"calib/step_q_c_n": 907.0,
"calib/step_q_gap": 0.07233185194259556,
"calib/step_q_w": 0.39161522633744855,
"calib/step_q_w_n": 972.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2970.0,
"completions/max_terminated_length": 2970.0,
"completions/mean_length": 595.77734375,
"completions/mean_terminated_length": 595.77734375,
"completions/min_length": 109.0,
"completions/min_terminated_length": 109.0,
"epoch": 0.12373333333333333,
"grad_norm": 0.16184002161026,
"learning_rate": 2.361111111111111e-06,
"loss": 0.0391,
"num_tokens": 29485869.0,
"reward": 1.046875,
"reward_std": 0.19438795745372772,
"rewards/accuracy_reward_step": 0.55078125,
"rewards/format_reward_step": 0.9921875,
"step": 116
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 6.7890625,
"calib/ece": 0.42549019607843147,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.12941176470588237,
"calib/gap": 0.0152452619843928,
"calib/mean_conf": 0.8843137254901962,
"calib/mu_c": 0.892564102564103,
"calib/mu_w": 0.8773188405797102,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.42549019607843147,
"calib/std_conf": 0.08700811100258624,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.4785615491009682,
"calib/step_q_c_n": 723.0,
"calib/step_q_gap": 0.05154677077584502,
"calib/step_q_w": 0.4270147783251232,
"calib/step_q_w_n": 1015.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2316.0,
"completions/max_terminated_length": 2316.0,
"completions/mean_length": 548.05859375,
"completions/mean_terminated_length": 548.05859375,
"completions/min_length": 165.0,
"completions/min_terminated_length": 165.0,
"epoch": 0.1248,
"grad_norm": 0.2146761268377304,
"learning_rate": 2.3333333333333336e-06,
"loss": 0.0386,
"num_tokens": 29756580.0,
"reward": 0.953125,
"reward_std": 0.2651212811470032,
"rewards/accuracy_reward_step": 0.45703125,
"rewards/format_reward_step": 0.9921875,
"step": 117
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 7.44921875,
"calib/ece": 0.3526771653543309,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.16535433070866143,
"calib/gap": 0.006148175912043974,
"calib/mean_conf": 0.8920472440944882,
"calib/mu_c": 0.8948550724637683,
"calib/mu_w": 0.8887068965517243,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.35070866141732304,
"calib/std_conf": 0.05002107560035077,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.40357984994640944,
"calib/step_q_c_n": 933.0,
"calib/step_q_gap": 0.008590116886861177,
"calib/step_q_w": 0.39498973305954826,
"calib/step_q_w_n": 974.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2829.0,
"completions/max_terminated_length": 2829.0,
"completions/mean_length": 560.59765625,
"completions/mean_terminated_length": 562.7960815429688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 133.0,
"epoch": 0.12586666666666665,
"grad_norm": 0.18381370604038239,
"learning_rate": 2.305555555555556e-06,
"loss": -0.0124,
"num_tokens": 30027909.0,
"reward": 1.033203125,
"reward_std": 0.21692538261413574,
"rewards/accuracy_reward_step": 0.5390625,
"rewards/format_reward_step": 0.98828125,
"step": 118
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 7.23046875,
"calib/ece": 0.36031496062992147,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.12598425196850394,
"calib/gap": 0.019348258706467503,
"calib/mean_conf": 0.8878740157480315,
"calib/mu_c": 0.8970149253731343,
"calib/mu_w": 0.8776666666666668,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.36031496062992147,
"calib/std_conf": 0.06190709220255889,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.47093023255813954,
"calib/step_q_c_n": 860.0,
"calib/step_q_gap": 0.13102104991434538,
"calib/step_q_w": 0.33990918264379416,
"calib/step_q_w_n": 991.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2648.0,
"completions/max_terminated_length": 2648.0,
"completions/mean_length": 598.7109375,
"completions/mean_terminated_length": 603.4251708984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 136.0,
"epoch": 0.12693333333333334,
"grad_norm": 0.20627912878990173,
"learning_rate": 2.277777777777778e-06,
"loss": -0.0183,
"num_tokens": 30310051.0,
"reward": 1.013671875,
"reward_std": 0.2583560347557068,
"rewards/accuracy_reward_step": 0.5234375,
"rewards/format_reward_step": 0.98046875,
"step": 119
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 6.734375,
"calib/ece": 0.2530708661417323,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.19291338582677164,
"calib/gap": -0.009013550135501092,
"calib/mean_conf": 0.8892913385826774,
"calib/mu_c": 0.8860975609756101,
"calib/mu_w": 0.8951111111111112,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.24834645669291341,
"calib/std_conf": 0.07883683041885726,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.4628971962616822,
"calib/step_q_c_n": 1070.0,
"calib/step_q_gap": 0.0582748721026608,
"calib/step_q_w": 0.4046223241590214,
"calib/step_q_w_n": 654.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2038.0,
"completions/max_terminated_length": 2038.0,
"completions/mean_length": 549.375,
"completions/mean_terminated_length": 551.5294189453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 200.0,
"epoch": 0.128,
"grad_norm": 0.21776804327964783,
"learning_rate": 2.25e-06,
"loss": -0.0059,
"num_tokens": 30581187.0,
"reward": 1.1328125,
"reward_std": 0.24949431419372559,
"rewards/accuracy_reward_step": 0.640625,
"rewards/format_reward_step": 0.984375,
"step": 120
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 7.37890625,
"calib/ece": 0.3217968750000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.1171875,
"calib/gap": 0.020435865504358497,
"calib/mean_conf": 0.8921093750000002,
"calib/mu_c": 0.9008904109589043,
"calib/mu_w": 0.8804545454545458,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3217968750000001,
"calib/std_conf": 0.0668154214018693,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3749952785646837,
"calib/step_q_c_n": 1059.0,
"calib/step_q_gap": 0.01505166410685238,
"calib/step_q_w": 0.3599436144578313,
"calib/step_q_w_n": 830.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1310.0,
"completions/max_terminated_length": 1310.0,
"completions/mean_length": 579.25390625,
"completions/mean_terminated_length": 581.5255126953125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 167.0,
"epoch": 0.12906666666666666,
"grad_norm": 0.23283204436302185,
"learning_rate": 2.222222222222222e-06,
"loss": 0.0246,
"num_tokens": 30858340.0,
"reward": 1.06640625,
"reward_std": 0.3377421498298645,
"rewards/accuracy_reward_step": 0.5703125,
"rewards/format_reward_step": 0.9921875,
"step": 121
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 7.13671875,
"calib/ece": 0.3295238095238098,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.13095238095238096,
"calib/gap": -0.007588374927824604,
"calib/mean_conf": 0.8834920634920637,
"calib/mu_c": 0.8802097902097904,
"calib/mu_w": 0.887798165137615,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.322777777777778,
"calib/std_conf": 0.08854930883830771,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.4114647177419355,
"calib/step_q_c_n": 992.0,
"calib/step_q_gap": 0.05212340037666596,
"calib/step_q_w": 0.35934131736526953,
"calib/step_q_w_n": 835.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2699.0,
"completions/max_terminated_length": 2699.0,
"completions/mean_length": 552.42578125,
"completions/mean_terminated_length": 556.7755737304688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 226.0,
"epoch": 0.13013333333333332,
"grad_norm": 0.18356913328170776,
"learning_rate": 2.1944444444444445e-06,
"loss": -0.0261,
"num_tokens": 31130913.0,
"reward": 1.0546875,
"reward_std": 0.20936806499958038,
"rewards/accuracy_reward_step": 0.5625,
"rewards/format_reward_step": 0.984375,
"step": 122
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 7.73046875,
"calib/ece": 0.420830039525692,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.09090909090909091,
"calib/gap": -0.00712216229775442,
"calib/mean_conf": 0.888814229249012,
"calib/mu_c": 0.8850420168067232,
"calib/mu_w": 0.8921641791044777,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.41964426877470384,
"calib/std_conf": 0.052556924485385015,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.39687116564417185,
"calib/step_q_c_n": 815.0,
"calib/step_q_gap": 0.048168416503278355,
"calib/step_q_w": 0.3487027491408935,
"calib/step_q_w_n": 1164.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2748.0,
"completions/max_terminated_length": 2748.0,
"completions/mean_length": 642.26171875,
"completions/mean_terminated_length": 642.26171875,
"completions/min_length": 194.0,
"completions/min_terminated_length": 194.0,
"epoch": 0.1312,
"grad_norm": 0.23982587456703186,
"learning_rate": 2.166666666666667e-06,
"loss": 0.0348,
"num_tokens": 31424428.0,
"reward": 0.958984375,
"reward_std": 0.33340585231781006,
"rewards/accuracy_reward_step": 0.46484375,
"rewards/format_reward_step": 0.98828125,
"step": 123
},
{
"calib/answer_extract_rate": 1.0,
"calib/avg_num_step_conf": 6.78125,
"calib/ece": 0.25070312500000025,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0703125,
"calib/gap": 0.022361556064073196,
"calib/mean_conf": 0.8764843750000002,
"calib/mu_c": 0.8847826086956525,
"calib/mu_w": 0.8624210526315793,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.24914062500000023,
"calib/std_conf": 0.09076575142012198,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3849477682811016,
"calib/step_q_c_n": 1053.0,
"calib/step_q_gap": -0.023632026740860346,
"calib/step_q_w": 0.40857979502196196,
"calib/step_q_w_n": 683.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1505.0,
"completions/max_terminated_length": 1505.0,
"completions/mean_length": 535.1875,
"completions/mean_terminated_length": 537.2863159179688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 176.0,
"epoch": 0.13226666666666667,
"grad_norm": 0.22693496942520142,
"learning_rate": 2.138888888888889e-06,
"loss": 0.0037,
"num_tokens": 31692060.0,
"reward": 1.12890625,
"reward_std": 0.20502206683158875,
"rewards/accuracy_reward_step": 0.62890625,
"rewards/format_reward_step": 1.0,
"step": 124
},
{
"calib/answer_extract_rate": 0.984375,
"calib/avg_num_step_conf": 6.69140625,
"calib/ece": 0.37373015873015897,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0873015873015873,
"calib/gap": -0.006484133493155242,
"calib/mean_conf": 0.8935714285714287,
"calib/mu_c": 0.8904580152671756,
"calib/mu_w": 0.8969421487603308,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.37373015873015897,
"calib/std_conf": 0.043378443291612334,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.3743412797992472,
"calib/step_q_c_n": 797.0,
"calib/step_q_gap": 0.013107655345098723,
"calib/step_q_w": 0.36123362445414847,
"calib/step_q_w_n": 916.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2746.0,
"completions/max_terminated_length": 2746.0,
"completions/mean_length": 582.0703125,
"completions/mean_terminated_length": 584.3529663085938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 172.0,
"epoch": 0.13333333333333333,
"grad_norm": 0.23350414633750916,
"learning_rate": 2.1111111111111114e-06,
"loss": 0.043,
"num_tokens": 31969686.0,
"reward": 1.001953125,
"reward_std": 0.34277743101119995,
"rewards/accuracy_reward_step": 0.51171875,
"rewards/format_reward_step": 0.98046875,
"step": 125
},
{
"calib/answer_extract_rate": 0.96484375,
"calib/avg_num_step_conf": 8.22265625,
"calib/ece": 0.30871485943775123,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.08032128514056225,
"calib/gap": 0.03169217574877958,
"calib/mean_conf": 0.8830120481927713,
"calib/mu_c": 0.8965034965034968,
"calib/mu_w": 0.8648113207547172,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.30871485943775123,
"calib/std_conf": 0.09798862658073332,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.39058176100628933,
"calib/step_q_c_n": 954.0,
"calib/step_q_gap": 0.03129852903409125,
"calib/step_q_w": 0.3592832319721981,
"calib/step_q_w_n": 1151.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2981.0,
"completions/max_terminated_length": 2981.0,
"completions/mean_length": 650.6015625,
"completions/mean_terminated_length": 650.6015625,
"completions/min_length": 176.0,
"completions/min_terminated_length": 176.0,
"epoch": 0.1344,
"grad_norm": 0.22082242369651794,
"learning_rate": 2.0833333333333334e-06,
"loss": 0.0527,
"num_tokens": 32265512.0,
"reward": 1.0390625,
"reward_std": 0.28105270862579346,
"rewards/accuracy_reward_step": 0.55859375,
"rewards/format_reward_step": 0.9609375,
"step": 126
},
{
"calib/answer_extract_rate": 0.984375,
"calib/avg_num_step_conf": 7.265625,
"calib/ece": 0.3551984126984128,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.051587301587301584,
"calib/gap": 0.02467424242424232,
"calib/mean_conf": 0.8790079365079367,
"calib/mu_c": 0.8907575757575759,
"calib/mu_w": 0.8660833333333335,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3551984126984128,
"calib/std_conf": 0.10854104600182629,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.3945838084378563,
"calib/step_q_c_n": 877.0,
"calib/step_q_gap": 0.036750644653522624,
"calib/step_q_w": 0.35783316378433366,
"calib/step_q_w_n": 983.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2921.0,
"completions/max_terminated_length": 2921.0,
"completions/mean_length": 570.44140625,
"completions/mean_terminated_length": 572.678466796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 181.0,
"epoch": 0.13546666666666668,
"grad_norm": 0.25337034463882446,
"learning_rate": 2.0555555555555555e-06,
"loss": 0.085,
"num_tokens": 32539025.0,
"reward": 1.005859375,
"reward_std": 0.2718546390533447,
"rewards/accuracy_reward_step": 0.515625,
"rewards/format_reward_step": 0.98046875,
"step": 127
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 6.375,
"calib/ece": 0.30916996047430867,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.05928853754940711,
"calib/gap": 0.0035153256704979485,
"calib/mean_conf": 0.8822924901185771,
"calib/mu_c": 0.8837931034482761,
"calib/mu_w": 0.8802777777777782,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.30916996047430867,
"calib/std_conf": 0.08811307763056817,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.3980301274623407,
"calib/step_q_c_n": 863.0,
"calib/step_q_gap": 0.05445405464049413,
"calib/step_q_w": 0.34357607282184655,
"calib/step_q_w_n": 769.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 1889.0,
"completions/max_terminated_length": 1889.0,
"completions/mean_length": 556.171875,
"completions/mean_terminated_length": 562.766845703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 165.0,
"epoch": 0.13653333333333334,
"grad_norm": 0.23876334726810455,
"learning_rate": 2.027777777777778e-06,
"loss": -0.0019,
"num_tokens": 32811877.0,
"reward": 1.056640625,
"reward_std": 0.3122556209564209,
"rewards/accuracy_reward_step": 0.56640625,
"rewards/format_reward_step": 0.98046875,
"step": 128
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 6.875,
"calib/ece": 0.2761023622047246,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.01968503937007874,
"calib/gap": 0.006414467253176959,
"calib/mean_conf": 0.8863385826771656,
"calib/mu_c": 0.8888387096774196,
"calib/mu_w": 0.8824242424242427,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2761023622047246,
"calib/std_conf": 0.03612613956951282,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.361572265625,
"calib/step_q_c_n": 1024.0,
"calib/step_q_gap": 0.016531504755434745,
"calib/step_q_w": 0.34504076086956526,
"calib/step_q_w_n": 736.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1549.0,
"completions/max_terminated_length": 1549.0,
"completions/mean_length": 517.47265625,
"completions/mean_terminated_length": 519.5020141601562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 186.0,
"epoch": 0.1376,
"grad_norm": 0.20662395656108856,
"learning_rate": 2.0000000000000003e-06,
"loss": -0.0179,
"num_tokens": 33070542.0,
"reward": 1.1015625,
"reward_std": 0.21173818409442902,
"rewards/accuracy_reward_step": 0.60546875,
"rewards/format_reward_step": 0.9921875,
"step": 129
},
{
"calib/answer_extract_rate": 0.984375,
"calib/avg_num_step_conf": 6.6484375,
"calib/ece": 0.25928571428571456,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.05555555555555555,
"calib/gap": -0.013150892672503023,
"calib/mean_conf": 0.8799206349206351,
"calib/mu_c": 0.875276073619632,
"calib/mu_w": 0.888426966292135,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2461904761904765,
"calib/std_conf": 0.10477107343235681,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3432317636195753,
"calib/step_q_c_n": 1083.0,
"calib/step_q_gap": -0.023908785653445674,
"calib/step_q_w": 0.367140549273021,
"calib/step_q_w_n": 619.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 1711.0,
"completions/max_terminated_length": 1711.0,
"completions/mean_length": 497.54296875,
"completions/mean_terminated_length": 503.4427185058594,
"completions/min_length": 0.0,
"completions/min_terminated_length": 173.0,
"epoch": 0.13866666666666666,
"grad_norm": 0.20441967248916626,
"learning_rate": 1.9722222222222224e-06,
"loss": -0.0314,
"num_tokens": 33327009.0,
"reward": 1.12890625,
"reward_std": 0.1633341908454895,
"rewards/accuracy_reward_step": 0.63671875,
"rewards/format_reward_step": 0.984375,
"step": 130
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 6.546875,
"calib/ece": 0.4560629921259844,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.047244094488188976,
"calib/gap": -0.0062163876204970325,
"calib/mean_conf": 0.8781102362204726,
"calib/mu_c": 0.8745370370370374,
"calib/mu_w": 0.8807534246575345,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.4544881889763781,
"calib/std_conf": 0.07303577252585294,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3701863354037267,
"calib/step_q_c_n": 644.0,
"calib/step_q_gap": 0.050687304395974686,
"calib/step_q_w": 0.319499031007752,
"calib/step_q_w_n": 1032.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 1929.0,
"completions/max_terminated_length": 1929.0,
"completions/mean_length": 504.05078125,
"completions/mean_terminated_length": 508.0196838378906,
"completions/min_length": 0.0,
"completions/min_terminated_length": 236.0,
"epoch": 0.13973333333333332,
"grad_norm": 0.21228958666324615,
"learning_rate": 1.944444444444445e-06,
"loss": -0.0192,
"num_tokens": 33586062.0,
"reward": 0.91796875,
"reward_std": 0.19925057888031006,
"rewards/accuracy_reward_step": 0.421875,
"rewards/format_reward_step": 0.9921875,
"step": 131
},
{
"calib/answer_extract_rate": 1.0,
"calib/avg_num_step_conf": 7.31640625,
"calib/ece": 0.25726562500000005,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.07421875,
"calib/gap": 0.008959693911207989,
"calib/mean_conf": 0.8939843750000003,
"calib/mu_c": 0.8972392638036814,
"calib/mu_w": 0.8882795698924734,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.25726562500000005,
"calib/std_conf": 0.03220636204012142,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.4208199643493762,
"calib/step_q_c_n": 1122.0,
"calib/step_q_gap": 0.1147081134838635,
"calib/step_q_w": 0.3061118508655127,
"calib/step_q_w_n": 751.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1230.0,
"completions/max_terminated_length": 1230.0,
"completions/mean_length": 543.98828125,
"completions/mean_terminated_length": 546.12158203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 161.0,
"epoch": 0.1408,
"grad_norm": 0.2530343532562256,
"learning_rate": 1.916666666666667e-06,
"loss": 0.0204,
"num_tokens": 33854723.0,
"reward": 1.13671875,
"reward_std": 0.26170387864112854,
"rewards/accuracy_reward_step": 0.63671875,
"rewards/format_reward_step": 1.0,
"step": 132
},
{
"calib/answer_extract_rate": 0.984375,
"calib/avg_num_step_conf": 7.79296875,
"calib/ece": 0.38494071146245096,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.015810276679841896,
"calib/gap": 0.03768504626156555,
"calib/mean_conf": 0.8750592885375496,
"calib/mu_c": 0.8942741935483874,
"calib/mu_w": 0.8565891472868219,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.38494071146245096,
"calib/std_conf": 0.113960746842225,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.33913721413721415,
"calib/step_q_c_n": 962.0,
"calib/step_q_gap": 0.015032664282422281,
"calib/step_q_w": 0.32410454985479187,
"calib/step_q_w_n": 1033.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2496.0,
"completions/max_terminated_length": 2496.0,
"completions/mean_length": 615.33203125,
"completions/mean_terminated_length": 620.1771850585938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 162.0,
"epoch": 0.14186666666666667,
"grad_norm": 0.22492097318172455,
"learning_rate": 1.888888888888889e-06,
"loss": -0.0081,
"num_tokens": 34142400.0,
"reward": 0.9765625,
"reward_std": 0.32500171661376953,
"rewards/accuracy_reward_step": 0.484375,
"rewards/format_reward_step": 0.984375,
"step": 133
},
{
"calib/answer_extract_rate": 1.0,
"calib/avg_num_step_conf": 6.8671875,
"calib/ece": 0.3523046875000002,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.01953125,
"calib/gap": 0.00012329019198920044,
"calib/mean_conf": 0.8874609375000002,
"calib/mu_c": 0.8875182481751827,
"calib/mu_w": 0.8873949579831935,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.3523046875000002,
"calib/std_conf": 0.04205881342383654,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.3872364039955605,
"calib/step_q_c_n": 901.0,
"calib/step_q_gap": 0.08064363853465034,
"calib/step_q_w": 0.30659276546091013,
"calib/step_q_w_n": 857.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1417.0,
"completions/max_terminated_length": 1417.0,
"completions/mean_length": 590.69140625,
"completions/mean_terminated_length": 593.0078735351562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 157.0,
"epoch": 0.14293333333333333,
"grad_norm": 0.2229323834180832,
"learning_rate": 1.8611111111111113e-06,
"loss": 0.024,
"num_tokens": 34426377.0,
"reward": 1.03125,
"reward_std": 0.23475275933742523,
"rewards/accuracy_reward_step": 0.53515625,
"rewards/format_reward_step": 0.9921875,
"step": 134
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 6.8828125,
"calib/ece": 0.30433070866141765,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.015748031496062992,
"calib/gap": 0.008465068842427259,
"calib/mean_conf": 0.8870078740157482,
"calib/mu_c": 0.8905405405405408,
"calib/mu_w": 0.8820754716981135,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.30433070866141765,
"calib/std_conf": 0.04148455666795637,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.3566565040650406,
"calib/step_q_c_n": 984.0,
"calib/step_q_gap": 0.03763336781825399,
"calib/step_q_w": 0.31902313624678663,
"calib/step_q_w_n": 778.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2427.0,
"completions/max_terminated_length": 2427.0,
"completions/mean_length": 584.2734375,
"completions/mean_terminated_length": 586.5647583007812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 217.0,
"epoch": 0.144,
"grad_norm": 0.22317829728126526,
"learning_rate": 1.8333333333333333e-06,
"loss": -0.0075,
"num_tokens": 34705639.0,
"reward": 1.068359375,
"reward_std": 0.2599042057991028,
"rewards/accuracy_reward_step": 0.578125,
"rewards/format_reward_step": 0.98046875,
"step": 135
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 7.6484375,
"calib/ece": 0.39130434782608736,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.007905138339920948,
"calib/gap": 0.028525881470367942,
"calib/mean_conf": 0.8814229249011859,
"calib/mu_c": 0.8959677419354842,
"calib/mu_w": 0.8674418604651163,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.39130434782608736,
"calib/std_conf": 0.08511066376839702,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.4052961275626423,
"calib/step_q_c_n": 878.0,
"calib/step_q_gap": 0.12261094237745712,
"calib/step_q_w": 0.2826851851851852,
"calib/step_q_w_n": 1080.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2334.0,
"completions/max_terminated_length": 2334.0,
"completions/mean_length": 575.875,
"completions/mean_terminated_length": 580.409423828125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 195.0,
"epoch": 0.14506666666666668,
"grad_norm": 0.20847293734550476,
"learning_rate": 1.8055555555555557e-06,
"loss": -0.0167,
"num_tokens": 34985359.0,
"reward": 0.9765625,
"reward_std": 0.21726368367671967,
"rewards/accuracy_reward_step": 0.484375,
"rewards/format_reward_step": 0.984375,
"step": 136
},
{
"calib/answer_extract_rate": 1.0,
"calib/avg_num_step_conf": 6.76953125,
"calib/ece": 0.30309803921568634,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.01568627450980392,
"calib/gap": 0.00392018186410692,
"calib/mean_conf": 0.8834901960784315,
"calib/mu_c": 0.8851351351351353,
"calib/mu_w": 0.8812149532710284,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.30309803921568634,
"calib/std_conf": 0.03878516623103988,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3372983870967742,
"calib/step_q_c_n": 992.0,
"calib/step_q_gap": -0.0003534347655739789,
"calib/step_q_w": 0.33765182186234816,
"calib/step_q_w_n": 741.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1237.0,
"completions/max_terminated_length": 1237.0,
"completions/mean_length": 517.1015625,
"completions/mean_terminated_length": 519.1294555664062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 209.0,
"epoch": 0.14613333333333334,
"grad_norm": 0.20667599141597748,
"learning_rate": 1.777777777777778e-06,
"loss": -0.0081,
"num_tokens": 35248529.0,
"reward": 1.076171875,
"reward_std": 0.15070918202400208,
"rewards/accuracy_reward_step": 0.578125,
"rewards/format_reward_step": 0.99609375,
"step": 137
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 6.8828125,
"calib/ece": 0.3109842519685041,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.05905511811023622,
"calib/gap": 0.007436582109479284,
"calib/mean_conf": 0.8897244094488189,
"calib/mu_c": 0.8928571428571432,
"calib/mu_w": 0.885420560747664,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3109842519685041,
"calib/std_conf": 0.04171900323534182,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3578183716075157,
"calib/step_q_c_n": 958.0,
"calib/step_q_gap": 0.017830809418460936,
"calib/step_q_w": 0.33998756218905474,
"calib/step_q_w_n": 804.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1838.0,
"completions/max_terminated_length": 1838.0,
"completions/mean_length": 559.6796875,
"completions/mean_terminated_length": 561.8745727539062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 147.0,
"epoch": 0.1472,
"grad_norm": 0.28204968571662903,
"learning_rate": 1.75e-06,
"loss": 0.0007,
"num_tokens": 35519951.0,
"reward": 1.07421875,
"reward_std": 0.31372708082199097,
"rewards/accuracy_reward_step": 0.578125,
"rewards/format_reward_step": 0.9921875,
"step": 138
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 6.140625,
"calib/ece": 0.2312992125984255,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.011811023622047244,
"calib/gap": 0.004443521594684197,
"calib/mean_conf": 0.8895669291338584,
"calib/mu_c": 0.8910714285714288,
"calib/mu_w": 0.8866279069767447,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.22972440944881922,
"calib/std_conf": 0.04137652926520479,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.39311043566362713,
"calib/step_q_c_n": 987.0,
"calib/step_q_gap": 0.03943522198841343,
"calib/step_q_w": 0.3536752136752137,
"calib/step_q_w_n": 585.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 1291.0,
"completions/max_terminated_length": 1291.0,
"completions/mean_length": 483.73828125,
"completions/mean_terminated_length": 487.5472412109375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 156.0,
"epoch": 0.14826666666666666,
"grad_norm": 0.2560315728187561,
"learning_rate": 1.7222222222222224e-06,
"loss": 0.0195,
"num_tokens": 35770692.0,
"reward": 1.15234375,
"reward_std": 0.22398819029331207,
"rewards/accuracy_reward_step": 0.65625,
"rewards/format_reward_step": 0.9921875,
"step": 139
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 6.64453125,
"calib/ece": 0.2891338582677168,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.031496062992125984,
"calib/gap": 0.003798615155632734,
"calib/mean_conf": 0.8914960629921262,
"calib/mu_c": 0.8930065359477124,
"calib/mu_w": 0.8892079207920797,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2891338582677168,
"calib/std_conf": 0.0386296516228139,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.4036391752577319,
"calib/step_q_c_n": 970.0,
"calib/step_q_gap": 0.06558171971737625,
"calib/step_q_w": 0.33805745554035566,
"calib/step_q_w_n": 731.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2420.0,
"completions/max_terminated_length": 2420.0,
"completions/mean_length": 554.55078125,
"completions/mean_terminated_length": 558.9172973632812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 147.0,
"epoch": 0.14933333333333335,
"grad_norm": 0.241206556558609,
"learning_rate": 1.6944444444444446e-06,
"loss": -0.0029,
"num_tokens": 36041481.0,
"reward": 1.09375,
"reward_std": 0.19005943834781647,
"rewards/accuracy_reward_step": 0.59765625,
"rewards/format_reward_step": 0.9921875,
"step": 140
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 6.8359375,
"calib/ece": 0.23257812500000025,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.02734375,
"calib/gap": 0.013732262382864957,
"calib/mean_conf": 0.8810156250000002,
"calib/mu_c": 0.8858433734939763,
"calib/mu_w": 0.8721111111111114,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.23257812500000025,
"calib/std_conf": 0.047324343691797506,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.3931985294117647,
"calib/step_q_c_n": 1088.0,
"calib/step_q_gap": 0.08118946596765592,
"calib/step_q_w": 0.3120090634441088,
"calib/step_q_w_n": 662.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1694.0,
"completions/max_terminated_length": 1694.0,
"completions/mean_length": 578.109375,
"completions/mean_terminated_length": 580.3765258789062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 128.0,
"epoch": 0.1504,
"grad_norm": 0.19748741388320923,
"learning_rate": 1.6666666666666667e-06,
"loss": -0.0073,
"num_tokens": 36320381.0,
"reward": 1.14453125,
"reward_std": 0.16845659911632538,
"rewards/accuracy_reward_step": 0.6484375,
"rewards/format_reward_step": 0.9921875,
"step": 141
},
{
"calib/answer_extract_rate": 0.984375,
"calib/avg_num_step_conf": 7.015625,
"calib/ece": 0.3238492063492067,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.027777777777777776,
"calib/gap": 0.002469365496888587,
"calib/mean_conf": 0.891309523809524,
"calib/mu_c": 0.8923776223776227,
"calib/mu_w": 0.8899082568807342,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3238492063492067,
"calib/std_conf": 0.03944394541538628,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.37628865979381443,
"calib/step_q_c_n": 970.0,
"calib/step_q_gap": 0.032826190060158245,
"calib/step_q_w": 0.3434624697336562,
"calib/step_q_w_n": 826.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 1499.0,
"completions/max_terminated_length": 1499.0,
"completions/mean_length": 567.62890625,
"completions/mean_terminated_length": 574.3596801757812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 150.0,
"epoch": 0.15146666666666667,
"grad_norm": 0.2087187021970749,
"learning_rate": 1.638888888888889e-06,
"loss": -0.0134,
"num_tokens": 36594662.0,
"reward": 1.05078125,
"reward_std": 0.20927922427654266,
"rewards/accuracy_reward_step": 0.55859375,
"rewards/format_reward_step": 0.984375,
"step": 142
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/avg_num_step_conf": 7.66015625,
"calib/ece": 0.3233864541832673,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.00398406374501992,
"calib/gap": 0.0018665377176013687,
"calib/mean_conf": 0.8851394422310759,
"calib/mu_c": 0.885957446808511,
"calib/mu_w": 0.8840909090909096,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3233864541832673,
"calib/std_conf": 0.036394809375647104,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.4246967779056387,
"calib/step_q_c_n": 869.0,
"calib/step_q_gap": 0.15248066068952149,
"calib/step_q_w": 0.2722161172161172,
"calib/step_q_w_n": 1092.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2847.0,
"completions/max_terminated_length": 2847.0,
"completions/mean_length": 594.078125,
"completions/mean_terminated_length": 596.4078979492188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 210.0,
"epoch": 0.15253333333333333,
"grad_norm": 0.22333618998527527,
"learning_rate": 1.6111111111111113e-06,
"loss": 0.0154,
"num_tokens": 36877890.0,
"reward": 1.041015625,
"reward_std": 0.20484405755996704,
"rewards/accuracy_reward_step": 0.55078125,
"rewards/format_reward_step": 0.98046875,
"step": 143
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 7.05859375,
"calib/ece": 0.25389763779527597,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.015748031496062992,
"calib/gap": -7.246376811631539e-05,
"calib/mean_conf": 0.8916929133858269,
"calib/mu_c": 0.8916666666666667,
"calib/mu_w": 0.891739130434783,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.25389763779527597,
"calib/std_conf": 0.030129121552543325,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.3650319051959891,
"calib/step_q_c_n": 1097.0,
"calib/step_q_gap": 0.024609369984721463,
"calib/step_q_w": 0.34042253521126764,
"calib/step_q_w_n": 710.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2595.0,
"completions/max_terminated_length": 2595.0,
"completions/mean_length": 583.72265625,
"completions/mean_terminated_length": 583.72265625,
"completions/min_length": 98.0,
"completions/min_terminated_length": 98.0,
"epoch": 0.1536,
"grad_norm": 0.24799968302249908,
"learning_rate": 1.5833333333333333e-06,
"loss": 0.0299,
"num_tokens": 37155259.0,
"reward": 1.12890625,
"reward_std": 0.27356091141700745,
"rewards/accuracy_reward_step": 0.6328125,
"rewards/format_reward_step": 0.9921875,
"step": 144
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 7.265625,
"calib/ece": 0.244685039370079,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.003937007874015748,
"calib/gap": 0.011728395061728292,
"calib/mean_conf": 0.8824803149606301,
"calib/mu_c": 0.8867283950617286,
"calib/mu_w": 0.8750000000000003,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.244685039370079,
"calib/std_conf": 0.06549440685306386,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.417799671592775,
"calib/step_q_c_n": 1218.0,
"calib/step_q_gap": 0.03429499869557873,
"calib/step_q_w": 0.38350467289719625,
"calib/step_q_w_n": 642.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1682.0,
"completions/max_terminated_length": 1682.0,
"completions/mean_length": 536.359375,
"completions/mean_terminated_length": 538.4627685546875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 157.0,
"epoch": 0.15466666666666667,
"grad_norm": 0.24093927443027496,
"learning_rate": 1.5555555555555558e-06,
"loss": 0.0184,
"num_tokens": 37419079.0,
"reward": 1.1328125,
"reward_std": 0.2124004065990448,
"rewards/accuracy_reward_step": 0.63671875,
"rewards/format_reward_step": 0.9921875,
"step": 145
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 8.109375,
"calib/ece": 0.4515294117647062,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.027450980392156862,
"calib/gap": 0.016344280719280624,
"calib/mean_conf": 0.8907450980392159,
"calib/mu_c": 0.8999107142857145,
"calib/mu_w": 0.8835664335664338,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.4515294117647062,
"calib/std_conf": 0.06471531719687658,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3966867469879518,
"calib/step_q_c_n": 830.0,
"calib/step_q_gap": 0.026494130615560163,
"calib/step_q_w": 0.37019261637239165,
"calib/step_q_w_n": 1246.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1985.0,
"completions/max_terminated_length": 1985.0,
"completions/mean_length": 618.1015625,
"completions/mean_terminated_length": 618.1015625,
"completions/min_length": 174.0,
"completions/min_terminated_length": 174.0,
"epoch": 0.15573333333333333,
"grad_norm": 0.25144022703170776,
"learning_rate": 1.527777777777778e-06,
"loss": 0.028,
"num_tokens": 37708337.0,
"reward": 0.931640625,
"reward_std": 0.27421265840530396,
"rewards/accuracy_reward_step": 0.4375,
"rewards/format_reward_step": 0.98828125,
"step": 146
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 7.16015625,
"calib/ece": 0.38838582677165373,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.01968503937007874,
"calib/gap": -0.010448085396884799,
"calib/mean_conf": 0.8844488188976379,
"calib/mu_c": 0.8793893129770993,
"calib/mu_w": 0.8898373983739841,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3785433070866144,
"calib/std_conf": 0.0885373899984569,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.44844789356984477,
"calib/step_q_c_n": 902.0,
"calib/step_q_gap": 0.0846992362121648,
"calib/step_q_w": 0.36374865735767997,
"calib/step_q_w_n": 931.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2167.0,
"completions/max_terminated_length": 2167.0,
"completions/mean_length": 578.62890625,
"completions/mean_terminated_length": 580.8980712890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 194.0,
"epoch": 0.1568,
"grad_norm": 0.20188067853450775,
"learning_rate": 1.5e-06,
"loss": 0.006,
"num_tokens": 37983954.0,
"reward": 1.005859375,
"reward_std": 0.1799573004245758,
"rewards/accuracy_reward_step": 0.51171875,
"rewards/format_reward_step": 0.98828125,
"step": 147
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 6.87890625,
"calib/ece": 0.27743083003952596,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.011857707509881422,
"calib/gap": 0.008956550362080429,
"calib/mean_conf": 0.8900790513833993,
"calib/mu_c": 0.8935483870967745,
"calib/mu_w": 0.8845918367346941,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.27743083003952596,
"calib/std_conf": 0.060085525966913855,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.43912957467853614,
"calib/step_q_c_n": 1011.0,
"calib/step_q_gap": 0.013929574678536172,
"calib/step_q_w": 0.42519999999999997,
"calib/step_q_w_n": 750.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2380.0,
"completions/max_terminated_length": 2380.0,
"completions/mean_length": 550.26953125,
"completions/mean_terminated_length": 554.6023559570312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 128.0,
"epoch": 0.15786666666666666,
"grad_norm": 0.24239996075630188,
"learning_rate": 1.4722222222222225e-06,
"loss": -0.0064,
"num_tokens": 38253743.0,
"reward": 1.09765625,
"reward_std": 0.23398509621620178,
"rewards/accuracy_reward_step": 0.60546875,
"rewards/format_reward_step": 0.984375,
"step": 148
},
{
"calib/answer_extract_rate": 0.984375,
"calib/avg_num_step_conf": 7.8359375,
"calib/ece": 0.3885375494071147,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.015810276679841896,
"calib/gap": 0.007246874999999875,
"calib/mean_conf": 0.8944664031620555,
"calib/mu_c": 0.8980468750000002,
"calib/mu_w": 0.8908000000000004,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3885375494071147,
"calib/std_conf": 0.0375774806642311,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.4303410475030451,
"calib/step_q_c_n": 821.0,
"calib/step_q_gap": 0.07758999265072442,
"calib/step_q_w": 0.35275105485232067,
"calib/step_q_w_n": 1185.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2815.0,
"completions/max_terminated_length": 2815.0,
"completions/mean_length": 627.0859375,
"completions/mean_terminated_length": 627.0859375,
"completions/min_length": 186.0,
"completions/min_terminated_length": 186.0,
"epoch": 0.15893333333333334,
"grad_norm": 0.23352481424808502,
"learning_rate": 1.4444444444444445e-06,
"loss": 0.0634,
"num_tokens": 38542541.0,
"reward": 0.990234375,
"reward_std": 0.28249531984329224,
"rewards/accuracy_reward_step": 0.5,
"rewards/format_reward_step": 0.98046875,
"step": 149
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/avg_num_step_conf": 7.06640625,
"calib/ece": 0.31148594377510064,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.01606425702811245,
"calib/gap": 0.0175535714285715,
"calib/mean_conf": 0.8873895582329319,
"calib/mu_c": 0.8947916666666669,
"calib/mu_w": 0.8772380952380954,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.31028112449799217,
"calib/std_conf": 0.06665873083845503,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.4666407982261641,
"calib/step_q_c_n": 902.0,
"calib/step_q_gap": 0.06709283791745407,
"calib/step_q_w": 0.39954796030871004,
"calib/step_q_w_n": 907.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2383.0,
"completions/max_terminated_length": 2383.0,
"completions/mean_length": 506.859375,
"completions/mean_terminated_length": 514.90478515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 137.0,
"epoch": 0.16,
"grad_norm": 0.2571173906326294,
"learning_rate": 1.4166666666666667e-06,
"loss": -0.0348,
"num_tokens": 38801065.0,
"reward": 1.048828125,
"reward_std": 0.2649000883102417,
"rewards/accuracy_reward_step": 0.5625,
"rewards/format_reward_step": 0.97265625,
"step": 150
},
{
"calib/answer_extract_rate": 0.97265625,
"calib/avg_num_step_conf": 8.09765625,
"calib/ece": 0.3912350597609565,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.00708571428571414,
"calib/mean_conf": 0.8892430278884463,
"calib/mu_c": 0.8928000000000004,
"calib/mu_w": 0.8857142857142862,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3912350597609565,
"calib/std_conf": 0.04070867128174436,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.4377538829151732,
"calib/step_q_c_n": 837.0,
"calib/step_q_gap": 0.09190436835206639,
"calib/step_q_w": 0.3458495145631068,
"calib/step_q_w_n": 1236.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2631.0,
"completions/max_terminated_length": 2631.0,
"completions/mean_length": 612.34375,
"completions/mean_terminated_length": 617.1653442382812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 189.0,
"epoch": 0.16106666666666666,
"grad_norm": 0.24429307878017426,
"learning_rate": 1.3888888888888892e-06,
"loss": 0.0082,
"num_tokens": 39088657.0,
"reward": 0.974609375,
"reward_std": 0.2938493490219116,
"rewards/accuracy_reward_step": 0.48828125,
"rewards/format_reward_step": 0.97265625,
"step": 151
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/avg_num_step_conf": 7.5625,
"calib/ece": 0.36968503937007885,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.031496062992125984,
"calib/gap": -0.006805684850742066,
"calib/mean_conf": 0.8854330708661419,
"calib/mu_c": 0.8821374045801527,
"calib/mu_w": 0.8889430894308947,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.36968503937007885,
"calib/std_conf": 0.03714741659740444,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.3964218258132214,
"calib/step_q_c_n": 953.0,
"calib/step_q_gap": 0.0075408492109833025,
"calib/step_q_w": 0.3888809766022381,
"calib/step_q_w_n": 983.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1946.0,
"completions/max_terminated_length": 1946.0,
"completions/mean_length": 561.98828125,
"completions/mean_terminated_length": 564.1921997070312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 232.0,
"epoch": 0.16213333333333332,
"grad_norm": 0.24398364126682281,
"learning_rate": 1.3611111111111112e-06,
"loss": 0.0061,
"num_tokens": 39361726.0,
"reward": 1.0,
"reward_std": 0.28061729669570923,
"rewards/accuracy_reward_step": 0.51171875,
"rewards/format_reward_step": 0.9765625,
"step": 152
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/avg_num_step_conf": 7.1484375,
"calib/ece": 0.32159362549800813,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.027888446215139442,
"calib/gap": 0.0038603322949116725,
"calib/mean_conf": 0.8952988047808766,
"calib/mu_c": 0.8969444444444447,
"calib/mu_w": 0.893084112149533,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.32159362549800813,
"calib/std_conf": 0.025565523164352975,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.459238683127572,
"calib/step_q_c_n": 972.0,
"calib/step_q_gap": 0.07415709804598691,
"calib/step_q_w": 0.3850815850815851,
"calib/step_q_w_n": 858.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2561.0,
"completions/max_terminated_length": 2561.0,
"completions/mean_length": 601.21484375,
"completions/mean_terminated_length": 605.9487915039062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 181.0,
"epoch": 0.1632,
"grad_norm": 0.24795496463775635,
"learning_rate": 1.3333333333333334e-06,
"loss": 0.0033,
"num_tokens": 39646765.0,
"reward": 1.046875,
"reward_std": 0.3021770715713501,
"rewards/accuracy_reward_step": 0.5625,
"rewards/format_reward_step": 0.96875,
"step": 153
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 6.90234375,
"calib/ece": 0.3743650793650797,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.007936507936507936,
"calib/gap": 0.009522427607090878,
"calib/mean_conf": 0.8942063492063493,
"calib/mu_c": 0.8987786259541986,
"calib/mu_w": 0.8892561983471077,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3743650793650797,
"calib/std_conf": 0.02536568030756037,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.4836067415730337,
"calib/step_q_c_n": 890.0,
"calib/step_q_gap": 0.0682133550279938,
"calib/step_q_w": 0.4153933865450399,
"calib/step_q_w_n": 877.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2572.0,
"completions/max_terminated_length": 2572.0,
"completions/mean_length": 545.1328125,
"completions/mean_terminated_length": 547.2706298828125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 194.0,
"epoch": 0.16426666666666667,
"grad_norm": 0.2542245388031006,
"learning_rate": 1.3055555555555556e-06,
"loss": -0.001,
"num_tokens": 39914567.0,
"reward": 1.0078125,
"reward_std": 0.2664416432380676,
"rewards/accuracy_reward_step": 0.515625,
"rewards/format_reward_step": 0.984375,
"step": 154
},
{
"calib/answer_extract_rate": 1.0,
"calib/avg_num_step_conf": 6.6484375,
"calib/ece": 0.3974609375000002,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.01171875,
"calib/gap": 0.004560439560439833,
"calib/mean_conf": 0.8833984375000002,
"calib/mu_c": 0.8857142857142861,
"calib/mu_w": 0.8811538461538463,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3943359375000002,
"calib/std_conf": 0.09337306221581573,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.5181536555142503,
"calib/step_q_c_n": 807.0,
"calib/step_q_gap": 0.09922628121257432,
"calib/step_q_w": 0.418927374301676,
"calib/step_q_w_n": 895.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1226.0,
"completions/max_terminated_length": 1226.0,
"completions/mean_length": 512.37109375,
"completions/mean_terminated_length": 514.3804321289062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 190.0,
"epoch": 0.16533333333333333,
"grad_norm": 0.24787573516368866,
"learning_rate": 1.2777777777777779e-06,
"loss": -0.0004,
"num_tokens": 40176758.0,
"reward": 0.990234375,
"reward_std": 0.20889762043952942,
"rewards/accuracy_reward_step": 0.4921875,
"rewards/format_reward_step": 0.99609375,
"step": 155
},
{
"calib/answer_extract_rate": 0.984375,
"calib/avg_num_step_conf": 7.5546875,
"calib/ece": 0.36360000000000037,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.024,
"calib/gap": 0.024446725254987145,
"calib/mean_conf": 0.8876000000000002,
"calib/mu_c": 0.8992366412213741,
"calib/mu_w": 0.874789915966387,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.36360000000000037,
"calib/std_conf": 0.07018717831627085,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.4461740041928721,
"calib/step_q_c_n": 954.0,
"calib/step_q_gap": 0.035439310315321104,
"calib/step_q_w": 0.410734693877551,
"calib/step_q_w_n": 980.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2958.0,
"completions/max_terminated_length": 2958.0,
"completions/mean_length": 581.359375,
"completions/mean_terminated_length": 581.359375,
"completions/min_length": 155.0,
"completions/min_terminated_length": 155.0,
"epoch": 0.1664,
"grad_norm": 0.23543541133403778,
"learning_rate": 1.25e-06,
"loss": 0.0225,
"num_tokens": 40454154.0,
"reward": 1.00390625,
"reward_std": 0.28826624155044556,
"rewards/accuracy_reward_step": 0.515625,
"rewards/format_reward_step": 0.9765625,
"step": 156
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 6.98046875,
"calib/ece": 0.26778656126482253,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.007905138339920948,
"calib/gap": 0.01715100849256923,
"calib/mean_conf": 0.8804347826086958,
"calib/mu_c": 0.8869426751592361,
"calib/mu_w": 0.8697916666666669,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.2638339920948621,
"calib/std_conf": 0.08412892829895907,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.4687330316742082,
"calib/step_q_c_n": 1105.0,
"calib/step_q_gap": 0.04959813431350435,
"calib/step_q_w": 0.41913489736070386,
"calib/step_q_w_n": 682.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2749.0,
"completions/max_terminated_length": 2749.0,
"completions/mean_length": 544.91015625,
"completions/mean_terminated_length": 544.91015625,
"completions/min_length": 166.0,
"completions/min_terminated_length": 166.0,
"epoch": 0.16746666666666668,
"grad_norm": 0.24358108639717102,
"learning_rate": 1.2222222222222223e-06,
"loss": 0.0189,
"num_tokens": 40721187.0,
"reward": 1.107421875,
"reward_std": 0.27992281317710876,
"rewards/accuracy_reward_step": 0.6171875,
"rewards/format_reward_step": 0.98046875,
"step": 157
},
{
"calib/answer_extract_rate": 1.0,
"calib/avg_num_step_conf": 7.54296875,
"calib/ece": 0.30439215686274546,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.00784313725490196,
"calib/gap": -0.010980392156863195,
"calib/mean_conf": 0.8855686274509805,
"calib/mu_c": 0.8811764705882354,
"calib/mu_w": 0.8921568627450986,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2949803921568631,
"calib/std_conf": 0.084535467646858,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.4779475982532751,
"calib/step_q_c_n": 1145.0,
"calib/step_q_gap": 0.017286020645132627,
"calib/step_q_w": 0.46066157760814247,
"calib/step_q_w_n": 786.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1700.0,
"completions/max_terminated_length": 1700.0,
"completions/mean_length": 537.83984375,
"completions/mean_terminated_length": 539.9490356445312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 162.0,
"epoch": 0.16853333333333334,
"grad_norm": 0.2743615210056305,
"learning_rate": 1.1944444444444446e-06,
"loss": 0.0309,
"num_tokens": 40987922.0,
"reward": 1.095703125,
"reward_std": 0.279867947101593,
"rewards/accuracy_reward_step": 0.59765625,
"rewards/format_reward_step": 0.99609375,
"step": 158
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 6.29296875,
"calib/ece": 0.3318972332015812,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.04743083003952569,
"calib/gap": 0.009967643700038109,
"calib/mean_conf": 0.8931620553359686,
"calib/mu_c": 0.8975352112676059,
"calib/mu_w": 0.8875675675675678,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3318972332015812,
"calib/std_conf": 0.02956003843798982,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.4912575574365175,
"calib/step_q_c_n": 827.0,
"calib/step_q_gap": 0.050925924783456256,
"calib/step_q_w": 0.44033163265306124,
"calib/step_q_w_n": 784.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2662.0,
"completions/max_terminated_length": 2662.0,
"completions/mean_length": 519.953125,
"completions/mean_terminated_length": 521.9921875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 185.0,
"epoch": 0.1696,
"grad_norm": 0.2283385694026947,
"learning_rate": 1.1666666666666668e-06,
"loss": 0.0215,
"num_tokens": 41249622.0,
"reward": 1.048828125,
"reward_std": 0.19320310652256012,
"rewards/accuracy_reward_step": 0.5546875,
"rewards/format_reward_step": 0.98828125,
"step": 159
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 7.19921875,
"calib/ece": 0.33150197628458533,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.019762845849802372,
"calib/gap": 0.00684397163120587,
"calib/mean_conf": 0.8888142292490121,
"calib/mu_c": 0.891843971631206,
"calib/mu_w": 0.8850000000000001,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.33150197628458533,
"calib/std_conf": 0.06296508439496006,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.5101843817787419,
"calib/step_q_c_n": 922.0,
"calib/step_q_gap": 0.07480978894486567,
"calib/step_q_w": 0.43537459283387625,
"calib/step_q_w_n": 921.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2969.0,
"completions/max_terminated_length": 2969.0,
"completions/mean_length": 528.37109375,
"completions/mean_terminated_length": 530.4431762695312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 214.0,
"epoch": 0.17066666666666666,
"grad_norm": 0.2358379364013672,
"learning_rate": 1.138888888888889e-06,
"loss": 0.0009,
"num_tokens": 41513533.0,
"reward": 1.044921875,
"reward_std": 0.23529572784900665,
"rewards/accuracy_reward_step": 0.55078125,
"rewards/format_reward_step": 0.98828125,
"step": 160
},
{
"calib/answer_extract_rate": 1.0,
"calib/avg_num_step_conf": 6.609375,
"calib/ece": 0.1893359375000003,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0390625,
"calib/gap": 0.01641812865497061,
"calib/mean_conf": 0.8895703125000001,
"calib/mu_c": 0.8944444444444447,
"calib/mu_w": 0.8780263157894741,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.1878906250000003,
"calib/std_conf": 0.075131466401584,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.47634879725085916,
"calib/step_q_c_n": 1164.0,
"calib/step_q_gap": 0.023521145735707627,
"calib/step_q_w": 0.45282765151515153,
"calib/step_q_w_n": 528.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1357.0,
"completions/max_terminated_length": 1357.0,
"completions/mean_length": 502.0546875,
"completions/mean_terminated_length": 504.0235595703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 139.0,
"epoch": 0.17173333333333332,
"grad_norm": 0.2519267499446869,
"learning_rate": 1.111111111111111e-06,
"loss": 0.0282,
"num_tokens": 41769787.0,
"reward": 1.203125,
"reward_std": 0.19977852702140808,
"rewards/accuracy_reward_step": 0.703125,
"rewards/format_reward_step": 1.0,
"step": 161
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 6.87890625,
"calib/ece": 0.2341568627450984,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0196078431372549,
"calib/gap": 0.006736617586349025,
"calib/mean_conf": 0.8906274509803923,
"calib/mu_c": 0.8928994082840239,
"calib/mu_w": 0.8861627906976749,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2310196078431376,
"calib/std_conf": 0.05056929269913447,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.5035537190082645,
"calib/step_q_c_n": 1089.0,
"calib/step_q_gap": 0.10638109996064543,
"calib/step_q_w": 0.3971726190476191,
"calib/step_q_w_n": 672.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2860.0,
"completions/max_terminated_length": 2860.0,
"completions/mean_length": 513.66796875,
"completions/mean_terminated_length": 513.66796875,
"completions/min_length": 174.0,
"completions/min_terminated_length": 174.0,
"epoch": 0.1728,
"grad_norm": 0.2774161100387573,
"learning_rate": 1.0833333333333335e-06,
"loss": 0.0425,
"num_tokens": 42029238.0,
"reward": 1.158203125,
"reward_std": 0.2824953496456146,
"rewards/accuracy_reward_step": 0.66015625,
"rewards/format_reward_step": 0.99609375,
"step": 162
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/avg_num_step_conf": 7.61328125,
"calib/ece": 0.3974501992031875,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.0199203187250996,
"calib/gap": 0.0136626873253749,
"calib/mean_conf": 0.8914741035856576,
"calib/mu_c": 0.8983870967741939,
"calib/mu_w": 0.884724409448819,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3974501992031875,
"calib/std_conf": 0.03204544716426028,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.5095477386934674,
"calib/step_q_c_n": 796.0,
"calib/step_q_gap": 0.07791720963882726,
"calib/step_q_w": 0.4316305290546401,
"calib/step_q_w_n": 1153.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2408.0,
"completions/max_terminated_length": 2408.0,
"completions/mean_length": 579.25390625,
"completions/mean_terminated_length": 581.5255126953125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 195.0,
"epoch": 0.17386666666666667,
"grad_norm": 0.26556578278541565,
"learning_rate": 1.0555555555555557e-06,
"loss": -0.0127,
"num_tokens": 42306167.0,
"reward": 0.978515625,
"reward_std": 0.2926446497440338,
"rewards/accuracy_reward_step": 0.4921875,
"rewards/format_reward_step": 0.97265625,
"step": 163
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 7.546875,
"calib/ece": 0.3106324110671939,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.023715415019762844,
"calib/gap": -0.0006621621621620832,
"calib/mean_conf": 0.895612648221344,
"calib/mu_c": 0.8953378378378382,
"calib/mu_w": 0.8960000000000002,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3106324110671939,
"calib/std_conf": 0.023626711580360647,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.4920617529880478,
"calib/step_q_c_n": 1004.0,
"calib/step_q_gap": 0.045488477125978755,
"calib/step_q_w": 0.446573275862069,
"calib/step_q_w_n": 928.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2647.0,
"completions/max_terminated_length": 2647.0,
"completions/mean_length": 626.21875,
"completions/mean_terminated_length": 626.21875,
"completions/min_length": 173.0,
"completions/min_terminated_length": 173.0,
"epoch": 0.17493333333333333,
"grad_norm": 0.24319250881671906,
"learning_rate": 1.0277777777777777e-06,
"loss": 0.0384,
"num_tokens": 42596423.0,
"reward": 1.076171875,
"reward_std": 0.27499398589134216,
"rewards/accuracy_reward_step": 0.58203125,
"rewards/format_reward_step": 0.98828125,
"step": 164
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 7.109375,
"calib/ece": 0.4509375000000002,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0234375,
"calib/gap": 0.002016308376575293,
"calib/mean_conf": 0.8962500000000002,
"calib/mu_c": 0.8973684210526319,
"calib/mu_w": 0.8953521126760566,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.4509375000000002,
"calib/std_conf": 0.022447856245084954,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.5375154511742892,
"calib/step_q_c_n": 809.0,
"calib/step_q_gap": 0.05436015938398259,
"calib/step_q_w": 0.48315529179030664,
"calib/step_q_w_n": 1011.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1444.0,
"completions/max_terminated_length": 1444.0,
"completions/mean_length": 559.75,
"completions/mean_terminated_length": 561.9451293945312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 177.0,
"epoch": 0.176,
"grad_norm": 0.24757790565490723,
"learning_rate": 1.0000000000000002e-06,
"loss": -0.0085,
"num_tokens": 42869103.0,
"reward": 0.939453125,
"reward_std": 0.1965903639793396,
"rewards/accuracy_reward_step": 0.4453125,
"rewards/format_reward_step": 0.98828125,
"step": 165
},
{
"calib/answer_extract_rate": 1.0,
"calib/avg_num_step_conf": 7.40234375,
"calib/ece": 0.2964453125000003,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0625,
"calib/gap": 0.007933537051183825,
"calib/mean_conf": 0.8980078125000003,
"calib/mu_c": 0.9011688311688314,
"calib/mu_w": 0.8932352941176476,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2964453125000003,
"calib/std_conf": 0.02367454992106172,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.5356290773532153,
"calib/step_q_c_n": 1073.0,
"calib/step_q_gap": 0.04134683891039298,
"calib/step_q_w": 0.4942822384428223,
"calib/step_q_w_n": 822.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1865.0,
"completions/max_terminated_length": 1865.0,
"completions/mean_length": 591.34765625,
"completions/mean_terminated_length": 593.6666870117188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 199.0,
"epoch": 0.17706666666666668,
"grad_norm": 0.23903323709964752,
"learning_rate": 9.722222222222224e-07,
"loss": -0.0011,
"num_tokens": 43150480.0,
"reward": 1.1015625,
"reward_std": 0.17950758337974548,
"rewards/accuracy_reward_step": 0.6015625,
"rewards/format_reward_step": 1.0,
"step": 166
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 7.3203125,
"calib/ece": 0.21704724409448858,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.01968503937007874,
"calib/gap": -0.0018104617141223578,
"calib/mean_conf": 0.8981496062992128,
"calib/mu_c": 0.8975722543352604,
"calib/mu_w": 0.8993827160493828,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.21704724409448858,
"calib/std_conf": 0.017816907276078237,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.558006589785832,
"calib/step_q_c_n": 1214.0,
"calib/step_q_gap": 0.08468840796765009,
"calib/step_q_w": 0.47331818181818186,
"calib/step_q_w_n": 660.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2999.0,
"completions/max_terminated_length": 2999.0,
"completions/mean_length": 545.55859375,
"completions/mean_terminated_length": 545.55859375,
"completions/min_length": 187.0,
"completions/min_terminated_length": 187.0,
"epoch": 0.17813333333333334,
"grad_norm": 0.24380220472812653,
"learning_rate": 9.444444444444445e-07,
"loss": 0.0622,
"num_tokens": 43419559.0,
"reward": 1.169921875,
"reward_std": 0.24672676622867584,
"rewards/accuracy_reward_step": 0.67578125,
"rewards/format_reward_step": 0.98828125,
"step": 167
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/avg_num_step_conf": 6.82421875,
"calib/ece": 0.33366533864541864,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.02390438247011952,
"calib/gap": 0.006246937459703283,
"calib/mean_conf": 0.8954183266932273,
"calib/mu_c": 0.8981560283687946,
"calib/mu_w": 0.8919090909090913,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.33366533864541864,
"calib/std_conf": 0.029942246912816883,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.5973455377574372,
"calib/step_q_c_n": 874.0,
"calib/step_q_gap": 0.10586787452719659,
"calib/step_q_w": 0.49147766323024056,
"calib/step_q_w_n": 873.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2670.0,
"completions/max_terminated_length": 2670.0,
"completions/mean_length": 599.2109375,
"completions/mean_terminated_length": 606.3162231445312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 203.0,
"epoch": 0.1792,
"grad_norm": 0.2356565147638321,
"learning_rate": 9.166666666666666e-07,
"loss": 0.0053,
"num_tokens": 43701437.0,
"reward": 1.041015625,
"reward_std": 0.2755982577800751,
"rewards/accuracy_reward_step": 0.55078125,
"rewards/format_reward_step": 0.98046875,
"step": 168
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 7.14453125,
"calib/ece": 0.36667984189723346,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.039525691699604744,
"calib/gap": 0.0017860278439728594,
"calib/mean_conf": 0.896324110671937,
"calib/mu_c": 0.8971641791044777,
"calib/mu_w": 0.8953781512605048,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.36667984189723346,
"calib/std_conf": 0.025043947374533897,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.5402594594594595,
"calib/step_q_c_n": 925.0,
"calib/step_q_gap": 0.0609563621143267,
"calib/step_q_w": 0.4793030973451328,
"calib/step_q_w_n": 904.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2455.0,
"completions/max_terminated_length": 2455.0,
"completions/mean_length": 580.05859375,
"completions/mean_terminated_length": 582.3333740234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 118.0,
"epoch": 0.18026666666666666,
"grad_norm": 0.22578802704811096,
"learning_rate": 8.88888888888889e-07,
"loss": 0.0145,
"num_tokens": 43977924.0,
"reward": 1.015625,
"reward_std": 0.23758748173713684,
"rewards/accuracy_reward_step": 0.5234375,
"rewards/format_reward_step": 0.984375,
"step": 169
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 7.70703125,
"calib/ece": 0.3147826086956524,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.015810276679841896,
"calib/gap": 0.009025798998844703,
"calib/mean_conf": 0.895810276679842,
"calib/mu_c": 0.8995918367346941,
"calib/mu_w": 0.8905660377358494,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3147826086956524,
"calib/std_conf": 0.058342009667291285,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.5898637602179836,
"calib/step_q_c_n": 1101.0,
"calib/step_q_gap": 0.09208853086018542,
"calib/step_q_w": 0.49777522935779817,
"calib/step_q_w_n": 872.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 1655.0,
"completions/max_terminated_length": 1655.0,
"completions/mean_length": 565.59375,
"completions/mean_terminated_length": 570.0472412109375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 190.0,
"epoch": 0.18133333333333335,
"grad_norm": 0.2454632967710495,
"learning_rate": 8.611111111111112e-07,
"loss": -0.0245,
"num_tokens": 44250676.0,
"reward": 1.068359375,
"reward_std": 0.2688901424407959,
"rewards/accuracy_reward_step": 0.57421875,
"rewards/format_reward_step": 0.98828125,
"step": 170
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 6.84765625,
"calib/ece": 0.39325490196078455,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.00784313725490196,
"calib/gap": 0.03912398424033525,
"calib/mean_conf": 0.8720784313725491,
"calib/mu_c": 0.8921774193548392,
"calib/mu_w": 0.8530534351145039,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3895294117647061,
"calib/std_conf": 0.1374159055692672,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.579527950310559,
"calib/step_q_c_n": 805.0,
"calib/step_q_gap": 0.028399258327436616,
"calib/step_q_w": 0.5511286919831224,
"calib/step_q_w_n": 948.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2462.0,
"completions/max_terminated_length": 2462.0,
"completions/mean_length": 529.40625,
"completions/mean_terminated_length": 529.40625,
"completions/min_length": 185.0,
"completions/min_terminated_length": 185.0,
"epoch": 0.1824,
"grad_norm": 0.265653133392334,
"learning_rate": 8.333333333333333e-07,
"loss": 0.0417,
"num_tokens": 44516908.0,
"reward": 0.982421875,
"reward_std": 0.2800021767616272,
"rewards/accuracy_reward_step": 0.484375,
"rewards/format_reward_step": 0.99609375,
"step": 171
},
{
"calib/answer_extract_rate": 0.984375,
"calib/avg_num_step_conf": 6.4765625,
"calib/ece": 0.255793650793651,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.01984126984126984,
"calib/gap": 0.014725274725274629,
"calib/mean_conf": 0.8946825396825399,
"calib/mu_c": 0.9000000000000002,
"calib/mu_w": 0.8852747252747256,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.255793650793651,
"calib/std_conf": 0.058779740975148646,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.5805786802030458,
"calib/step_q_c_n": 985.0,
"calib/step_q_gap": 0.09230230576025233,
"calib/step_q_w": 0.4882763744427935,
"calib/step_q_w_n": 673.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2941.0,
"completions/max_terminated_length": 2941.0,
"completions/mean_length": 528.34375,
"completions/mean_terminated_length": 530.4157104492188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 169.0,
"epoch": 0.18346666666666667,
"grad_norm": 0.25087136030197144,
"learning_rate": 8.055555555555557e-07,
"loss": 0.0088,
"num_tokens": 44779324.0,
"reward": 1.119140625,
"reward_std": 0.24476386606693268,
"rewards/accuracy_reward_step": 0.62890625,
"rewards/format_reward_step": 0.98046875,
"step": 172
},
{
"calib/answer_extract_rate": 0.984375,
"calib/avg_num_step_conf": 7.22265625,
"calib/ece": 0.26261904761904786,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.027777777777777776,
"calib/gap": 0.0009184782608693265,
"calib/mean_conf": 0.8975396825396826,
"calib/mu_c": 0.8978750000000002,
"calib/mu_w": 0.8969565217391309,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.26261904761904786,
"calib/std_conf": 0.020651550075820927,
"calib/step_conf_rate": 0.984375,
"calib/step_q_c": 0.5774318381706245,
"calib/step_q_c_n": 1137.0,
"calib/step_q_gap": 0.1100020628897257,
"calib/step_q_w": 0.4674297752808988,
"calib/step_q_w_n": 712.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2034.0,
"completions/max_terminated_length": 2034.0,
"completions/mean_length": 552.3046875,
"completions/mean_terminated_length": 556.653564453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 143.0,
"epoch": 0.18453333333333333,
"grad_norm": 0.2781237065792084,
"learning_rate": 7.777777777777779e-07,
"loss": 0.0031,
"num_tokens": 45047682.0,
"reward": 1.11328125,
"reward_std": 0.28275614976882935,
"rewards/accuracy_reward_step": 0.625,
"rewards/format_reward_step": 0.9765625,
"step": 173
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 7.22265625,
"calib/ece": 0.40470119521912384,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.0398406374501992,
"calib/gap": -0.003434721869443824,
"calib/mean_conf": 0.8963346613545817,
"calib/mu_c": 0.8945967741935488,
"calib/mu_w": 0.8980314960629926,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.4035059760956179,
"calib/std_conf": 0.025594749247233217,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.6048370136698213,
"calib/step_q_c_n": 951.0,
"calib/step_q_gap": 0.031507392288975034,
"calib/step_q_w": 0.5733296213808463,
"calib/step_q_w_n": 898.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2634.0,
"completions/max_terminated_length": 2634.0,
"completions/mean_length": 640.65234375,
"completions/mean_terminated_length": 640.65234375,
"completions/min_length": 194.0,
"completions/min_terminated_length": 194.0,
"epoch": 0.1856,
"grad_norm": 0.22465208172798157,
"learning_rate": 7.5e-07,
"loss": -0.0072,
"num_tokens": 45339729.0,
"reward": 0.98046875,
"reward_std": 0.2972414195537567,
"rewards/accuracy_reward_step": 0.49609375,
"rewards/format_reward_step": 0.96875,
"step": 174
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 6.7421875,
"calib/ece": 0.42886274509803946,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.023529411764705882,
"calib/gap": 0.0027100840336139465,
"calib/mean_conf": 0.8955294117647059,
"calib/mu_c": 0.8969747899159669,
"calib/mu_w": 0.894264705882353,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.42886274509803946,
"calib/std_conf": 0.02368945570119087,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.5905570652173913,
"calib/step_q_c_n": 736.0,
"calib/step_q_gap": 0.029658075318401367,
"calib/step_q_w": 0.5608989898989899,
"calib/step_q_w_n": 990.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2498.0,
"completions/max_terminated_length": 2498.0,
"completions/mean_length": 554.14453125,
"completions/mean_terminated_length": 556.3176879882812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 135.0,
"epoch": 0.18666666666666668,
"grad_norm": 0.26532062888145447,
"learning_rate": 7.222222222222222e-07,
"loss": -0.0224,
"num_tokens": 45611222.0,
"reward": 0.9609375,
"reward_std": 0.28407180309295654,
"rewards/accuracy_reward_step": 0.46484375,
"rewards/format_reward_step": 0.9921875,
"step": 175
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 6.9140625,
"calib/ece": 0.4006719367588934,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.039525691699604744,
"calib/gap": 0.008097649412353247,
"calib/mean_conf": 0.8907905138339921,
"calib/mu_c": 0.89491935483871,
"calib/mu_w": 0.8868217054263567,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.4006719367588934,
"calib/std_conf": 0.06436807132161741,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.6102705882352941,
"calib/step_q_c_n": 850.0,
"calib/step_q_gap": 0.06380319693094627,
"calib/step_q_w": 0.5464673913043478,
"calib/step_q_w_n": 920.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 3050.0,
"completions/max_terminated_length": 3050.0,
"completions/mean_length": 556.6171875,
"completions/mean_terminated_length": 558.800048828125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 180.0,
"epoch": 0.18773333333333334,
"grad_norm": 0.22770024836063385,
"learning_rate": 6.944444444444446e-07,
"loss": 0.0075,
"num_tokens": 45881588.0,
"reward": 0.978515625,
"reward_std": 0.2112065553665161,
"rewards/accuracy_reward_step": 0.484375,
"rewards/format_reward_step": 0.98828125,
"step": 176
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/avg_num_step_conf": 7.22265625,
"calib/ece": 0.3003187250996017,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.03187250996015936,
"calib/gap": -0.0024726072607261784,
"calib/mean_conf": 0.8979282868525899,
"calib/mu_c": 0.8969333333333337,
"calib/mu_w": 0.8994059405940599,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3003187250996017,
"calib/std_conf": 0.01907447233017699,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.6147472527472528,
"calib/step_q_c_n": 910.0,
"calib/step_q_gap": 0.1496460812882539,
"calib/step_q_w": 0.46510117145899893,
"calib/step_q_w_n": 939.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2455.0,
"completions/max_terminated_length": 2455.0,
"completions/mean_length": 554.67578125,
"completions/mean_terminated_length": 561.2529907226562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 173.0,
"epoch": 0.1888,
"grad_norm": 0.2137850821018219,
"learning_rate": 6.666666666666667e-07,
"loss": 0.0007,
"num_tokens": 46151225.0,
"reward": 1.076171875,
"reward_std": 0.22957715392112732,
"rewards/accuracy_reward_step": 0.5859375,
"rewards/format_reward_step": 0.98046875,
"step": 177
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 6.5703125,
"calib/ece": 0.31807843137254915,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.054901960784313725,
"calib/gap": -0.0076066860833226,
"calib/mean_conf": 0.9012156862745099,
"calib/mu_c": 0.8980536912751682,
"calib/mu_w": 0.9056603773584908,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3174901960784315,
"calib/std_conf": 0.021136636463723723,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.6420581395348838,
"calib/step_q_c_n": 860.0,
"calib/step_q_gap": 0.06930874780738983,
"calib/step_q_w": 0.572749391727494,
"calib/step_q_w_n": 822.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2956.0,
"completions/max_terminated_length": 2956.0,
"completions/mean_length": 544.83203125,
"completions/mean_terminated_length": 544.83203125,
"completions/min_length": 149.0,
"completions/min_terminated_length": 149.0,
"epoch": 0.18986666666666666,
"grad_norm": 0.27774763107299805,
"learning_rate": 6.388888888888889e-07,
"loss": 0.0453,
"num_tokens": 46420582.0,
"reward": 1.080078125,
"reward_std": 0.2694709300994873,
"rewards/accuracy_reward_step": 0.58203125,
"rewards/format_reward_step": 0.99609375,
"step": 178
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/avg_num_step_conf": 6.59375,
"calib/ece": 0.28916334661354604,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.027888446215139442,
"calib/gap": 0.0002521008403362224,
"calib/mean_conf": 0.8987250996015937,
"calib/mu_c": 0.8988235294117649,
"calib/mu_w": 0.8985714285714287,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.28916334661354604,
"calib/std_conf": 0.01857385729434993,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.6282080329557158,
"calib/step_q_c_n": 971.0,
"calib/step_q_gap": 0.09612992974790546,
"calib/step_q_w": 0.5320781032078103,
"calib/step_q_w_n": 717.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2730.0,
"completions/max_terminated_length": 2730.0,
"completions/mean_length": 548.7734375,
"completions/mean_terminated_length": 550.925537109375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 215.0,
"epoch": 0.19093333333333334,
"grad_norm": 0.25895431637763977,
"learning_rate": 6.111111111111112e-07,
"loss": 0.037,
"num_tokens": 46691140.0,
"reward": 1.087890625,
"reward_std": 0.27308064699172974,
"rewards/accuracy_reward_step": 0.59765625,
"rewards/format_reward_step": 0.98046875,
"step": 179
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/avg_num_step_conf": 7.3984375,
"calib/ece": 0.31546184738955846,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.024096385542168676,
"calib/gap": -0.0001604774535810538,
"calib/mean_conf": 0.8977911646586347,
"calib/mu_c": 0.8977241379310346,
"calib/mu_w": 0.8978846153846156,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.31546184738955846,
"calib/std_conf": 0.0190839553461964,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.6108915441176471,
"calib/step_q_c_n": 1088.0,
"calib/step_q_gap": 0.04966325627645607,
"calib/step_q_w": 0.561228287841191,
"calib/step_q_w_n": 806.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2954.0,
"completions/max_terminated_length": 2954.0,
"completions/mean_length": 609.796875,
"completions/mean_terminated_length": 617.0277099609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 163.0,
"epoch": 0.192,
"grad_norm": 0.24032698571681976,
"learning_rate": 5.833333333333334e-07,
"loss": -0.0097,
"num_tokens": 46974912.0,
"reward": 1.052734375,
"reward_std": 0.2881550192832947,
"rewards/accuracy_reward_step": 0.56640625,
"rewards/format_reward_step": 0.97265625,
"step": 180
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 6.328125,
"calib/ece": 0.4085826771653546,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.05511811023622047,
"calib/gap": 0.008513647642680033,
"calib/mean_conf": 0.8967716535433072,
"calib/mu_c": 0.9011290322580648,
"calib/mu_w": 0.8926153846153848,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.4085826771653546,
"calib/std_conf": 0.05877867053501822,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.6302722772277228,
"calib/step_q_c_n": 808.0,
"calib/step_q_gap": 0.03709001121787059,
"calib/step_q_w": 0.5931822660098522,
"calib/step_q_w_n": 812.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2577.0,
"completions/max_terminated_length": 2577.0,
"completions/mean_length": 525.05859375,
"completions/mean_terminated_length": 527.11767578125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 176.0,
"epoch": 0.19306666666666666,
"grad_norm": 0.26311588287353516,
"learning_rate": 5.555555555555555e-07,
"loss": 0.0051,
"num_tokens": 47239399.0,
"reward": 0.98046875,
"reward_std": 0.29605424404144287,
"rewards/accuracy_reward_step": 0.484375,
"rewards/format_reward_step": 0.9921875,
"step": 181
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 6.359375,
"calib/ece": 0.278149606299213,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.03543307086614173,
"calib/gap": -0.002381640291548748,
"calib/mean_conf": 0.8962598425196852,
"calib/mu_c": 0.895350318471338,
"calib/mu_w": 0.8977319587628867,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.278149606299213,
"calib/std_conf": 0.022267773613621172,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.644875406283857,
"calib/step_q_c_n": 923.0,
"calib/step_q_gap": 0.09663427153208393,
"calib/step_q_w": 0.548241134751773,
"calib/step_q_w_n": 705.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 1650.0,
"completions/max_terminated_length": 1650.0,
"completions/mean_length": 542.12109375,
"completions/mean_terminated_length": 546.3897705078125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 185.0,
"epoch": 0.19413333333333332,
"grad_norm": 0.2820543646812439,
"learning_rate": 5.277777777777779e-07,
"loss": -0.0056,
"num_tokens": 47508150.0,
"reward": 1.10546875,
"reward_std": 0.27713069319725037,
"rewards/accuracy_reward_step": 0.61328125,
"rewards/format_reward_step": 0.984375,
"step": 182
},
{
"calib/answer_extract_rate": 0.984375,
"calib/avg_num_step_conf": 6.92578125,
"calib/ece": 0.41074509803921594,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.043137254901960784,
"calib/gap": 0.015864661654135803,
"calib/mean_conf": 0.8817254901960786,
"calib/mu_c": 0.8900000000000003,
"calib/mu_w": 0.8741353383458645,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.4070196078431375,
"calib/std_conf": 0.12458622943943756,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.5766496815286625,
"calib/step_q_c_n": 785.0,
"calib/step_q_gap": 0.040550390748520626,
"calib/step_q_w": 0.5360992907801418,
"calib/step_q_w_n": 987.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2515.0,
"completions/max_terminated_length": 2515.0,
"completions/mean_length": 590.49609375,
"completions/mean_terminated_length": 590.49609375,
"completions/min_length": 188.0,
"completions/min_terminated_length": 188.0,
"epoch": 0.1952,
"grad_norm": 0.2631720006465912,
"learning_rate": 5.000000000000001e-07,
"loss": 0.0362,
"num_tokens": 47789805.0,
"reward": 0.966796875,
"reward_std": 0.3247433304786682,
"rewards/accuracy_reward_step": 0.4765625,
"rewards/format_reward_step": 0.98046875,
"step": 183
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 6.4921875,
"calib/ece": 0.2305905511811025,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.027559055118110236,
"calib/gap": 0.0012464985994394828,
"calib/mean_conf": 0.8998818897637797,
"calib/mu_c": 0.900294117647059,
"calib/mu_w": 0.8990476190476195,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2305905511811025,
"calib/std_conf": 0.013987685344998382,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.678563484708063,
"calib/step_q_c_n": 1079.0,
"calib/step_q_gap": 0.06098200957941802,
"calib/step_q_w": 0.617581475128645,
"calib/step_q_w_n": 583.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1598.0,
"completions/max_terminated_length": 1598.0,
"completions/mean_length": 522.69140625,
"completions/mean_terminated_length": 524.7412109375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 185.0,
"epoch": 0.19626666666666667,
"grad_norm": 0.23894599080085754,
"learning_rate": 4.7222222222222226e-07,
"loss": -0.0032,
"num_tokens": 48052702.0,
"reward": 1.16015625,
"reward_std": 0.24039921164512634,
"rewards/accuracy_reward_step": 0.6640625,
"rewards/format_reward_step": 0.9921875,
"step": 184
},
{
"calib/answer_extract_rate": 0.95703125,
"calib/avg_num_step_conf": 7.43359375,
"calib/ece": 0.3223170731707319,
"calib/final_conf_rate": 0.9609375,
"calib/format_rate": 0.953125,
"calib/frac_conf_gt_0.9": 0.07723577235772358,
"calib/gap": -0.0016075297941493538,
"calib/mean_conf": 0.8995528455284556,
"calib/mu_c": 0.8988732394366201,
"calib/mu_w": 0.9004807692307695,
"calib/nonempty_final_conf_rate": 0.9609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3223170731707319,
"calib/std_conf": 0.021323182259200085,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.6366867469879519,
"calib/step_q_c_n": 830.0,
"calib/step_q_gap": 0.14737640216036574,
"calib/step_q_w": 0.4893103448275862,
"calib/step_q_w_n": 1073.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0234375,
"completions/max_length": 2774.0,
"completions/max_terminated_length": 2774.0,
"completions/mean_length": 536.65625,
"completions/mean_terminated_length": 549.5360107421875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 224.0,
"epoch": 0.19733333333333333,
"grad_norm": 0.2608066499233246,
"learning_rate": 4.444444444444445e-07,
"loss": -0.0132,
"num_tokens": 48320814.0,
"reward": 1.03125,
"reward_std": 0.29780781269073486,
"rewards/accuracy_reward_step": 0.5546875,
"rewards/format_reward_step": 0.953125,
"step": 185
},
{
"calib/answer_extract_rate": 1.0,
"calib/avg_num_step_conf": 6.8828125,
"calib/ece": 0.2933333333333336,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.027450980392156862,
"calib/gap": -0.028536184210526616,
"calib/mean_conf": 0.8815686274509805,
"calib/mu_c": 0.8709375000000001,
"calib/mu_w": 0.8994736842105268,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.27372549019607867,
"calib/std_conf": 0.1254227760068057,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.5934462151394424,
"calib/step_q_c_n": 1004.0,
"calib/step_q_gap": 0.07409265313416535,
"calib/step_q_w": 0.519353562005277,
"calib/step_q_w_n": 758.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1705.0,
"completions/max_terminated_length": 1705.0,
"completions/mean_length": 562.0390625,
"completions/mean_terminated_length": 564.2431640625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 136.0,
"epoch": 0.1984,
"grad_norm": 0.23022688925266266,
"learning_rate": 4.1666666666666667e-07,
"loss": 0.0173,
"num_tokens": 48593544.0,
"reward": 1.123046875,
"reward_std": 0.23529818654060364,
"rewards/accuracy_reward_step": 0.625,
"rewards/format_reward_step": 0.99609375,
"step": 186
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 7.0234375,
"calib/ece": 0.3737549407114627,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.07905138339920949,
"calib/gap": -0.001211152882205635,
"calib/mean_conf": 0.8994466403162058,
"calib/mu_c": 0.8988721804511278,
"calib/mu_w": 0.9000833333333335,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.3737549407114627,
"calib/std_conf": 0.01989324568516636,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.5791103603603603,
"calib/step_q_c_n": 888.0,
"calib/step_q_gap": 0.004967503217503166,
"calib/step_q_w": 0.5741428571428572,
"calib/step_q_w_n": 910.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2747.0,
"completions/max_terminated_length": 2747.0,
"completions/mean_length": 560.66015625,
"completions/mean_terminated_length": 562.85888671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 175.0,
"epoch": 0.19946666666666665,
"grad_norm": 0.24886764585971832,
"learning_rate": 3.8888888888888895e-07,
"loss": 0.0519,
"num_tokens": 48862425.0,
"reward": 1.009765625,
"reward_std": 0.2750778794288635,
"rewards/accuracy_reward_step": 0.51953125,
"rewards/format_reward_step": 0.98046875,
"step": 187
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 6.9375,
"calib/ece": 0.3312500000000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0546875,
"calib/gap": -0.005673723536737341,
"calib/mean_conf": 0.8992187500000002,
"calib/mu_c": 0.8967808219178085,
"calib/mu_w": 0.9024545454545458,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3300781250000001,
"calib/std_conf": 0.01639880783586111,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.6178296988577363,
"calib/step_q_c_n": 963.0,
"calib/step_q_gap": -0.009353572974982072,
"calib/step_q_w": 0.6271832718327184,
"calib/step_q_w_n": 813.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2013.0,
"completions/max_terminated_length": 2013.0,
"completions/mean_length": 552.70703125,
"completions/mean_terminated_length": 554.8745727539062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 180.0,
"epoch": 0.20053333333333334,
"grad_norm": 0.2365780621767044,
"learning_rate": 3.611111111111111e-07,
"loss": 0.018,
"num_tokens": 49131798.0,
"reward": 1.06640625,
"reward_std": 0.2518237829208374,
"rewards/accuracy_reward_step": 0.5703125,
"rewards/format_reward_step": 0.9921875,
"step": 188
},
{
"calib/answer_extract_rate": 1.0,
"calib/avg_num_step_conf": 6.2734375,
"calib/ece": 0.2853125000000002,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.03515625,
"calib/gap": -0.0008106543138390876,
"calib/mean_conf": 0.8985937500000003,
"calib/mu_c": 0.8982802547770703,
"calib/mu_w": 0.8990909090909094,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2853125000000002,
"calib/std_conf": 0.018444781401184995,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.6221251348435815,
"calib/step_q_c_n": 927.0,
"calib/step_q_gap": 0.04616048094078329,
"calib/step_q_w": 0.5759646539027982,
"calib/step_q_w_n": 679.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1269.0,
"completions/max_terminated_length": 1269.0,
"completions/mean_length": 529.71875,
"completions/mean_terminated_length": 531.7960815429688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 199.0,
"epoch": 0.2016,
"grad_norm": 0.2585393488407135,
"learning_rate": 3.3333333333333335e-07,
"loss": 0.0195,
"num_tokens": 49398982.0,
"reward": 1.11328125,
"reward_std": 0.2384297251701355,
"rewards/accuracy_reward_step": 0.61328125,
"rewards/format_reward_step": 1.0,
"step": 189
},
{
"calib/answer_extract_rate": 1.0,
"calib/avg_num_step_conf": 6.9453125,
"calib/ece": 0.33152343750000024,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.046875,
"calib/gap": 0.0004802733768247691,
"calib/mean_conf": 0.8979296875000002,
"calib/mu_c": 0.8981379310344829,
"calib/mu_w": 0.8976576576576581,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.33152343750000024,
"calib/std_conf": 0.018833432537706554,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.5846989141164858,
"calib/step_q_c_n": 1013.0,
"calib/step_q_gap": 0.032894992547858326,
"calib/step_q_w": 0.5518039215686275,
"calib/step_q_w_n": 765.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1333.0,
"completions/max_terminated_length": 1333.0,
"completions/mean_length": 573.59765625,
"completions/mean_terminated_length": 575.8471069335938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 169.0,
"epoch": 0.20266666666666666,
"grad_norm": 0.2151663452386856,
"learning_rate": 3.055555555555556e-07,
"loss": 0.0058,
"num_tokens": 49675239.0,
"reward": 1.06640625,
"reward_std": 0.2194880247116089,
"rewards/accuracy_reward_step": 0.56640625,
"rewards/format_reward_step": 1.0,
"step": 190
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 6.80078125,
"calib/ece": 0.38462745098039236,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.06274509803921569,
"calib/gap": 0.003984615384615031,
"calib/mean_conf": 0.8944313725490197,
"calib/mu_c": 0.8963846153846154,
"calib/mu_w": 0.8924000000000004,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.38462745098039236,
"calib/std_conf": 0.04210596106182162,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.6451715976331361,
"calib/step_q_c_n": 845.0,
"calib/step_q_gap": 0.07548409763313613,
"calib/step_q_w": 0.5696875,
"calib/step_q_w_n": 896.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1871.0,
"completions/max_terminated_length": 1871.0,
"completions/mean_length": 535.6328125,
"completions/mean_terminated_length": 537.7333374023438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 141.0,
"epoch": 0.20373333333333332,
"grad_norm": 0.2562410831451416,
"learning_rate": 2.7777777777777776e-07,
"loss": -0.0141,
"num_tokens": 49940337.0,
"reward": 1.005859375,
"reward_std": 0.2665815055370331,
"rewards/accuracy_reward_step": 0.5078125,
"rewards/format_reward_step": 0.99609375,
"step": 191
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 6.07421875,
"calib/ece": 0.31270588235294133,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.08235294117647059,
"calib/gap": 0.003866666666666685,
"calib/mean_conf": 0.9009411764705882,
"calib/mu_c": 0.9025333333333336,
"calib/mu_w": 0.898666666666667,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.31270588235294133,
"calib/std_conf": 0.021618267298025014,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.6592729439809298,
"calib/step_q_c_n": 839.0,
"calib/step_q_gap": 0.0744824411876337,
"calib/step_q_w": 0.5847905027932961,
"calib/step_q_w_n": 716.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1958.0,
"completions/max_terminated_length": 1958.0,
"completions/mean_length": 546.18359375,
"completions/mean_terminated_length": 548.3255004882812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 41.0,
"epoch": 0.2048,
"grad_norm": 0.23097290098667145,
"learning_rate": 2.5000000000000004e-07,
"loss": 0.0133,
"num_tokens": 50208944.0,
"reward": 1.083984375,
"reward_std": 0.2510668635368347,
"rewards/accuracy_reward_step": 0.5859375,
"rewards/format_reward_step": 0.99609375,
"step": 192
},
{
"calib/answer_extract_rate": 1.0,
"calib/avg_num_step_conf": 6.25390625,
"calib/ece": 0.3089453125000002,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.07421875,
"calib/gap": 0.010991030546321334,
"calib/mean_conf": 0.8909765625000002,
"calib/mu_c": 0.8955704697986581,
"calib/mu_w": 0.8845794392523367,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3089453125000002,
"calib/std_conf": 0.052226065816636,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.663194130925508,
"calib/step_q_c_n": 886.0,
"calib/step_q_gap": 0.03484448057585765,
"calib/step_q_w": 0.6283496503496504,
"calib/step_q_w_n": 715.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1400.0,
"completions/max_terminated_length": 1400.0,
"completions/mean_length": 521.45703125,
"completions/mean_terminated_length": 523.5020141601562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 198.0,
"epoch": 0.20586666666666667,
"grad_norm": 0.26548540592193604,
"learning_rate": 2.2222222222222224e-07,
"loss": 0.0236,
"num_tokens": 50471957.0,
"reward": 1.08203125,
"reward_std": 0.305894136428833,
"rewards/accuracy_reward_step": 0.58203125,
"rewards/format_reward_step": 1.0,
"step": 193
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 5.98046875,
"calib/ece": 0.37913385826771684,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.07086614173228346,
"calib/gap": 0.005250418916402655,
"calib/mean_conf": 0.891732283464567,
"calib/mu_c": 0.8942748091603054,
"calib/mu_w": 0.8890243902439028,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.37755905511811055,
"calib/std_conf": 0.06434521179242914,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.616088560885609,
"calib/step_q_c_n": 813.0,
"calib/step_q_gap": 0.008372683448283125,
"calib/step_q_w": 0.6077158774373259,
"calib/step_q_w_n": 718.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1359.0,
"completions/max_terminated_length": 1359.0,
"completions/mean_length": 490.5078125,
"completions/mean_terminated_length": 492.431396484375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 178.0,
"epoch": 0.20693333333333333,
"grad_norm": 0.22926728427410126,
"learning_rate": 1.9444444444444447e-07,
"loss": -0.0175,
"num_tokens": 50727279.0,
"reward": 1.0078125,
"reward_std": 0.22462846338748932,
"rewards/accuracy_reward_step": 0.51171875,
"rewards/format_reward_step": 0.9921875,
"step": 194
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 5.99609375,
"calib/ece": 0.3208661417322838,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.051181102362204724,
"calib/gap": -0.0008417572636532089,
"calib/mean_conf": 0.8996062992125985,
"calib/mu_c": 0.8992517006802723,
"calib/mu_w": 0.9000934579439255,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3208661417322838,
"calib/std_conf": 0.01993697055637064,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.6379933481152994,
"calib/step_q_c_n": 902.0,
"calib/step_q_gap": 0.038135528210086034,
"calib/step_q_w": 0.5998578199052134,
"calib/step_q_w_n": 633.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1642.0,
"completions/max_terminated_length": 1642.0,
"completions/mean_length": 515.328125,
"completions/mean_terminated_length": 517.3490600585938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 149.0,
"epoch": 0.208,
"grad_norm": 0.2591729164123535,
"learning_rate": 1.6666666666666668e-07,
"loss": 0.0099,
"num_tokens": 50988995.0,
"reward": 1.068359375,
"reward_std": 0.25752705335617065,
"rewards/accuracy_reward_step": 0.57421875,
"rewards/format_reward_step": 0.98828125,
"step": 195
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 5.734375,
"calib/ece": 0.28435294117647086,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.043137254901960784,
"calib/gap": 0.01677419354838705,
"calib/mean_conf": 0.8921960784313727,
"calib/mu_c": 0.8987741935483875,
"calib/mu_w": 0.8820000000000005,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.28435294117647086,
"calib/std_conf": 0.07015305944381316,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.6559217877094973,
"calib/step_q_c_n": 895.0,
"calib/step_q_gap": 0.032710618425029536,
"calib/step_q_w": 0.6232111692844677,
"calib/step_q_w_n": 573.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2382.0,
"completions/max_terminated_length": 2382.0,
"completions/mean_length": 460.08203125,
"completions/mean_terminated_length": 460.08203125,
"completions/min_length": 190.0,
"completions/min_terminated_length": 190.0,
"epoch": 0.20906666666666668,
"grad_norm": 0.2433953881263733,
"learning_rate": 1.3888888888888888e-07,
"loss": 0.0054,
"num_tokens": 51233128.0,
"reward": 1.103515625,
"reward_std": 0.18032386898994446,
"rewards/accuracy_reward_step": 0.60546875,
"rewards/format_reward_step": 0.99609375,
"step": 196
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 6.0625,
"calib/ece": 0.38945098039215703,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.027450980392156862,
"calib/gap": 0.006306755260243269,
"calib/mean_conf": 0.8953333333333334,
"calib/mu_c": 0.8984496124031008,
"calib/mu_w": 0.8921428571428576,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.38945098039215703,
"calib/std_conf": 0.05834806536660958,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.6825699067909454,
"calib/step_q_c_n": 751.0,
"calib/step_q_gap": 0.05902433875099533,
"calib/step_q_w": 0.6235455680399501,
"calib/step_q_w_n": 801.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2409.0,
"completions/max_terminated_length": 2409.0,
"completions/mean_length": 535.27734375,
"completions/mean_terminated_length": 535.27734375,
"completions/min_length": 197.0,
"completions/min_terminated_length": 197.0,
"epoch": 0.21013333333333334,
"grad_norm": 0.2739056944847107,
"learning_rate": 1.1111111111111112e-07,
"loss": 0.0172,
"num_tokens": 51499023.0,
"reward": 1.001953125,
"reward_std": 0.28341710567474365,
"rewards/accuracy_reward_step": 0.50390625,
"rewards/format_reward_step": 0.99609375,
"step": 197
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/avg_num_step_conf": 6.0546875,
"calib/ece": 0.2997647058823531,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.043137254901960784,
"calib/gap": 0.0005882352941175562,
"calib/mean_conf": 0.899764705882353,
"calib/mu_c": 0.9000000000000002,
"calib/mu_w": 0.8994117647058827,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.2997647058823531,
"calib/std_conf": 0.016754967932040052,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.6448475289169295,
"calib/step_q_c_n": 951.0,
"calib/step_q_gap": 0.02696772925081936,
"calib/step_q_w": 0.6178797996661102,
"calib/step_q_w_n": 599.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1541.0,
"completions/max_terminated_length": 1541.0,
"completions/mean_length": 489.25390625,
"completions/mean_terminated_length": 489.25390625,
"completions/min_length": 146.0,
"completions/min_terminated_length": 146.0,
"epoch": 0.2112,
"grad_norm": 0.26366642117500305,
"learning_rate": 8.333333333333334e-08,
"loss": 0.033,
"num_tokens": 51753464.0,
"reward": 1.09375,
"reward_std": 0.23958630859851837,
"rewards/accuracy_reward_step": 0.59765625,
"rewards/format_reward_step": 0.9921875,
"step": 198
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/avg_num_step_conf": 6.60546875,
"calib/ece": 0.34881422924901206,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.05533596837944664,
"calib/gap": -0.03208812260536431,
"calib/mean_conf": 0.8843873517786562,
"calib/mu_c": 0.8706896551724138,
"calib/mu_w": 0.9027777777777781,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.33003952569169986,
"calib/std_conf": 0.11989834362679651,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.6364526659412406,
"calib/step_q_c_n": 919.0,
"calib/step_q_gap": 0.042475982003416735,
"calib/step_q_w": 0.5939766839378239,
"calib/step_q_w_n": 772.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2360.0,
"completions/max_terminated_length": 2360.0,
"completions/mean_length": 576.015625,
"completions/mean_terminated_length": 578.2745361328125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 172.0,
"epoch": 0.21226666666666666,
"grad_norm": 0.27529919147491455,
"learning_rate": 5.555555555555556e-08,
"loss": 0.0123,
"num_tokens": 52028932.0,
"reward": 1.05859375,
"reward_std": 0.29672741889953613,
"rewards/accuracy_reward_step": 0.56640625,
"rewards/format_reward_step": 0.984375,
"step": 199
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/avg_num_step_conf": 5.5546875,
"calib/ece": 0.2839920948616605,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.06719367588932806,
"calib/gap": 0.0021319058947925074,
"calib/mean_conf": 0.9005928853754942,
"calib/mu_c": 0.9014102564102566,
"calib/mu_w": 0.8992783505154641,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.2839920948616605,
"calib/std_conf": 0.015680993151585224,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.6641888619854723,
"calib/step_q_c_n": 826.0,
"calib/step_q_gap": 0.0346083250727206,
"calib/step_q_w": 0.6295805369127517,
"calib/step_q_w_n": 596.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2556.0,
"completions/max_terminated_length": 2556.0,
"completions/mean_length": 537.5,
"completions/mean_terminated_length": 539.6078491210938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 57.0,
"epoch": 0.21333333333333335,
"grad_norm": 0.24357278645038605,
"learning_rate": 2.777777777777778e-08,
"loss": -0.0128,
"num_tokens": 52298388.0,
"reward": 1.103515625,
"reward_std": 0.20465883612632751,
"rewards/accuracy_reward_step": 0.609375,
"rewards/format_reward_step": 0.98828125,
"step": 200
},
{
"epoch": 0.21333333333333335,
"step": 200,
"total_flos": 0.0,
"train_loss": 0.020373184509808198,
"train_runtime": 5912.1429,
"train_samples_per_second": 8.66,
"train_steps_per_second": 0.034
}
],
"logging_steps": 1,
"max_steps": 200,
"num_input_tokens_seen": 52298388,
"num_train_epochs": 1,
"save_steps": 25,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}