Files
PureRL-1.5B-v7-stage1-reaso…/trainer_state.json
ModelHub XC 9d0a1e7848 初始化项目,由ModelHub XC社区提供模型
Model: zhaohq/PureRL-1.5B-v7-stage1-reasoning
Source: Original Platform
2026-06-06 20:55:26 +08:00

9238 lines
353 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.21333333333333335,
"eval_steps": 500,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"calib/answer_extract_rate": 0.08203125,
"calib/auroc": 0.6944444444444445,
"calib/avg_num_step_conf": 0.3359375,
"calib/ece": 0.6230769230769231,
"calib/final_conf_rate": 0.05078125,
"calib/format_rate": 0.04296875,
"calib/frac_conf_gt_0.9": 0.7692307692307693,
"calib/gap": 0.03861111111111115,
"calib/mean_conf": 0.9307692307692309,
"calib/mu_c": 0.9575,
"calib/mu_w": 0.9188888888888889,
"calib/nonempty_final_conf_rate": 0.05078125,
"calib/nonempty_reasoning_rate": 0.09765625,
"calib/nonempty_step_conf_rate": 0.0703125,
"calib/pce": 0.6230769230769231,
"calib/std_conf": 0.07965903671384378,
"calib/step_conf_rate": 0.0703125,
"calib/step_q_c": 0.8921052631578947,
"calib/step_q_c_n": 19.0,
"calib/step_q_gap": 0.19807541241162607,
"calib/step_q_w": 0.6940298507462687,
"calib/step_q_w_n": 67.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08984375,
"completions/max_length": 2955.0,
"completions/max_terminated_length": 2955.0,
"completions/mean_length": 613.67578125,
"completions/mean_terminated_length": 674.2532348632812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0010666666666666667,
"grad_norm": 0.004072976764291525,
"learning_rate": 2.5000000000000004e-07,
"loss": 0.0316,
"num_tokens": 264685.0,
"reward": 0.037109375,
"reward_std": 0.07518024742603302,
"rewards/accuracy_reward_step": 0.015625,
"rewards/format_reward_step": 0.04296875,
"step": 1
},
{
"calib/answer_extract_rate": 0.13671875,
"calib/auroc": 0.5338345864661654,
"calib/avg_num_step_conf": 0.55078125,
"calib/ece": 0.6261538461538463,
"calib/final_conf_rate": 0.1015625,
"calib/format_rate": 0.08984375,
"calib/frac_conf_gt_0.9": 0.7692307692307693,
"calib/gap": 0.002406015037593856,
"calib/mean_conf": 0.8953846153846153,
"calib/mu_c": 0.897142857142857,
"calib/mu_w": 0.8947368421052632,
"calib/nonempty_final_conf_rate": 0.1015625,
"calib/nonempty_reasoning_rate": 0.14453125,
"calib/nonempty_step_conf_rate": 0.109375,
"calib/pce": 0.6261538461538463,
"calib/std_conf": 0.18653172073466937,
"calib/step_conf_rate": 0.109375,
"calib/step_q_c": 0.781,
"calib/step_q_c_n": 20.0,
"calib/step_q_gap": -0.042553719008264435,
"calib/step_q_w": 0.8235537190082645,
"calib/step_q_w_n": 121.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0546875,
"completions/max_length": 3001.0,
"completions/max_terminated_length": 3001.0,
"completions/mean_length": 646.4609375,
"completions/mean_terminated_length": 683.8594970703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0021333333333333334,
"grad_norm": 0.006365728098899126,
"learning_rate": 5.000000000000001e-07,
"loss": 0.0624,
"num_tokens": 533467.0,
"reward": 0.076171875,
"reward_std": 0.143990620970726,
"rewards/accuracy_reward_step": 0.03125,
"rewards/format_reward_step": 0.08984375,
"step": 2
},
{
"calib/answer_extract_rate": 0.0546875,
"calib/auroc": 0.1111111111111111,
"calib/avg_num_step_conf": 0.27734375,
"calib/ece": 0.79,
"calib/final_conf_rate": 0.04296875,
"calib/format_rate": 0.0390625,
"calib/frac_conf_gt_0.9": 0.8181818181818182,
"calib/gap": -0.06666666666666654,
"calib/mean_conf": 0.9445454545454546,
"calib/mu_c": 0.89,
"calib/mu_w": 0.9566666666666666,
"calib/nonempty_final_conf_rate": 0.04296875,
"calib/nonempty_reasoning_rate": 0.0625,
"calib/nonempty_step_conf_rate": 0.05078125,
"calib/pce": 0.7763636363636364,
"calib/std_conf": 0.03985510948505994,
"calib/step_conf_rate": 0.05078125,
"calib/step_q_c": 0.791875,
"calib/step_q_c_n": 16.0,
"calib/step_q_gap": 0.06823863636363625,
"calib/step_q_w": 0.7236363636363637,
"calib/step_q_w_n": 55.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08984375,
"completions/max_length": 2785.0,
"completions/max_terminated_length": 2785.0,
"completions/mean_length": 623.34375,
"completions/mean_terminated_length": 684.8755493164062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0032,
"grad_norm": 0.004042081534862518,
"learning_rate": 7.5e-07,
"loss": 0.0159,
"num_tokens": 798299.0,
"reward": 0.02734375,
"reward_std": 0.06346043199300766,
"rewards/accuracy_reward_step": 0.0078125,
"rewards/format_reward_step": 0.0390625,
"step": 3
},
{
"calib/answer_extract_rate": 0.05859375,
"calib/avg_num_step_conf": 0.26171875,
"calib/ece": 0.9642857142857143,
"calib/final_conf_rate": 0.02734375,
"calib/format_rate": 0.0234375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/mean_conf": 0.9642857142857143,
"calib/mu_c": NaN,
"calib/mu_w": 0.9642857142857143,
"calib/nonempty_final_conf_rate": 0.02734375,
"calib/nonempty_reasoning_rate": 0.0859375,
"calib/nonempty_step_conf_rate": 0.0546875,
"calib/pce": 0.9642857142857143,
"calib/std_conf": 0.013997084244475315,
"calib/step_conf_rate": 0.0546875,
"calib/step_q_w": 0.758955223880597,
"calib/step_q_w_n": 67.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08984375,
"completions/max_length": 2999.0,
"completions/max_terminated_length": 2999.0,
"completions/mean_length": 706.9296875,
"completions/mean_terminated_length": 776.7124633789062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.004266666666666667,
"grad_norm": 0.002299716928973794,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.0139,
"num_tokens": 1085441.0,
"reward": 0.01171875,
"reward_std": 0.02551448345184326,
"rewards/accuracy_reward_step": 0.0,
"rewards/format_reward_step": 0.0234375,
"step": 4
},
{
"calib/answer_extract_rate": 0.07421875,
"calib/auroc": 0.47619047619047616,
"calib/avg_num_step_conf": 0.5078125,
"calib/ece": 0.786470588235294,
"calib/final_conf_rate": 0.06640625,
"calib/format_rate": 0.05078125,
"calib/frac_conf_gt_0.9": 0.7647058823529411,
"calib/gap": -0.043571428571428594,
"calib/mean_conf": 0.9358823529411765,
"calib/mu_c": 0.9,
"calib/mu_w": 0.9435714285714286,
"calib/nonempty_final_conf_rate": 0.06640625,
"calib/nonempty_reasoning_rate": 0.10546875,
"calib/nonempty_step_conf_rate": 0.09375,
"calib/pce": 0.7729411764705882,
"calib/std_conf": 0.05402165399491225,
"calib/step_conf_rate": 0.09375,
"calib/step_q_c": 0.7225,
"calib/step_q_c_n": 12.0,
"calib/step_q_gap": -0.04648305084745752,
"calib/step_q_w": 0.7689830508474575,
"calib/step_q_w_n": 118.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.07421875,
"completions/max_length": 2801.0,
"completions/max_terminated_length": 2801.0,
"completions/mean_length": 665.02734375,
"completions/mean_terminated_length": 718.3417358398438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.005333333333333333,
"grad_norm": 0.005368305370211601,
"learning_rate": 1.25e-06,
"loss": 0.0187,
"num_tokens": 1362376.0,
"reward": 0.041015625,
"reward_std": 0.10456298291683197,
"rewards/accuracy_reward_step": 0.015625,
"rewards/format_reward_step": 0.05078125,
"step": 5
},
{
"calib/answer_extract_rate": 0.07421875,
"calib/auroc": 0.8333333333333334,
"calib/avg_num_step_conf": 0.42578125,
"calib/ece": 0.7906249999999999,
"calib/final_conf_rate": 0.0625,
"calib/format_rate": 0.0546875,
"calib/frac_conf_gt_0.9": 0.6875,
"calib/gap": 0.14600000000000002,
"calib/mean_conf": 0.8531249999999999,
"calib/mu_c": 0.99,
"calib/mu_w": 0.844,
"calib/nonempty_final_conf_rate": 0.0625,
"calib/nonempty_reasoning_rate": 0.10546875,
"calib/nonempty_step_conf_rate": 0.0859375,
"calib/pce": 0.7906249999999999,
"calib/std_conf": 0.2658881049896742,
"calib/step_conf_rate": 0.0859375,
"calib/step_q_c": 0.9866666666666667,
"calib/step_q_c_n": 3.0,
"calib/step_q_gap": 0.22364779874213825,
"calib/step_q_w": 0.7630188679245284,
"calib/step_q_w_n": 106.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1171875,
"completions/max_length": 3037.0,
"completions/max_terminated_length": 3037.0,
"completions/mean_length": 605.75390625,
"completions/mean_terminated_length": 686.1636962890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0064,
"grad_norm": 0.005905688274651766,
"learning_rate": 1.5e-06,
"loss": -0.0005,
"num_tokens": 1623401.0,
"reward": 0.03125,
"reward_std": 0.07227109372615814,
"rewards/accuracy_reward_step": 0.00390625,
"rewards/format_reward_step": 0.0546875,
"step": 6
},
{
"calib/answer_extract_rate": 0.09375,
"calib/auroc": 0.6785714285714286,
"calib/avg_num_step_conf": 0.453125,
"calib/ece": 0.8293333333333335,
"calib/final_conf_rate": 0.05859375,
"calib/format_rate": 0.0390625,
"calib/frac_conf_gt_0.9": 0.6,
"calib/gap": 0.0685714285714285,
"calib/mean_conf": 0.8960000000000001,
"calib/mu_c": 0.96,
"calib/mu_w": 0.8914285714285715,
"calib/nonempty_final_conf_rate": 0.05859375,
"calib/nonempty_reasoning_rate": 0.11328125,
"calib/nonempty_step_conf_rate": 0.07421875,
"calib/pce": 0.8293333333333335,
"calib/std_conf": 0.10150862032359616,
"calib/step_conf_rate": 0.07421875,
"calib/step_q_c": 0.83,
"calib/step_q_c_n": 4.0,
"calib/step_q_gap": 0.08101785714285714,
"calib/step_q_w": 0.7489821428571428,
"calib/step_q_w_n": 112.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.09765625,
"completions/max_length": 3013.0,
"completions/max_terminated_length": 3013.0,
"completions/mean_length": 713.95703125,
"completions/mean_terminated_length": 791.22509765625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.007466666666666667,
"grad_norm": 0.004854544997215271,
"learning_rate": 1.75e-06,
"loss": 0.0212,
"num_tokens": 1913598.0,
"reward": 0.0234375,
"reward_std": 0.06629125773906708,
"rewards/accuracy_reward_step": 0.00390625,
"rewards/format_reward_step": 0.0390625,
"step": 7
},
{
"calib/answer_extract_rate": 0.08203125,
"calib/auroc": 0.6458333333333334,
"calib/avg_num_step_conf": 0.37109375,
"calib/ece": 0.5511333333333334,
"calib/final_conf_rate": 0.0703125,
"calib/format_rate": 0.05859375,
"calib/frac_conf_gt_0.9": 0.7777777777777778,
"calib/gap": 0.11330000000000007,
"calib/mean_conf": 0.8844666666666666,
"calib/mu_c": 0.96,
"calib/mu_w": 0.8466999999999999,
"calib/nonempty_final_conf_rate": 0.0703125,
"calib/nonempty_reasoning_rate": 0.109375,
"calib/nonempty_step_conf_rate": 0.08984375,
"calib/pce": 0.5511333333333334,
"calib/std_conf": 0.2288445663666838,
"calib/step_conf_rate": 0.08984375,
"calib/step_q_c": 0.8510344827586208,
"calib/step_q_c_n": 29.0,
"calib/step_q_gap": 0.08688296760710568,
"calib/step_q_w": 0.7641515151515151,
"calib/step_q_w_n": 66.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08203125,
"completions/max_length": 3069.0,
"completions/max_terminated_length": 3069.0,
"completions/mean_length": 695.8515625,
"completions/mean_terminated_length": 758.0339965820312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.008533333333333334,
"grad_norm": 0.0034913497511297464,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.0336,
"num_tokens": 2198248.0,
"reward": 0.052734375,
"reward_std": 0.11081699281930923,
"rewards/accuracy_reward_step": 0.0234375,
"rewards/format_reward_step": 0.05859375,
"step": 8
},
{
"calib/answer_extract_rate": 0.0703125,
"calib/auroc": 0.7545454545454545,
"calib/avg_num_step_conf": 0.34765625,
"calib/ece": 0.6024999999999999,
"calib/final_conf_rate": 0.0625,
"calib/format_rate": 0.04296875,
"calib/frac_conf_gt_0.9": 0.8125,
"calib/gap": 0.07127272727272738,
"calib/mean_conf": 0.915,
"calib/mu_c": 0.9640000000000001,
"calib/mu_w": 0.8927272727272727,
"calib/nonempty_final_conf_rate": 0.0625,
"calib/nonempty_reasoning_rate": 0.09375,
"calib/nonempty_step_conf_rate": 0.08203125,
"calib/pce": 0.6024999999999999,
"calib/std_conf": 0.13847382424126228,
"calib/step_conf_rate": 0.08203125,
"calib/step_q_c": 0.8352941176470587,
"calib/step_q_c_n": 17.0,
"calib/step_q_gap": 0.04543300653594762,
"calib/step_q_w": 0.7898611111111111,
"calib/step_q_w_n": 72.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.09765625,
"completions/max_length": 2929.0,
"completions/max_terminated_length": 2929.0,
"completions/mean_length": 649.29296875,
"completions/mean_terminated_length": 719.562744140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0096,
"grad_norm": 0.00364897632971406,
"learning_rate": 2.25e-06,
"loss": 0.0231,
"num_tokens": 2472003.0,
"reward": 0.041015625,
"reward_std": 0.09140633791685104,
"rewards/accuracy_reward_step": 0.01953125,
"rewards/format_reward_step": 0.04296875,
"step": 9
},
{
"calib/answer_extract_rate": 0.1171875,
"calib/auroc": 0.4083333333333333,
"calib/avg_num_step_conf": 0.421875,
"calib/ece": 0.8147826086956523,
"calib/final_conf_rate": 0.08984375,
"calib/format_rate": 0.0625,
"calib/frac_conf_gt_0.9": 0.8695652173913043,
"calib/gap": -0.006000000000000338,
"calib/mean_conf": 0.9452173913043481,
"calib/mu_c": 0.94,
"calib/mu_w": 0.9460000000000003,
"calib/nonempty_final_conf_rate": 0.08984375,
"calib/nonempty_reasoning_rate": 0.125,
"calib/nonempty_step_conf_rate": 0.08203125,
"calib/pce": 0.8147826086956523,
"calib/std_conf": 0.033345303458806964,
"calib/step_conf_rate": 0.08203125,
"calib/step_q_c": 0.8081818181818181,
"calib/step_q_c_n": 11.0,
"calib/step_q_gap": 0.04779006560449861,
"calib/step_q_w": 0.7603917525773195,
"calib/step_q_w_n": 97.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08203125,
"completions/max_length": 3008.0,
"completions/max_terminated_length": 3008.0,
"completions/mean_length": 662.890625,
"completions/mean_terminated_length": 722.1276245117188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.010666666666666666,
"grad_norm": 0.0058372789062559605,
"learning_rate": 2.5e-06,
"loss": 0.0497,
"num_tokens": 2748503.0,
"reward": 0.04296875,
"reward_std": 0.10015765577554703,
"rewards/accuracy_reward_step": 0.01171875,
"rewards/format_reward_step": 0.0625,
"step": 10
},
{
"calib/answer_extract_rate": 0.12890625,
"calib/auroc": 0.4722222222222222,
"calib/avg_num_step_conf": 0.6328125,
"calib/ece": 0.6062962962962962,
"calib/final_conf_rate": 0.10546875,
"calib/format_rate": 0.0859375,
"calib/frac_conf_gt_0.9": 0.8148148148148148,
"calib/gap": 0.01722222222222225,
"calib/mean_conf": 0.9396296296296295,
"calib/mu_c": 0.951111111111111,
"calib/mu_w": 0.9338888888888888,
"calib/nonempty_final_conf_rate": 0.10546875,
"calib/nonempty_reasoning_rate": 0.1796875,
"calib/nonempty_step_conf_rate": 0.1484375,
"calib/pce": 0.6062962962962962,
"calib/std_conf": 0.050733981707163735,
"calib/step_conf_rate": 0.1484375,
"calib/step_q_c": 0.7855208333333333,
"calib/step_q_c_n": 32.0,
"calib/step_q_gap": 0.017208430232558136,
"calib/step_q_w": 0.7683124031007752,
"calib/step_q_w_n": 129.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0625,
"completions/max_length": 3064.0,
"completions/max_terminated_length": 3064.0,
"completions/mean_length": 675.328125,
"completions/mean_terminated_length": 720.3500366210938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.011733333333333333,
"grad_norm": 0.005052024032920599,
"learning_rate": 2.7500000000000004e-06,
"loss": 0.0647,
"num_tokens": 3025867.0,
"reward": 0.078125,
"reward_std": 0.1473740190267563,
"rewards/accuracy_reward_step": 0.03515625,
"rewards/format_reward_step": 0.0859375,
"step": 11
},
{
"calib/answer_extract_rate": 0.18359375,
"calib/auroc": 0.3007246376811594,
"calib/avg_num_step_conf": 0.6015625,
"calib/ece": 0.6214285714285712,
"calib/final_conf_rate": 0.13671875,
"calib/format_rate": 0.109375,
"calib/frac_conf_gt_0.9": 0.7714285714285715,
"calib/gap": -0.06829710144927525,
"calib/mean_conf": 0.9357142857142857,
"calib/mu_c": 0.8908333333333333,
"calib/mu_w": 0.9591304347826085,
"calib/nonempty_final_conf_rate": 0.13671875,
"calib/nonempty_reasoning_rate": 0.21484375,
"calib/nonempty_step_conf_rate": 0.14453125,
"calib/pce": 0.607142857142857,
"calib/std_conf": 0.0871217773098819,
"calib/step_conf_rate": 0.14453125,
"calib/step_q_c": 0.7774651162790697,
"calib/step_q_c_n": 43.0,
"calib/step_q_gap": -0.023886235072281714,
"calib/step_q_w": 0.8013513513513514,
"calib/step_q_w_n": 111.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.09765625,
"completions/max_length": 3068.0,
"completions/max_terminated_length": 3068.0,
"completions/mean_length": 503.56640625,
"completions/mean_terminated_length": 558.06494140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0128,
"grad_norm": 0.007954644039273262,
"learning_rate": 3e-06,
"loss": 0.0132,
"num_tokens": 3258956.0,
"reward": 0.1015625,
"reward_std": 0.16241982579231262,
"rewards/accuracy_reward_step": 0.046875,
"rewards/format_reward_step": 0.109375,
"step": 12
},
{
"calib/answer_extract_rate": 0.1953125,
"calib/auroc": 0.7095238095238096,
"calib/avg_num_step_conf": 1.1171875,
"calib/ece": 0.6761702127659575,
"calib/final_conf_rate": 0.18359375,
"calib/format_rate": 0.15625,
"calib/frac_conf_gt_0.9": 0.8085106382978723,
"calib/gap": 0.04947619047619067,
"calib/mean_conf": 0.9314893617021275,
"calib/mu_c": 0.9683333333333334,
"calib/mu_w": 0.9188571428571427,
"calib/nonempty_final_conf_rate": 0.18359375,
"calib/nonempty_reasoning_rate": 0.265625,
"calib/nonempty_step_conf_rate": 0.23828125,
"calib/pce": 0.6761702127659575,
"calib/std_conf": 0.10431088402043429,
"calib/step_conf_rate": 0.23828125,
"calib/step_q_c": 0.7465442307692307,
"calib/step_q_c_n": 52.0,
"calib/step_q_gap": 0.020316880341880172,
"calib/step_q_w": 0.7262273504273505,
"calib/step_q_w_n": 234.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.05859375,
"completions/max_length": 3009.0,
"completions/max_terminated_length": 3009.0,
"completions/mean_length": 592.3671875,
"completions/mean_terminated_length": 629.236572265625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.013866666666666666,
"grad_norm": 0.010851632803678513,
"learning_rate": 3.2500000000000002e-06,
"loss": 0.0849,
"num_tokens": 3515194.0,
"reward": 0.12890625,
"reward_std": 0.21148112416267395,
"rewards/accuracy_reward_step": 0.05078125,
"rewards/format_reward_step": 0.15625,
"step": 13
},
{
"calib/answer_extract_rate": 0.26953125,
"calib/auroc": 0.5944444444444444,
"calib/avg_num_step_conf": 1.2734375,
"calib/ece": 0.6903459016393441,
"calib/final_conf_rate": 0.23828125,
"calib/format_rate": 0.21875,
"calib/frac_conf_gt_0.9": 0.7704918032786885,
"calib/gap": -0.0544688888888889,
"calib/mean_conf": 0.8801819672131148,
"calib/mu_c": 0.84,
"calib/mu_w": 0.8944688888888889,
"calib/nonempty_final_conf_rate": 0.23828125,
"calib/nonempty_reasoning_rate": 0.33203125,
"calib/nonempty_step_conf_rate": 0.29296875,
"calib/pce": 0.6541163934426227,
"calib/std_conf": 0.22998876345196273,
"calib/step_conf_rate": 0.29296875,
"calib/step_q_c": 0.8246969696969697,
"calib/step_q_c_n": 66.0,
"calib/step_q_gap": 0.03877773892773895,
"calib/step_q_w": 0.7859192307692308,
"calib/step_q_w_n": 260.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0625,
"completions/max_length": 3048.0,
"completions/max_terminated_length": 3048.0,
"completions/mean_length": 598.984375,
"completions/mean_terminated_length": 638.9166870117188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.014933333333333333,
"grad_norm": 0.011941647157073021,
"learning_rate": 3.5e-06,
"loss": 0.0874,
"num_tokens": 3773934.0,
"reward": 0.171875,
"reward_std": 0.27124205231666565,
"rewards/accuracy_reward_step": 0.0625,
"rewards/format_reward_step": 0.21875,
"step": 14
},
{
"calib/answer_extract_rate": 0.41796875,
"calib/auroc": 0.525,
"calib/avg_num_step_conf": 1.9609375,
"calib/ece": 0.6812045454545455,
"calib/final_conf_rate": 0.34375,
"calib/format_rate": 0.3046875,
"calib/frac_conf_gt_0.9": 0.7386363636363636,
"calib/gap": 0.002794117647058725,
"calib/mean_conf": 0.8873409090909092,
"calib/mu_c": 0.8895,
"calib/mu_w": 0.8867058823529412,
"calib/nonempty_final_conf_rate": 0.34375,
"calib/nonempty_reasoning_rate": 0.46875,
"calib/nonempty_step_conf_rate": 0.3671875,
"calib/pce": 0.6706363636363637,
"calib/std_conf": 0.20150608418310825,
"calib/step_conf_rate": 0.3671875,
"calib/step_q_c": 0.7998780487804878,
"calib/step_q_c_n": 82.0,
"calib/step_q_gap": 0.03317754878048773,
"calib/step_q_w": 0.7667005000000001,
"calib/step_q_w_n": 420.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0390625,
"completions/max_length": 2684.0,
"completions/max_terminated_length": 2684.0,
"completions/mean_length": 521.24609375,
"completions/mean_terminated_length": 542.4349365234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 5.0,
"epoch": 0.016,
"grad_norm": 0.01295667327940464,
"learning_rate": 3.7500000000000005e-06,
"loss": 0.1144,
"num_tokens": 4015253.0,
"reward": 0.23046875,
"reward_std": 0.3025348484516144,
"rewards/accuracy_reward_step": 0.078125,
"rewards/format_reward_step": 0.3046875,
"step": 15
},
{
"calib/answer_extract_rate": 0.44921875,
"calib/auroc": 0.6197289156626505,
"calib/avg_num_step_conf": 2.45703125,
"calib/ece": 0.6569582554517132,
"calib/final_conf_rate": 0.41796875,
"calib/format_rate": 0.34375,
"calib/frac_conf_gt_0.9": 0.719626168224299,
"calib/gap": 0.07901666666666685,
"calib/mean_conf": 0.8707900311526481,
"calib/mu_c": 0.9320833333333334,
"calib/mu_w": 0.8530666666666665,
"calib/nonempty_final_conf_rate": 0.41796875,
"calib/nonempty_reasoning_rate": 0.51171875,
"calib/nonempty_step_conf_rate": 0.4140625,
"calib/pce": 0.6517246105919001,
"calib/std_conf": 0.2131570209511951,
"calib/step_conf_rate": 0.4140625,
"calib/step_q_c": 0.785578947368421,
"calib/step_q_c_n": 95.0,
"calib/step_q_gap": 0.07400553912872077,
"calib/step_q_w": 0.7115734082397003,
"calib/step_q_w_n": 534.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.03515625,
"completions/max_length": 2855.0,
"completions/max_terminated_length": 2855.0,
"completions/mean_length": 544.0546875,
"completions/mean_terminated_length": 563.8785400390625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.017066666666666667,
"grad_norm": 0.014287542551755905,
"learning_rate": 4.000000000000001e-06,
"loss": 0.0985,
"num_tokens": 4263379.0,
"reward": 0.265625,
"reward_std": 0.34611278772354126,
"rewards/accuracy_reward_step": 0.09375,
"rewards/format_reward_step": 0.34375,
"step": 16
},
{
"calib/answer_extract_rate": 0.57421875,
"calib/auroc": 0.5104166666666666,
"calib/avg_num_step_conf": 2.859375,
"calib/ece": 0.6633582089552237,
"calib/final_conf_rate": 0.5234375,
"calib/format_rate": 0.4453125,
"calib/frac_conf_gt_0.9": 0.746268656716418,
"calib/gap": 0.06454487179487167,
"calib/mean_conf": 0.8872388059701491,
"calib/mu_c": 0.9373333333333334,
"calib/mu_w": 0.8727884615384617,
"calib/nonempty_final_conf_rate": 0.5234375,
"calib/nonempty_reasoning_rate": 0.65625,
"calib/nonempty_step_conf_rate": 0.58984375,
"calib/pce": 0.6633582089552237,
"calib/std_conf": 0.20051572235614626,
"calib/step_conf_rate": 0.58984375,
"calib/step_q_c": 0.7984973856209151,
"calib/step_q_c_n": 153.0,
"calib/step_q_gap": 0.06040964814250405,
"calib/step_q_w": 0.738087737478411,
"calib/step_q_w_n": 579.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0390625,
"completions/max_length": 3027.0,
"completions/max_terminated_length": 3027.0,
"completions/mean_length": 477.1640625,
"completions/mean_terminated_length": 496.5609436035156,
"completions/min_length": 0.0,
"completions/min_terminated_length": 18.0,
"epoch": 0.018133333333333335,
"grad_norm": 0.010662867687642574,
"learning_rate": 4.25e-06,
"loss": 0.1355,
"num_tokens": 4489061.0,
"reward": 0.34375,
"reward_std": 0.382437139749527,
"rewards/accuracy_reward_step": 0.12109375,
"rewards/format_reward_step": 0.4453125,
"step": 17
},
{
"calib/answer_extract_rate": 0.60546875,
"calib/auroc": 0.500975800156128,
"calib/avg_num_step_conf": 2.6796875,
"calib/ece": 0.7496902097902098,
"calib/final_conf_rate": 0.55859375,
"calib/format_rate": 0.46875,
"calib/frac_conf_gt_0.9": 0.7482517482517482,
"calib/gap": 0.018005581576893137,
"calib/mean_conf": 0.8965433566433565,
"calib/mu_c": 0.9119047619047619,
"calib/mu_w": 0.8938991803278687,
"calib/nonempty_final_conf_rate": 0.55859375,
"calib/nonempty_reasoning_rate": 0.70703125,
"calib/nonempty_step_conf_rate": 0.60546875,
"calib/pce": 0.7496902097902098,
"calib/std_conf": 0.18018304351173703,
"calib/step_conf_rate": 0.60546875,
"calib/step_q_c": 0.7883529411764707,
"calib/step_q_c_n": 85.0,
"calib/step_q_gap": 0.06148322403836748,
"calib/step_q_w": 0.7268697171381032,
"calib/step_q_w_n": 601.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 2758.0,
"completions/max_terminated_length": 2758.0,
"completions/mean_length": 399.8984375,
"completions/mean_terminated_length": 407.86456298828125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 7.0,
"epoch": 0.0192,
"grad_norm": 0.011393941938877106,
"learning_rate": 4.5e-06,
"loss": 0.1707,
"num_tokens": 4702155.0,
"reward": 0.3203125,
"reward_std": 0.3561851978302002,
"rewards/accuracy_reward_step": 0.0859375,
"rewards/format_reward_step": 0.46875,
"step": 18
},
{
"calib/answer_extract_rate": 0.82421875,
"calib/auroc": 0.47573930519666957,
"calib/avg_num_step_conf": 4.03125,
"calib/ece": 0.6919258536585366,
"calib/final_conf_rate": 0.80078125,
"calib/format_rate": 0.74609375,
"calib/frac_conf_gt_0.9": 0.7170731707317073,
"calib/gap": -0.020047459086993902,
"calib/mean_conf": 0.876191219512195,
"calib/mu_c": 0.8603488372093022,
"calib/mu_w": 0.8803962962962961,
"calib/nonempty_final_conf_rate": 0.80078125,
"calib/nonempty_reasoning_rate": 0.92578125,
"calib/nonempty_step_conf_rate": 0.8828125,
"calib/pce": 0.679180487804878,
"calib/std_conf": 0.21539969816989882,
"calib/step_conf_rate": 0.8828125,
"calib/step_q_c": 0.7497914572864323,
"calib/step_q_c_n": 199.0,
"calib/step_q_gap": 0.020316867450497877,
"calib/step_q_w": 0.7294745898359344,
"calib/step_q_w_n": 833.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2477.0,
"completions/max_terminated_length": 2477.0,
"completions/mean_length": 307.09375,
"completions/mean_terminated_length": 307.09375,
"completions/min_length": 8.0,
"completions/min_terminated_length": 8.0,
"epoch": 0.020266666666666665,
"grad_norm": 0.012713734991848469,
"learning_rate": 4.75e-06,
"loss": 0.094,
"num_tokens": 4885531.0,
"reward": 0.544921875,
"reward_std": 0.4169868230819702,
"rewards/accuracy_reward_step": 0.171875,
"rewards/format_reward_step": 0.74609375,
"step": 19
},
{
"calib/answer_extract_rate": 0.90625,
"calib/auroc": 0.46930579460699934,
"calib/avg_num_step_conf": 4.18359375,
"calib/ece": 0.6262445414847163,
"calib/final_conf_rate": 0.89453125,
"calib/format_rate": 0.8359375,
"calib/frac_conf_gt_0.9": 0.7379912663755459,
"calib/gap": 0.02309523809523817,
"calib/mean_conf": 0.901353711790393,
"calib/mu_c": 0.9180952380952381,
"calib/mu_w": 0.8949999999999999,
"calib/nonempty_final_conf_rate": 0.89453125,
"calib/nonempty_reasoning_rate": 0.97265625,
"calib/nonempty_step_conf_rate": 0.93359375,
"calib/pce": 0.6262445414847163,
"calib/std_conf": 0.15072576024234574,
"calib/step_conf_rate": 0.93359375,
"calib/step_q_c": 0.7587795275590552,
"calib/step_q_c_n": 254.0,
"calib/step_q_gap": 0.05256165730201723,
"calib/step_q_w": 0.706217870257038,
"calib/step_q_w_n": 817.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 3028.0,
"completions/max_terminated_length": 3028.0,
"completions/mean_length": 258.875,
"completions/mean_terminated_length": 260.91339111328125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 26.0,
"epoch": 0.021333333333333333,
"grad_norm": 0.012193223461508751,
"learning_rate": 5e-06,
"loss": 0.0445,
"num_tokens": 5056675.0,
"reward": 0.6640625,
"reward_std": 0.4222579300403595,
"rewards/accuracy_reward_step": 0.24609375,
"rewards/format_reward_step": 0.8359375,
"step": 20
},
{
"calib/answer_extract_rate": 0.9609375,
"calib/auroc": 0.4590178571428572,
"calib/avg_num_step_conf": 4.671875,
"calib/ece": 0.6457531380753138,
"calib/final_conf_rate": 0.93359375,
"calib/format_rate": 0.921875,
"calib/frac_conf_gt_0.9": 0.7782426778242678,
"calib/gap": 0.016340178571428554,
"calib/mean_conf": 0.9060041841004185,
"calib/mu_c": 0.91796875,
"calib/mu_w": 0.9016285714285714,
"calib/nonempty_final_conf_rate": 0.93359375,
"calib/nonempty_reasoning_rate": 0.98046875,
"calib/nonempty_step_conf_rate": 0.97265625,
"calib/pce": 0.6419874476987447,
"calib/std_conf": 0.16417677221456925,
"calib/step_conf_rate": 0.97265625,
"calib/step_q_c": 0.7365342960288809,
"calib/step_q_c_n": 277.0,
"calib/step_q_gap": 0.0021822829712094816,
"calib/step_q_w": 0.7343520130576714,
"calib/step_q_w_n": 919.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2860.0,
"completions/max_terminated_length": 2860.0,
"completions/mean_length": 257.77734375,
"completions/mean_terminated_length": 258.7882385253906,
"completions/min_length": 0.0,
"completions/min_terminated_length": 9.0,
"epoch": 0.0224,
"grad_norm": 0.009520730935037136,
"learning_rate": 4.9722222222222224e-06,
"loss": 0.1137,
"num_tokens": 5225626.0,
"reward": 0.71484375,
"reward_std": 0.38410425186157227,
"rewards/accuracy_reward_step": 0.25390625,
"rewards/format_reward_step": 0.921875,
"step": 21
},
{
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.549842877094972,
"calib/avg_num_step_conf": 4.54296875,
"calib/ece": 0.6523456790123459,
"calib/final_conf_rate": 0.94921875,
"calib/format_rate": 0.9375,
"calib/frac_conf_gt_0.9": 0.7777777777777778,
"calib/gap": 0.041869762569832436,
"calib/mean_conf": 0.9157201646090536,
"calib/mu_c": 0.9465625,
"calib/mu_w": 0.9046927374301675,
"calib/nonempty_final_conf_rate": 0.94921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9765625,
"calib/pce": 0.6523456790123459,
"calib/std_conf": 0.1492443100388931,
"calib/step_conf_rate": 0.9765625,
"calib/step_q_c": 0.7564852583870968,
"calib/step_q_c_n": 310.0,
"calib/step_q_gap": 0.00028174138826908557,
"calib/step_q_w": 0.7562035169988277,
"calib/step_q_w_n": 853.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 594.0,
"completions/max_terminated_length": 594.0,
"completions/mean_length": 235.8671875,
"completions/mean_terminated_length": 236.79217529296875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 57.0,
"epoch": 0.023466666666666667,
"grad_norm": 0.007855575531721115,
"learning_rate": 4.944444444444445e-06,
"loss": -0.0545,
"num_tokens": 5387824.0,
"reward": 0.72265625,
"reward_std": 0.3997213840484619,
"rewards/accuracy_reward_step": 0.25390625,
"rewards/format_reward_step": 0.9375,
"step": 22
},
{
"calib/answer_extract_rate": 0.96484375,
"calib/auroc": 0.480857706210976,
"calib/avg_num_step_conf": 4.28125,
"calib/ece": 0.6531983805668015,
"calib/final_conf_rate": 0.96484375,
"calib/format_rate": 0.9296875,
"calib/frac_conf_gt_0.9": 0.7732793522267206,
"calib/gap": -0.009895662175484654,
"calib/mean_conf": 0.9220242914979757,
"calib/mu_c": 0.9148529411764705,
"calib/mu_w": 0.9247486033519552,
"calib/nonempty_final_conf_rate": 0.96484375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.6499595141700404,
"calib/std_conf": 0.12176814522402662,
"calib/step_conf_rate": 0.984375,
"calib/step_q_c": 0.7559437086092715,
"calib/step_q_c_n": 302.0,
"calib/step_q_gap": -0.030645992946003786,
"calib/step_q_w": 0.7865897015552753,
"calib/step_q_w_n": 793.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1973.0,
"completions/max_terminated_length": 1973.0,
"completions/mean_length": 247.96875,
"completions/mean_terminated_length": 247.96875,
"completions/min_length": 56.0,
"completions/min_terminated_length": 56.0,
"epoch": 0.024533333333333334,
"grad_norm": 0.008924001827836037,
"learning_rate": 4.9166666666666665e-06,
"loss": 0.0309,
"num_tokens": 5555240.0,
"reward": 0.734375,
"reward_std": 0.3484991192817688,
"rewards/accuracy_reward_step": 0.26953125,
"rewards/format_reward_step": 0.9296875,
"step": 23
},
{
"calib/answer_extract_rate": 0.96484375,
"calib/auroc": 0.451530612244898,
"calib/avg_num_step_conf": 4.7890625,
"calib/ece": 0.709469387755102,
"calib/final_conf_rate": 0.95703125,
"calib/format_rate": 0.94140625,
"calib/frac_conf_gt_0.9": 0.8081632653061225,
"calib/gap": -0.015066137566137394,
"calib/mean_conf": 0.9255510204081633,
"calib/mu_c": 0.9139285714285715,
"calib/mu_w": 0.9289947089947089,
"calib/nonempty_final_conf_rate": 0.95703125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.7032244897959183,
"calib/std_conf": 0.1260782169316851,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7920491803278689,
"calib/step_q_c_n": 244.0,
"calib/step_q_gap": 0.009168324930720173,
"calib/step_q_w": 0.7828808553971487,
"calib/step_q_w_n": 982.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2769.0,
"completions/max_terminated_length": 2769.0,
"completions/mean_length": 251.62109375,
"completions/mean_terminated_length": 251.62109375,
"completions/min_length": 67.0,
"completions/min_terminated_length": 67.0,
"epoch": 0.0256,
"grad_norm": 0.006747386883944273,
"learning_rate": 4.888888888888889e-06,
"loss": 0.0257,
"num_tokens": 5724167.0,
"reward": 0.697265625,
"reward_std": 0.2716612219810486,
"rewards/accuracy_reward_step": 0.2265625,
"rewards/format_reward_step": 0.94140625,
"step": 24
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.48230574324324316,
"calib/avg_num_step_conf": 4.828125,
"calib/ece": 0.6783534136546185,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.8032128514056225,
"calib/gap": 0.007891047297297282,
"calib/mean_conf": 0.9327309236947792,
"calib/mu_c": 0.93859375,
"calib/mu_w": 0.9307027027027027,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.6770281124497992,
"calib/std_conf": 0.1078587312700969,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.8123549488054608,
"calib/step_q_c_n": 293.0,
"calib/step_q_gap": 0.018632785496871196,
"calib/step_q_w": 0.7937221633085896,
"calib/step_q_w_n": 943.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 562.0,
"completions/max_terminated_length": 562.0,
"completions/mean_length": 246.1328125,
"completions/mean_terminated_length": 247.09805297851562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 46.0,
"epoch": 0.02666666666666667,
"grad_norm": 0.0071069239638745785,
"learning_rate": 4.861111111111111e-06,
"loss": 0.0321,
"num_tokens": 5890401.0,
"reward": 0.736328125,
"reward_std": 0.33219289779663086,
"rewards/accuracy_reward_step": 0.25390625,
"rewards/format_reward_step": 0.96484375,
"step": 25
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5279473317056156,
"calib/avg_num_step_conf": 5.06640625,
"calib/ece": 0.694979919678715,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.8313253012048193,
"calib/gap": 0.0028749564004187933,
"calib/mean_conf": 0.9319277108433734,
"calib/mu_c": 0.9340983606557379,
"calib/mu_w": 0.9312234042553191,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.6909638554216868,
"calib/std_conf": 0.11348220848109943,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7745049668874173,
"calib/step_q_c_n": 302.0,
"calib/step_q_gap": -0.023032922559819036,
"calib/step_q_w": 0.7975378894472364,
"calib/step_q_w_n": 995.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2198.0,
"completions/max_terminated_length": 2198.0,
"completions/mean_length": 269.98046875,
"completions/mean_terminated_length": 269.98046875,
"completions/min_length": 93.0,
"completions/min_terminated_length": 93.0,
"epoch": 0.027733333333333332,
"grad_norm": 0.006924426648765802,
"learning_rate": 4.833333333333333e-06,
"loss": 0.0364,
"num_tokens": 6064756.0,
"reward": 0.72265625,
"reward_std": 0.3338688611984253,
"rewards/accuracy_reward_step": 0.2421875,
"rewards/format_reward_step": 0.9609375,
"step": 26
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.48424116424116426,
"calib/avg_num_step_conf": 5.109375,
"calib/ece": 0.683996,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.848,
"calib/gap": -0.011803326403326597,
"calib/mean_conf": 0.9371959999999999,
"calib/mu_c": 0.9284615384615383,
"calib/mu_w": 0.9402648648648649,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.6805960000000001,
"calib/std_conf": 0.09535859470441037,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.8074,
"calib/step_q_c_n": 330.0,
"calib/step_q_gap": -0.007578169734151263,
"calib/step_q_w": 0.8149781697341513,
"calib/step_q_w_n": 978.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 662.0,
"completions/max_terminated_length": 662.0,
"completions/mean_length": 252.43359375,
"completions/mean_terminated_length": 253.4235382080078,
"completions/min_length": 0.0,
"completions/min_terminated_length": 81.0,
"epoch": 0.0288,
"grad_norm": 0.007143011782318354,
"learning_rate": 4.805555555555556e-06,
"loss": 0.002,
"num_tokens": 6234595.0,
"reward": 0.736328125,
"reward_std": 0.360126793384552,
"rewards/accuracy_reward_step": 0.25390625,
"rewards/format_reward_step": 0.96484375,
"step": 27
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.47192028985507245,
"calib/avg_num_step_conf": 5.17578125,
"calib/ece": 0.6729446640316206,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.8774703557312253,
"calib/gap": 0.0005344202898549533,
"calib/mean_conf": 0.9381620553359683,
"calib/mu_c": 0.9385507246376811,
"calib/mu_w": 0.9380163043478261,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.6691897233201581,
"calib/std_conf": 0.10872182940265937,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.8142175226586104,
"calib/step_q_c_n": 331.0,
"calib/step_q_gap": -0.015080264061711701,
"calib/step_q_w": 0.8292977867203221,
"calib/step_q_w_n": 994.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 888.0,
"completions/max_terminated_length": 888.0,
"completions/mean_length": 278.08203125,
"completions/mean_terminated_length": 279.1725769042969,
"completions/min_length": 0.0,
"completions/min_terminated_length": 53.0,
"epoch": 0.029866666666666666,
"grad_norm": 0.006403841078281403,
"learning_rate": 4.777777777777778e-06,
"loss": 0.0181,
"num_tokens": 6412728.0,
"reward": 0.76171875,
"reward_std": 0.33537590503692627,
"rewards/accuracy_reward_step": 0.26953125,
"rewards/format_reward_step": 0.984375,
"step": 28
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.47236394557823125,
"calib/avg_num_step_conf": 5.5546875,
"calib/ece": 0.7100937500000002,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.8828125,
"calib/gap": 0.0041292517006803475,
"calib/mean_conf": 0.9401718750000001,
"calib/mu_c": 0.9433333333333335,
"calib/mu_w": 0.9392040816326531,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.7079453125000001,
"calib/std_conf": 0.11483341258747112,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.8137987012987012,
"calib/step_q_c_n": 308.0,
"calib/step_q_gap": -0.00451997015551775,
"calib/step_q_w": 0.818318671454219,
"calib/step_q_w_n": 1114.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 893.0,
"completions/max_terminated_length": 893.0,
"completions/mean_length": 298.6484375,
"completions/mean_terminated_length": 299.8196105957031,
"completions/min_length": 0.0,
"completions/min_terminated_length": 98.0,
"epoch": 0.030933333333333334,
"grad_norm": 0.005184306297451258,
"learning_rate": 4.75e-06,
"loss": -0.0327,
"num_tokens": 6596310.0,
"reward": 0.73046875,
"reward_std": 0.2800149917602539,
"rewards/accuracy_reward_step": 0.234375,
"rewards/format_reward_step": 0.9921875,
"step": 29
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.46903431763766956,
"calib/avg_num_step_conf": 5.5546875,
"calib/ece": 0.6655822891566265,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.8554216867469879,
"calib/gap": 0.004382139664804496,
"calib/mean_conf": 0.9467067871485944,
"calib/mu_c": 0.9498570000000001,
"calib/mu_w": 0.9454748603351956,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.6655822891566265,
"calib/std_conf": 0.07900735903474725,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.832691556122449,
"calib/step_q_c_n": 392.0,
"calib/step_q_gap": 0.01874835223895388,
"calib/step_q_w": 0.8139432038834952,
"calib/step_q_w_n": 1030.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2168.0,
"completions/max_terminated_length": 2168.0,
"completions/mean_length": 309.96484375,
"completions/mean_terminated_length": 309.96484375,
"completions/min_length": 106.0,
"completions/min_terminated_length": 106.0,
"epoch": 0.032,
"grad_norm": 0.006838866975158453,
"learning_rate": 4.722222222222222e-06,
"loss": 0.0511,
"num_tokens": 6782645.0,
"reward": 0.76171875,
"reward_std": 0.3281659185886383,
"rewards/accuracy_reward_step": 0.27734375,
"rewards/format_reward_step": 0.96875,
"step": 30
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.49033649698015536,
"calib/avg_num_step_conf": 6.02734375,
"calib/ece": 0.6988406374501991,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.8725099601593626,
"calib/gap": 0.013340897325280432,
"calib/mean_conf": 0.9418685258964143,
"calib/mu_c": 0.9519672131147541,
"calib/mu_w": 0.9386263157894736,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.6988406374501991,
"calib/std_conf": 0.09511378510473928,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.8257632398753896,
"calib/step_q_c_n": 321.0,
"calib/step_q_gap": 0.025200228418761128,
"calib/step_q_w": 0.8005630114566284,
"calib/step_q_w_n": 1222.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2094.0,
"completions/max_terminated_length": 2094.0,
"completions/mean_length": 323.72265625,
"completions/mean_terminated_length": 323.72265625,
"completions/min_length": 123.0,
"completions/min_terminated_length": 123.0,
"epoch": 0.03306666666666667,
"grad_norm": 0.0056033809669315815,
"learning_rate": 4.694444444444445e-06,
"loss": 0.0189,
"num_tokens": 6971430.0,
"reward": 0.728515625,
"reward_std": 0.3097946345806122,
"rewards/accuracy_reward_step": 0.23828125,
"rewards/format_reward_step": 0.98046875,
"step": 31
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.49714640198511173,
"calib/avg_num_step_conf": 5.9921875,
"calib/ece": 0.6846215139442231,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.8565737051792829,
"calib/gap": 0.006164598842018254,
"calib/mean_conf": 0.9435856573705178,
"calib/mu_c": 0.9481538461538461,
"calib/mu_w": 0.9419892473118279,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.6846215139442231,
"calib/std_conf": 0.08074782220162297,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.8077514792899407,
"calib/step_q_c_n": 338.0,
"calib/step_q_gap": 0.008119372266529257,
"calib/step_q_w": 0.7996321070234115,
"calib/step_q_w_n": 1196.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1812.0,
"completions/max_terminated_length": 1812.0,
"completions/mean_length": 316.3203125,
"completions/mean_terminated_length": 316.3203125,
"completions/min_length": 108.0,
"completions/min_terminated_length": 108.0,
"epoch": 0.034133333333333335,
"grad_norm": 0.005981651600450277,
"learning_rate": 4.666666666666667e-06,
"loss": 0.0534,
"num_tokens": 7159112.0,
"reward": 0.740234375,
"reward_std": 0.29884395003318787,
"rewards/accuracy_reward_step": 0.25390625,
"rewards/format_reward_step": 0.97265625,
"step": 32
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.49311424100156487,
"calib/avg_num_step_conf": 6.609375,
"calib/ece": 0.6779482071713147,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.9243027888446215,
"calib/gap": -0.00025508607198776456,
"calib/mean_conf": 0.9608167330677292,
"calib/mu_c": 0.9606338028169011,
"calib/mu_w": 0.9608888888888889,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.6779482071713147,
"calib/std_conf": 0.0415897996286572,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.8484463414634147,
"calib/step_q_c_n": 492.0,
"calib/step_q_gap": 0.015152258130081364,
"calib/step_q_w": 0.8332940833333333,
"calib/step_q_w_n": 1200.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1465.0,
"completions/max_terminated_length": 1465.0,
"completions/mean_length": 356.19140625,
"completions/mean_terminated_length": 356.19140625,
"completions/min_length": 136.0,
"completions/min_terminated_length": 136.0,
"epoch": 0.0352,
"grad_norm": 0.0054481374099850655,
"learning_rate": 4.638888888888889e-06,
"loss": 0.0144,
"num_tokens": 7357169.0,
"reward": 0.771484375,
"reward_std": 0.2949320673942566,
"rewards/accuracy_reward_step": 0.28125,
"rewards/format_reward_step": 0.98046875,
"step": 33
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.46460021134202184,
"calib/avg_num_step_conf": 6.3359375,
"calib/ece": 0.6082500000000001,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.8333333333333334,
"calib/gap": -0.0046491722437478344,
"calib/mean_conf": 0.9455515873015873,
"calib/mu_c": 0.942470588235294,
"calib/mu_w": 0.9471197604790418,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.6082500000000001,
"calib/std_conf": 0.042571803460288424,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.8041634980988593,
"calib/step_q_c_n": 526.0,
"calib/step_q_gap": 0.007002001748494302,
"calib/step_q_w": 0.797161496350365,
"calib/step_q_w_n": 1096.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2219.0,
"completions/max_terminated_length": 2219.0,
"completions/mean_length": 333.64453125,
"completions/mean_terminated_length": 333.64453125,
"completions/min_length": 133.0,
"completions/min_terminated_length": 133.0,
"epoch": 0.03626666666666667,
"grad_norm": 0.005749785806983709,
"learning_rate": 4.611111111111112e-06,
"loss": -0.0071,
"num_tokens": 7547694.0,
"reward": 0.822265625,
"reward_std": 0.2967801094055176,
"rewards/accuracy_reward_step": 0.33203125,
"rewards/format_reward_step": 0.98046875,
"step": 34
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5824603521024297,
"calib/avg_num_step_conf": 7.16796875,
"calib/ece": 0.6306877470355731,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.8458498023715415,
"calib/gap": 0.01081652844463854,
"calib/mean_conf": 0.9429407114624507,
"calib/mu_c": 0.950379746835443,
"calib/mu_w": 0.9395632183908045,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.6306877470355731,
"calib/std_conf": 0.07253234553563628,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.8060387323943662,
"calib/step_q_c_n": 568.0,
"calib/step_q_gap": -0.004912806674300163,
"calib/step_q_w": 0.8109515390686663,
"calib/step_q_w_n": 1267.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 927.0,
"completions/max_terminated_length": 927.0,
"completions/mean_length": 384.0,
"completions/mean_terminated_length": 385.50592041015625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 139.0,
"epoch": 0.037333333333333336,
"grad_norm": 0.005311410874128342,
"learning_rate": 4.583333333333333e-06,
"loss": -0.0117,
"num_tokens": 7755254.0,
"reward": 0.802734375,
"reward_std": 0.302001416683197,
"rewards/accuracy_reward_step": 0.30859375,
"rewards/format_reward_step": 0.98828125,
"step": 35
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.4753882685149532,
"calib/avg_num_step_conf": 6.94140625,
"calib/ece": 0.3556723320158103,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.7312252964426877,
"calib/gap": -0.004662835322808423,
"calib/mean_conf": 0.9214501976284585,
"calib/mu_c": 0.9194965986394558,
"calib/mu_w": 0.9241594339622642,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.34804743083003953,
"calib/std_conf": 0.10258914435497857,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7864002205071663,
"calib/step_q_c_n": 907.0,
"calib/step_q_gap": 0.007645622806016883,
"calib/step_q_w": 0.7787545977011494,
"calib/step_q_w_n": 870.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2170.0,
"completions/max_terminated_length": 2170.0,
"completions/mean_length": 368.97265625,
"completions/mean_terminated_length": 370.41961669921875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 125.0,
"epoch": 0.0384,
"grad_norm": 0.006220624782145023,
"learning_rate": 4.555555555555556e-06,
"loss": 0.0187,
"num_tokens": 7952423.0,
"reward": 1.06640625,
"reward_std": 0.3570103645324707,
"rewards/accuracy_reward_step": 0.57421875,
"rewards/format_reward_step": 0.984375,
"step": 36
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.49999999999999994,
"calib/avg_num_step_conf": 6.84765625,
"calib/ece": 0.4950980392156864,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.6823529411764706,
"calib/gap": 0.010418034857287206,
"calib/mean_conf": 0.9118039215686274,
"calib/mu_c": 0.9178504672897198,
"calib/mu_w": 0.9074324324324325,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.49364705882352955,
"calib/std_conf": 0.11364151057513168,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.7692052023121386,
"calib/step_q_c_n": 692.0,
"calib/step_q_gap": 0.0002136848757579024,
"calib/step_q_w": 0.7689915174363807,
"calib/step_q_w_n": 1061.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2760.0,
"completions/max_terminated_length": 2760.0,
"completions/mean_length": 382.6015625,
"completions/mean_terminated_length": 382.6015625,
"completions/min_length": 113.0,
"completions/min_terminated_length": 113.0,
"epoch": 0.039466666666666664,
"grad_norm": 0.006087584886699915,
"learning_rate": 4.527777777777778e-06,
"loss": -0.0051,
"num_tokens": 8157465.0,
"reward": 0.912109375,
"reward_std": 0.297029584646225,
"rewards/accuracy_reward_step": 0.41796875,
"rewards/format_reward_step": 0.98828125,
"step": 37
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.4294117647058824,
"calib/avg_num_step_conf": 7.4453125,
"calib/ece": 0.5149603174603173,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.6507936507936508,
"calib/gap": -0.015439215686274377,
"calib/mean_conf": 0.9161507936507937,
"calib/mu_c": 0.9069607843137255,
"calib/mu_w": 0.9223999999999999,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.5131746031746031,
"calib/std_conf": 0.06709287184009244,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7816969696969697,
"calib/step_q_c_n": 660.0,
"calib/step_q_gap": 0.01830414465684127,
"calib/step_q_w": 0.7633928250401284,
"calib/step_q_w_n": 1246.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2449.0,
"completions/max_terminated_length": 2449.0,
"completions/mean_length": 397.84765625,
"completions/mean_terminated_length": 397.84765625,
"completions/min_length": 118.0,
"completions/min_terminated_length": 118.0,
"epoch": 0.04053333333333333,
"grad_norm": 0.006814414635300636,
"learning_rate": 4.5e-06,
"loss": 0.0884,
"num_tokens": 8366202.0,
"reward": 0.890625,
"reward_std": 0.3247237503528595,
"rewards/accuracy_reward_step": 0.3984375,
"rewards/format_reward_step": 0.984375,
"step": 38
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.45514705882352946,
"calib/avg_num_step_conf": 7.30078125,
"calib/ece": 0.5707874015748031,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.6062992125984252,
"calib/gap": -0.009245098039215582,
"calib/mean_conf": 0.8953543307086614,
"calib/mu_c": 0.8891666666666667,
"calib/mu_w": 0.8984117647058822,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.5677165354330709,
"calib/std_conf": 0.11664803561795813,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7312433392539964,
"calib/step_q_c_n": 563.0,
"calib/step_q_gap": -0.03334318448260387,
"calib/step_q_w": 0.7645865237366003,
"calib/step_q_w_n": 1306.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1102.0,
"completions/max_terminated_length": 1102.0,
"completions/mean_length": 388.00390625,
"completions/mean_terminated_length": 389.5255126953125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 160.0,
"epoch": 0.0416,
"grad_norm": 0.005457035731524229,
"learning_rate": 4.472222222222223e-06,
"loss": 0.0047,
"num_tokens": 8571619.0,
"reward": 0.828125,
"reward_std": 0.2757005989551544,
"rewards/accuracy_reward_step": 0.33203125,
"rewards/format_reward_step": 0.9921875,
"step": 39
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.45176423946081673,
"calib/avg_num_step_conf": 8.60546875,
"calib/ece": 0.5330588235294118,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.6431372549019608,
"calib/gap": -0.004187260473106802,
"calib/mean_conf": 0.901686274509804,
"calib/mu_c": 0.8990425531914894,
"calib/mu_w": 0.9032298136645962,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.5330588235294118,
"calib/std_conf": 0.0936152933283032,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7234317862165963,
"calib/step_q_c_n": 711.0,
"calib/step_q_gap": -0.06037699394426166,
"calib/step_q_w": 0.783808780160858,
"calib/step_q_w_n": 1492.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1915.0,
"completions/max_terminated_length": 1915.0,
"completions/mean_length": 455.390625,
"completions/mean_terminated_length": 455.390625,
"completions/min_length": 121.0,
"completions/min_terminated_length": 121.0,
"epoch": 0.042666666666666665,
"grad_norm": 0.005192178767174482,
"learning_rate": 4.444444444444444e-06,
"loss": 0.0217,
"num_tokens": 8794959.0,
"reward": 0.865234375,
"reward_std": 0.2953948974609375,
"rewards/accuracy_reward_step": 0.3671875,
"rewards/format_reward_step": 0.99609375,
"step": 40
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.49226804123711343,
"calib/avg_num_step_conf": 7.61328125,
"calib/ece": 0.2926693227091633,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.5856573705179283,
"calib/gap": 0.00016668898112182085,
"calib/mean_conf": 0.8964940239043824,
"calib/mu_c": 0.8965584415584416,
"calib/mu_w": 0.8963917525773197,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.287808764940239,
"calib/std_conf": 0.09422603029419956,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7469074421513446,
"calib/step_q_c_n": 1066.0,
"calib/step_q_gap": -0.01468372432657139,
"calib/step_q_w": 0.761591166477916,
"calib/step_q_w_n": 883.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2427.0,
"completions/max_terminated_length": 2427.0,
"completions/mean_length": 399.8828125,
"completions/mean_terminated_length": 401.4510192871094,
"completions/min_length": 0.0,
"completions/min_terminated_length": 114.0,
"epoch": 0.04373333333333333,
"grad_norm": 0.005844591651111841,
"learning_rate": 4.416666666666667e-06,
"loss": 0.0438,
"num_tokens": 9004577.0,
"reward": 1.091796875,
"reward_std": 0.3158378601074219,
"rewards/accuracy_reward_step": 0.6015625,
"rewards/format_reward_step": 0.98046875,
"step": 41
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5313282442748092,
"calib/avg_num_step_conf": 7.1171875,
"calib/ece": 0.40785156250000004,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.5859375,
"calib/gap": 0.01725007633587794,
"calib/mean_conf": 0.8961328125000001,
"calib/mu_c": 0.90496,
"calib/mu_w": 0.887709923664122,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.40785156250000004,
"calib/std_conf": 0.09651488478903056,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7085758293838863,
"calib/step_q_c_n": 844.0,
"calib/step_q_gap": -0.00878613380629778,
"calib/step_q_w": 0.7173619631901841,
"calib/step_q_w_n": 978.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1047.0,
"completions/max_terminated_length": 1047.0,
"completions/mean_length": 346.79296875,
"completions/mean_terminated_length": 348.1529541015625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 130.0,
"epoch": 0.0448,
"grad_norm": 0.005971704609692097,
"learning_rate": 4.388888888888889e-06,
"loss": 0.0239,
"num_tokens": 9197724.0,
"reward": 0.98828125,
"reward_std": 0.28300461173057556,
"rewards/accuracy_reward_step": 0.48828125,
"rewards/format_reward_step": 1.0,
"step": 42
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5105181145305805,
"calib/avg_num_step_conf": 7.57421875,
"calib/ece": 0.5034387351778657,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.616600790513834,
"calib/gap": 0.006023893000908664,
"calib/mean_conf": 0.906600790513834,
"calib/mu_c": 0.9101960784313724,
"calib/mu_w": 0.9041721854304637,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.5034387351778657,
"calib/std_conf": 0.0662095332400494,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7505651558073655,
"calib/step_q_c_n": 706.0,
"calib/step_q_gap": 0.012503517526749164,
"calib/step_q_w": 0.7380616382806163,
"calib/step_q_w_n": 1233.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1910.0,
"completions/max_terminated_length": 1910.0,
"completions/mean_length": 402.4765625,
"completions/mean_terminated_length": 404.054931640625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 157.0,
"epoch": 0.04586666666666667,
"grad_norm": 0.005978772882372141,
"learning_rate": 4.361111111111112e-06,
"loss": 0.0315,
"num_tokens": 9405982.0,
"reward": 0.888671875,
"reward_std": 0.3324914872646332,
"rewards/accuracy_reward_step": 0.3984375,
"rewards/format_reward_step": 0.98046875,
"step": 43
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.4941358024691358,
"calib/avg_num_step_conf": 8.42578125,
"calib/ece": 0.5652380952380953,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.6587301587301587,
"calib/gap": -0.007876543209876519,
"calib/mean_conf": 0.9070634920634921,
"calib/mu_c": 0.902,
"calib/mu_w": 0.9098765432098765,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.5575793650793651,
"calib/std_conf": 0.09409609627310389,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7202049780380673,
"calib/step_q_c_n": 683.0,
"calib/step_q_gap": 0.003715832854892187,
"calib/step_q_w": 0.7164891451831751,
"calib/step_q_w_n": 1474.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2495.0,
"completions/max_terminated_length": 2495.0,
"completions/mean_length": 462.9609375,
"completions/mean_terminated_length": 462.9609375,
"completions/min_length": 147.0,
"completions/min_terminated_length": 147.0,
"epoch": 0.046933333333333334,
"grad_norm": 0.005756228230893612,
"learning_rate": 4.333333333333334e-06,
"loss": 0.082,
"num_tokens": 9630820.0,
"reward": 0.84375,
"reward_std": 0.30472099781036377,
"rewards/accuracy_reward_step": 0.3515625,
"rewards/format_reward_step": 0.984375,
"step": 44
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5135653871177619,
"calib/avg_num_step_conf": 8.78515625,
"calib/ece": 0.5013147410358565,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.6972111553784861,
"calib/gap": 0.0026278464541313884,
"calib/mean_conf": 0.9187649402390439,
"calib/mu_c": 0.9202830188679245,
"calib/mu_w": 0.9176551724137931,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.4988844621513943,
"calib/std_conf": 0.07374763502114198,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7569809322033897,
"calib/step_q_c_n": 944.0,
"calib/step_q_gap": 0.004582464770439554,
"calib/step_q_w": 0.7523984674329501,
"calib/step_q_w_n": 1305.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2497.0,
"completions/max_terminated_length": 2497.0,
"completions/mean_length": 480.34765625,
"completions/mean_terminated_length": 480.34765625,
"completions/min_length": 134.0,
"completions/min_terminated_length": 134.0,
"epoch": 0.048,
"grad_norm": 0.005964437033981085,
"learning_rate": 4.305555555555556e-06,
"loss": 0.0476,
"num_tokens": 9858837.0,
"reward": 0.904296875,
"reward_std": 0.36644038558006287,
"rewards/accuracy_reward_step": 0.4140625,
"rewards/format_reward_step": 0.98046875,
"step": 45
},
{
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.476935914552737,
"calib/avg_num_step_conf": 9.515625,
"calib/ece": 0.5004048582995951,
"calib/final_conf_rate": 0.96484375,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.7813765182186235,
"calib/gap": 5.6074766355163064e-05,
"calib/mean_conf": 0.9220242914979755,
"calib/mu_c": 0.9220560747663551,
"calib/mu_w": 0.9219999999999999,
"calib/nonempty_final_conf_rate": 0.96484375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.4946153846153846,
"calib/std_conf": 0.09860952016211864,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7414906832298138,
"calib/step_q_c_n": 805.0,
"calib/step_q_gap": 0.011960027190573985,
"calib/step_q_w": 0.7295306560392398,
"calib/step_q_w_n": 1631.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2472.0,
"completions/max_terminated_length": 2472.0,
"completions/mean_length": 497.73046875,
"completions/mean_terminated_length": 497.73046875,
"completions/min_length": 125.0,
"completions/min_terminated_length": 125.0,
"epoch": 0.04906666666666667,
"grad_norm": 0.005311571527272463,
"learning_rate": 4.277777777777778e-06,
"loss": 0.0152,
"num_tokens": 10091024.0,
"reward": 0.8984375,
"reward_std": 0.3058202266693115,
"rewards/accuracy_reward_step": 0.41796875,
"rewards/format_reward_step": 0.9609375,
"step": 46
},
{
"calib/answer_extract_rate": 0.9609375,
"calib/auroc": 0.4700066137566138,
"calib/avg_num_step_conf": 9.84765625,
"calib/ece": 0.4472357723577235,
"calib/final_conf_rate": 0.9609375,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.8048780487804879,
"calib/gap": 0.005452380952380764,
"calib/mean_conf": 0.935040650406504,
"calib/mu_c": 0.9378333333333333,
"calib/mu_w": 0.9323809523809525,
"calib/nonempty_final_conf_rate": 0.9609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.4472357723577235,
"calib/std_conf": 0.08181750447551771,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.731946446961895,
"calib/step_q_c_n": 971.0,
"calib/step_q_gap": 0.007961930832862785,
"calib/step_q_w": 0.7239845161290323,
"calib/step_q_w_n": 1550.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2475.0,
"completions/max_terminated_length": 2475.0,
"completions/mean_length": 499.41015625,
"completions/mean_terminated_length": 501.36865234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 159.0,
"epoch": 0.050133333333333335,
"grad_norm": 0.005696811247617006,
"learning_rate": 4.25e-06,
"loss": 0.0391,
"num_tokens": 10324849.0,
"reward": 0.94921875,
"reward_std": 0.3375682234764099,
"rewards/accuracy_reward_step": 0.46875,
"rewards/format_reward_step": 0.9609375,
"step": 47
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.46742485238862047,
"calib/avg_num_step_conf": 7.88671875,
"calib/ece": 0.5768110236220473,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.7716535433070866,
"calib/gap": -0.012356414385400027,
"calib/mean_conf": 0.9311417322834645,
"calib/mu_c": 0.9232608695652172,
"calib/mu_w": 0.9356172839506173,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.5728740157480315,
"calib/std_conf": 0.08088995386554051,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7187003058103976,
"calib/step_q_c_n": 654.0,
"calib/step_q_gap": 0.02775455800981097,
"calib/step_q_w": 0.6909457478005866,
"calib/step_q_w_n": 1364.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1657.0,
"completions/max_terminated_length": 1657.0,
"completions/mean_length": 421.19140625,
"completions/mean_terminated_length": 421.19140625,
"completions/min_length": 159.0,
"completions/min_terminated_length": 159.0,
"epoch": 0.0512,
"grad_norm": 0.006146811414510012,
"learning_rate": 4.222222222222223e-06,
"loss": 0.0513,
"num_tokens": 10536362.0,
"reward": 0.853515625,
"reward_std": 0.3062730133533478,
"rewards/accuracy_reward_step": 0.359375,
"rewards/format_reward_step": 0.98828125,
"step": 48
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5641134566368211,
"calib/avg_num_step_conf": 8.5390625,
"calib/ece": 0.5155599999999999,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.84,
"calib/gap": 0.012402457355728536,
"calib/mean_conf": 0.94356,
"calib/mu_c": 0.9506542056074767,
"calib/mu_w": 0.9382517482517482,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.5155599999999999,
"calib/std_conf": 0.050791007078025134,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7094543297746145,
"calib/step_q_c_n": 843.0,
"calib/step_q_gap": -0.002788335899249872,
"calib/step_q_w": 0.7122426656738644,
"calib/step_q_w_n": 1343.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 3051.0,
"completions/max_terminated_length": 3051.0,
"completions/mean_length": 465.72265625,
"completions/mean_terminated_length": 465.72265625,
"completions/min_length": 153.0,
"completions/min_terminated_length": 153.0,
"epoch": 0.05226666666666667,
"grad_norm": 0.0052592456340789795,
"learning_rate": 4.194444444444445e-06,
"loss": 0.0491,
"num_tokens": 10760123.0,
"reward": 0.90625,
"reward_std": 0.24188709259033203,
"rewards/accuracy_reward_step": 0.41796875,
"rewards/format_reward_step": 0.9765625,
"step": 49
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5087383056133057,
"calib/avg_num_step_conf": 9.5859375,
"calib/ece": 0.360436507936508,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.8849206349206349,
"calib/gap": 0.009737525987526086,
"calib/mean_conf": 0.9477380952380952,
"calib/mu_c": 0.9517567567567569,
"calib/mu_w": 0.9420192307692308,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.360436507936508,
"calib/std_conf": 0.05085814048424047,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7369622448979591,
"calib/step_q_c_n": 1274.0,
"calib/step_q_gap": 0.015808724965813203,
"calib/step_q_w": 0.7211535199321459,
"calib/step_q_w_n": 1179.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2600.0,
"completions/max_terminated_length": 2600.0,
"completions/mean_length": 508.54296875,
"completions/mean_terminated_length": 510.53729248046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 124.0,
"epoch": 0.05333333333333334,
"grad_norm": 0.00552935479208827,
"learning_rate": 4.166666666666667e-06,
"loss": 0.0365,
"num_tokens": 10995670.0,
"reward": 1.068359375,
"reward_std": 0.3270677626132965,
"rewards/accuracy_reward_step": 0.578125,
"rewards/format_reward_step": 0.98046875,
"step": 50
},
{
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.38816475495307606,
"calib/avg_num_step_conf": 9.22265625,
"calib/ece": 0.5006425702811244,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.8795180722891566,
"calib/gap": -0.008662669447340954,
"calib/mean_conf": 0.9436947791164659,
"calib/mu_c": 0.9389285714285714,
"calib/mu_w": 0.9475912408759124,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.4972690763052207,
"calib/std_conf": 0.08355554637588156,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7176267529665588,
"calib/step_q_c_n": 927.0,
"calib/step_q_gap": 0.03721873344075688,
"calib/step_q_w": 0.6804080195258019,
"calib/step_q_w_n": 1434.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2303.0,
"completions/max_terminated_length": 2303.0,
"completions/mean_length": 480.24609375,
"completions/mean_terminated_length": 480.24609375,
"completions/min_length": 153.0,
"completions/min_terminated_length": 153.0,
"epoch": 0.0544,
"grad_norm": 0.005861993413418531,
"learning_rate": 4.138888888888889e-06,
"loss": 0.0563,
"num_tokens": 11227909.0,
"reward": 0.923828125,
"reward_std": 0.3243404030799866,
"rewards/accuracy_reward_step": 0.4375,
"rewards/format_reward_step": 0.97265625,
"step": 51
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.493083885772565,
"calib/avg_num_step_conf": 8.140625,
"calib/ece": 0.35625984251968495,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.84251968503937,
"calib/gap": 0.009509178990311118,
"calib/mean_conf": 0.9389370078740158,
"calib/mu_c": 0.9429054054054055,
"calib/mu_w": 0.9333962264150943,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.35625984251968495,
"calib/std_conf": 0.056079702565777566,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7226880811496197,
"calib/step_q_c_n": 1183.0,
"calib/step_q_gap": 0.009813497353837297,
"calib/step_q_w": 0.7128745837957824,
"calib/step_q_w_n": 901.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2064.0,
"completions/max_terminated_length": 2064.0,
"completions/mean_length": 425.03125,
"completions/mean_terminated_length": 425.03125,
"completions/min_length": 160.0,
"completions/min_terminated_length": 160.0,
"epoch": 0.055466666666666664,
"grad_norm": 0.006341880187392235,
"learning_rate": 4.111111111111111e-06,
"loss": 0.0423,
"num_tokens": 11444669.0,
"reward": 1.07421875,
"reward_std": 0.30103766918182373,
"rewards/accuracy_reward_step": 0.578125,
"rewards/format_reward_step": 0.9921875,
"step": 52
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5410216718266254,
"calib/avg_num_step_conf": 9.28125,
"calib/ece": 0.4079600000000001,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.888,
"calib/gap": 0.009730392156862666,
"calib/mean_conf": 0.95196,
"calib/mu_c": 0.9563970588235293,
"calib/mu_w": 0.9466666666666667,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.4079600000000001,
"calib/std_conf": 0.04056301763922403,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7413472333600641,
"calib/step_q_c_n": 1247.0,
"calib/step_q_gap": 0.07657221121657432,
"calib/step_q_w": 0.6647750221434898,
"calib/step_q_w_n": 1129.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1956.0,
"completions/max_terminated_length": 1956.0,
"completions/mean_length": 474.7265625,
"completions/mean_terminated_length": 476.5882568359375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 144.0,
"epoch": 0.05653333333333333,
"grad_norm": 0.005445745773613453,
"learning_rate": 4.083333333333334e-06,
"loss": 0.0153,
"num_tokens": 11672023.0,
"reward": 1.01953125,
"reward_std": 0.26240503787994385,
"rewards/accuracy_reward_step": 0.53125,
"rewards/format_reward_step": 0.9765625,
"step": 53
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4626398946675445,
"calib/avg_num_step_conf": 8.015625,
"calib/ece": 0.33367193675889323,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.9090909090909091,
"calib/gap": -0.004582290980908232,
"calib/mean_conf": 0.9461620553359683,
"calib/mu_c": 0.9443870967741937,
"calib/mu_w": 0.948969387755102,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.33359288537549403,
"calib/std_conf": 0.03997402099700347,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7446314907872696,
"calib/step_q_c_n": 1194.0,
"calib/step_q_gap": 0.022119835775614716,
"calib/step_q_w": 0.7225116550116549,
"calib/step_q_w_n": 858.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2277.0,
"completions/max_terminated_length": 2277.0,
"completions/mean_length": 412.94921875,
"completions/mean_terminated_length": 412.94921875,
"completions/min_length": 140.0,
"completions/min_terminated_length": 140.0,
"epoch": 0.0576,
"grad_norm": 0.006437535397708416,
"learning_rate": 4.055555555555556e-06,
"loss": -0.0031,
"num_tokens": 11883970.0,
"reward": 1.103515625,
"reward_std": 0.28321897983551025,
"rewards/accuracy_reward_step": 0.609375,
"rewards/format_reward_step": 0.98828125,
"step": 54
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.4576520601700458,
"calib/avg_num_step_conf": 8.60546875,
"calib/ece": 0.49542168674698794,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.8273092369477911,
"calib/gap": 0.003570307390451277,
"calib/mean_conf": 0.9371887550200803,
"calib/mu_c": 0.9391818181818182,
"calib/mu_w": 0.935611510791367,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.49542168674698794,
"calib/std_conf": 0.060001075241839896,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7288025477707006,
"calib/step_q_c_n": 785.0,
"calib/step_q_gap": -0.007994913442275431,
"calib/step_q_w": 0.7367974612129761,
"calib/step_q_w_n": 1418.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2530.0,
"completions/max_terminated_length": 2530.0,
"completions/mean_length": 471.953125,
"completions/mean_terminated_length": 471.953125,
"completions/min_length": 160.0,
"completions/min_terminated_length": 160.0,
"epoch": 0.058666666666666666,
"grad_norm": 0.0072900657542049885,
"learning_rate": 4.027777777777779e-06,
"loss": 0.0964,
"num_tokens": 12112614.0,
"reward": 0.9140625,
"reward_std": 0.32115888595581055,
"rewards/accuracy_reward_step": 0.4296875,
"rewards/format_reward_step": 0.96875,
"step": 55
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.47011436153408737,
"calib/avg_num_step_conf": 10.078125,
"calib/ece": 0.4711507936507937,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.8849206349206349,
"calib/gap": 0.0061875276426359704,
"calib/mean_conf": 0.9433730158730159,
"calib/mu_c": 0.9466386554621848,
"calib/mu_w": 0.9404511278195489,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.4711507936507937,
"calib/std_conf": 0.06829072238540447,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7589653809971777,
"calib/step_q_c_n": 1063.0,
"calib/step_q_gap": -0.012593617025234871,
"calib/step_q_w": 0.7715589980224126,
"calib/step_q_w_n": 1517.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 3019.0,
"completions/max_terminated_length": 3019.0,
"completions/mean_length": 527.3046875,
"completions/mean_terminated_length": 527.3046875,
"completions/min_length": 157.0,
"completions/min_terminated_length": 157.0,
"epoch": 0.05973333333333333,
"grad_norm": 0.006298091262578964,
"learning_rate": 4.000000000000001e-06,
"loss": 0.1152,
"num_tokens": 12354444.0,
"reward": 0.955078125,
"reward_std": 0.3263026773929596,
"rewards/accuracy_reward_step": 0.46484375,
"rewards/format_reward_step": 0.98046875,
"step": 56
},
{
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.48933038999264167,
"calib/avg_num_step_conf": 9.140625,
"calib/ece": 0.34363999999999995,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.908,
"calib/gap": 0.007482105826477015,
"calib/mean_conf": 0.9426,
"calib/mu_c": 0.945562913907285,
"calib/mu_w": 0.938080808080808,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3411199999999999,
"calib/std_conf": 0.07157681188764975,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7741493055555555,
"calib/step_q_c_n": 1152.0,
"calib/step_q_gap": 0.02985469276094277,
"calib/step_q_w": 0.7442946127946127,
"calib/step_q_w_n": 1188.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2409.0,
"completions/max_terminated_length": 2409.0,
"completions/mean_length": 478.20703125,
"completions/mean_terminated_length": 480.0823669433594,
"completions/min_length": 0.0,
"completions/min_terminated_length": 170.0,
"epoch": 0.0608,
"grad_norm": 0.005231020972132683,
"learning_rate": 3.972222222222223e-06,
"loss": 0.0412,
"num_tokens": 12583657.0,
"reward": 1.076171875,
"reward_std": 0.2724471092224121,
"rewards/accuracy_reward_step": 0.58984375,
"rewards/format_reward_step": 0.97265625,
"step": 57
},
{
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.569040650406504,
"calib/avg_num_step_conf": 10.88671875,
"calib/ece": 0.43987903225806435,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.8709677419354839,
"calib/gap": 0.026591869918699107,
"calib/mean_conf": 0.9335887096774192,
"calib/mu_c": 0.9469918699186991,
"calib/mu_w": 0.9204,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.43874999999999986,
"calib/std_conf": 0.10877073492329829,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7726014109347442,
"calib/step_q_c_n": 1134.0,
"calib/step_q_gap": 0.019956825332808403,
"calib/step_q_w": 0.7526445856019358,
"calib/step_q_w_n": 1653.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2918.0,
"completions/max_terminated_length": 2918.0,
"completions/mean_length": 575.01171875,
"completions/mean_terminated_length": 575.01171875,
"completions/min_length": 161.0,
"completions/min_terminated_length": 161.0,
"epoch": 0.06186666666666667,
"grad_norm": 0.00627827737480402,
"learning_rate": 3.944444444444445e-06,
"loss": 0.0552,
"num_tokens": 12837180.0,
"reward": 0.96484375,
"reward_std": 0.38476935029029846,
"rewards/accuracy_reward_step": 0.48046875,
"rewards/format_reward_step": 0.96875,
"step": 58
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4412151170066325,
"calib/avg_num_step_conf": 9.04296875,
"calib/ece": 0.4275098814229249,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.8893280632411067,
"calib/gap": -0.0067269428106621065,
"calib/mean_conf": 0.9433201581027667,
"calib/mu_c": 0.9400763358778625,
"calib/mu_w": 0.9468032786885247,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.4265217391304348,
"calib/std_conf": 0.0421746353906081,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7586098930481283,
"calib/step_q_c_n": 1122.0,
"calib/step_q_gap": 0.011274268572017743,
"calib/step_q_w": 0.7473356244761106,
"calib/step_q_w_n": 1193.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 3027.0,
"completions/max_terminated_length": 3027.0,
"completions/mean_length": 473.15234375,
"completions/mean_terminated_length": 475.00787353515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 145.0,
"epoch": 0.06293333333333333,
"grad_norm": 0.006247291341423988,
"learning_rate": 3.916666666666667e-06,
"loss": 0.0177,
"num_tokens": 13064555.0,
"reward": 1.009765625,
"reward_std": 0.3327410817146301,
"rewards/accuracy_reward_step": 0.515625,
"rewards/format_reward_step": 0.98828125,
"step": 59
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.411923076923077,
"calib/avg_num_step_conf": 8.53515625,
"calib/ece": 0.45959999999999995,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.804,
"calib/gap": -0.013160256410256421,
"calib/mean_conf": 0.93376,
"calib/mu_c": 0.9269166666666666,
"calib/mu_w": 0.940076923076923,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.45668,
"calib/std_conf": 0.051360124610440726,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.752872340425532,
"calib/step_q_c_n": 846.0,
"calib/step_q_gap": 0.03690074102343932,
"calib/step_q_w": 0.7159715994020927,
"calib/step_q_w_n": 1338.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2456.0,
"completions/max_terminated_length": 2456.0,
"completions/mean_length": 473.87890625,
"completions/mean_terminated_length": 473.87890625,
"completions/min_length": 152.0,
"completions/min_terminated_length": 152.0,
"epoch": 0.064,
"grad_norm": 0.006815559696406126,
"learning_rate": 3.88888888888889e-06,
"loss": 0.0897,
"num_tokens": 13294724.0,
"reward": 0.955078125,
"reward_std": 0.3364248275756836,
"rewards/accuracy_reward_step": 0.46875,
"rewards/format_reward_step": 0.97265625,
"step": 60
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.49205002513826046,
"calib/avg_num_step_conf": 7.77734375,
"calib/ece": 0.39075098814229225,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.782608695652174,
"calib/gap": 0.010588235294117676,
"calib/mean_conf": 0.9256916996047431,
"calib/mu_c": 0.9305882352941177,
"calib/mu_w": 0.92,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3894466403162053,
"calib/std_conf": 0.09515961113404409,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7580360465116279,
"calib/step_q_c_n": 946.0,
"calib/step_q_gap": 0.04087815177478571,
"calib/step_q_w": 0.7171578947368422,
"calib/step_q_w_n": 1045.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2257.0,
"completions/max_terminated_length": 2257.0,
"completions/mean_length": 405.0078125,
"completions/mean_terminated_length": 405.0078125,
"completions/min_length": 101.0,
"completions/min_terminated_length": 101.0,
"epoch": 0.06506666666666666,
"grad_norm": 0.006788547150790691,
"learning_rate": 3.861111111111112e-06,
"loss": 0.017,
"num_tokens": 13502470.0,
"reward": 1.0234375,
"reward_std": 0.2872949242591858,
"rewards/accuracy_reward_step": 0.53125,
"rewards/format_reward_step": 0.984375,
"step": 61
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.47897445553254253,
"calib/avg_num_step_conf": 8.375,
"calib/ece": 0.4763779527559056,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.6968503937007874,
"calib/gap": -0.0037042615954308022,
"calib/mean_conf": 0.9212598425196851,
"calib/mu_c": 0.9192035398230088,
"calib/mu_w": 0.9229078014184396,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.4763779527559056,
"calib/std_conf": 0.06599919023583715,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7541038318912237,
"calib/step_q_c_n": 809.0,
"calib/step_q_gap": -0.003176692453345731,
"calib/step_q_w": 0.7572805243445694,
"calib/step_q_w_n": 1335.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2091.0,
"completions/max_terminated_length": 2091.0,
"completions/mean_length": 447.69140625,
"completions/mean_terminated_length": 449.44708251953125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 138.0,
"epoch": 0.06613333333333334,
"grad_norm": 0.005980294197797775,
"learning_rate": 3.833333333333334e-06,
"loss": 0.0309,
"num_tokens": 13724159.0,
"reward": 0.935546875,
"reward_std": 0.3506472110748291,
"rewards/accuracy_reward_step": 0.44140625,
"rewards/format_reward_step": 0.98828125,
"step": 62
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.4617628205128205,
"calib/avg_num_step_conf": 8.5546875,
"calib/ece": 0.43656000000000006,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.7,
"calib/gap": -0.0066858974358974255,
"calib/mean_conf": 0.9165599999999999,
"calib/mu_c": 0.9130833333333332,
"calib/mu_w": 0.9197692307692307,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.43656000000000006,
"calib/std_conf": 0.06672455619934838,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7589250814332248,
"calib/step_q_c_n": 921.0,
"calib/step_q_gap": 0.014565585767346123,
"calib/step_q_w": 0.7443594956658787,
"calib/step_q_w_n": 1269.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2741.0,
"completions/max_terminated_length": 2741.0,
"completions/mean_length": 508.91796875,
"completions/mean_terminated_length": 508.91796875,
"completions/min_length": 141.0,
"completions/min_terminated_length": 141.0,
"epoch": 0.0672,
"grad_norm": 0.006933415308594704,
"learning_rate": 3.8055555555555556e-06,
"loss": 0.1136,
"num_tokens": 13963082.0,
"reward": 0.95703125,
"reward_std": 0.38261866569519043,
"rewards/accuracy_reward_step": 0.46875,
"rewards/format_reward_step": 0.9765625,
"step": 63
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.493006993006993,
"calib/avg_num_step_conf": 6.98828125,
"calib/ece": 0.36474308300395264,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.6007905138339921,
"calib/gap": -0.012349650349650299,
"calib/mean_conf": 0.8968379446640315,
"calib/mu_c": 0.8914685314685316,
"calib/mu_w": 0.9038181818181819,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3481818181818182,
"calib/std_conf": 0.11424040866203894,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7302215189873418,
"calib/step_q_c_n": 948.0,
"calib/step_q_gap": 0.011339235990909025,
"calib/step_q_w": 0.7188822829964328,
"calib/step_q_w_n": 841.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2457.0,
"completions/max_terminated_length": 2457.0,
"completions/mean_length": 404.09375,
"completions/mean_terminated_length": 404.09375,
"completions/min_length": 123.0,
"completions/min_terminated_length": 123.0,
"epoch": 0.06826666666666667,
"grad_norm": 0.006458755116909742,
"learning_rate": 3.777777777777778e-06,
"loss": 0.0435,
"num_tokens": 14170306.0,
"reward": 1.052734375,
"reward_std": 0.2793656885623932,
"rewards/accuracy_reward_step": 0.55859375,
"rewards/format_reward_step": 0.98828125,
"step": 64
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4552754435107376,
"calib/avg_num_step_conf": 6.6484375,
"calib/ece": 0.3721259842519685,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.5511811023622047,
"calib/gap": -0.005517584811702503,
"calib/mean_conf": 0.9036220472440945,
"calib/mu_c": 0.901037037037037,
"calib/mu_w": 0.9065546218487395,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3721259842519685,
"calib/std_conf": 0.06376839603453004,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7453848684210526,
"calib/step_q_c_n": 912.0,
"calib/step_q_gap": 0.01815702031978672,
"calib/step_q_w": 0.7272278481012658,
"calib/step_q_w_n": 790.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1137.0,
"completions/max_terminated_length": 1137.0,
"completions/mean_length": 365.46484375,
"completions/mean_terminated_length": 365.46484375,
"completions/min_length": 120.0,
"completions/min_terminated_length": 120.0,
"epoch": 0.06933333333333333,
"grad_norm": 0.006170464679598808,
"learning_rate": 3.7500000000000005e-06,
"loss": -0.0123,
"num_tokens": 14368889.0,
"reward": 1.0234375,
"reward_std": 0.22531628608703613,
"rewards/accuracy_reward_step": 0.52734375,
"rewards/format_reward_step": 0.9921875,
"step": 65
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.4909408773045136,
"calib/avg_num_step_conf": 8.23046875,
"calib/ece": 0.48683794466403163,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.6245059288537549,
"calib/gap": -0.00655244755244766,
"calib/mean_conf": 0.913794466403162,
"calib/mu_c": 0.9100909090909091,
"calib/mu_w": 0.9166433566433567,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.4829249011857707,
"calib/std_conf": 0.061488689542966525,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7438579099545641,
"calib/step_q_c_n": 807.0,
"calib/step_q_gap": 0.09218137149302574,
"calib/step_q_w": 0.6516765384615384,
"calib/step_q_w_n": 1300.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2443.0,
"completions/max_terminated_length": 2443.0,
"completions/mean_length": 483.0546875,
"completions/mean_terminated_length": 483.0546875,
"completions/min_length": 130.0,
"completions/min_terminated_length": 130.0,
"epoch": 0.0704,
"grad_norm": 0.006618858780711889,
"learning_rate": 3.7222222222222225e-06,
"loss": 0.0437,
"num_tokens": 14598903.0,
"reward": 0.923828125,
"reward_std": 0.28461480140686035,
"rewards/accuracy_reward_step": 0.4296875,
"rewards/format_reward_step": 0.98828125,
"step": 66
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5161099339399228,
"calib/avg_num_step_conf": 7.0234375,
"calib/ece": 0.3441568627450981,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.5843137254901961,
"calib/gap": 0.014226598529228585,
"calib/mean_conf": 0.9010196078431372,
"calib/mu_c": 0.9073239436619719,
"calib/mu_w": 0.8930973451327433,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3441568627450981,
"calib/std_conf": 0.0878977771242005,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7455732484076433,
"calib/step_q_c_n": 942.0,
"calib/step_q_gap": 0.039343809155306775,
"calib/step_q_w": 0.7062294392523365,
"calib/step_q_w_n": 856.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1691.0,
"completions/max_terminated_length": 1691.0,
"completions/mean_length": 428.17578125,
"completions/mean_terminated_length": 429.85491943359375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 169.0,
"epoch": 0.07146666666666666,
"grad_norm": 0.0060368194244802,
"learning_rate": 3.694444444444445e-06,
"loss": 0.0359,
"num_tokens": 14813524.0,
"reward": 1.052734375,
"reward_std": 0.2020040899515152,
"rewards/accuracy_reward_step": 0.5546875,
"rewards/format_reward_step": 0.99609375,
"step": 67
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.4300887664524028,
"calib/avg_num_step_conf": 7.0546875,
"calib/ece": 0.37574218750000005,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.578125,
"calib/gap": -0.008566268748086858,
"calib/mean_conf": 0.9030859375,
"calib/mu_c": 0.8990370370370371,
"calib/mu_w": 0.9076033057851239,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.37574218750000005,
"calib/std_conf": 0.06972776609605454,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7468965517241378,
"calib/step_q_c_n": 870.0,
"calib/step_q_gap": 0.03230253463012056,
"calib/step_q_w": 0.7145940170940173,
"calib/step_q_w_n": 936.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1199.0,
"completions/max_terminated_length": 1199.0,
"completions/mean_length": 396.6875,
"completions/mean_terminated_length": 398.2431640625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 126.0,
"epoch": 0.07253333333333334,
"grad_norm": 0.006941006053239107,
"learning_rate": 3.6666666666666666e-06,
"loss": -0.0138,
"num_tokens": 15019164.0,
"reward": 1.025390625,
"reward_std": 0.26476049423217773,
"rewards/accuracy_reward_step": 0.52734375,
"rewards/format_reward_step": 0.99609375,
"step": 68
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.41418971310416125,
"calib/avg_num_step_conf": 7.546875,
"calib/ece": 0.5014285714285714,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.6309523809523809,
"calib/gap": -0.029346084259498584,
"calib/mean_conf": 0.9038888888888889,
"calib/mu_c": 0.8868867924528301,
"calib/mu_w": 0.9162328767123287,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.49234126984126986,
"calib/std_conf": 0.09921895158050041,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7287564102564104,
"calib/step_q_c_n": 780.0,
"calib/step_q_gap": 0.005631670673077016,
"calib/step_q_w": 0.7231247395833333,
"calib/step_q_w_n": 1152.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2034.0,
"completions/max_terminated_length": 2034.0,
"completions/mean_length": 487.453125,
"completions/mean_terminated_length": 489.36474609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 131.0,
"epoch": 0.0736,
"grad_norm": 0.006305481307208538,
"learning_rate": 3.638888888888889e-06,
"loss": 0.0023,
"num_tokens": 15248448.0,
"reward": 0.90625,
"reward_std": 0.2847837507724762,
"rewards/accuracy_reward_step": 0.4140625,
"rewards/format_reward_step": 0.984375,
"step": 69
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.4276155717761557,
"calib/avg_num_step_conf": 7.65234375,
"calib/ece": 0.44107569721115536,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.549800796812749,
"calib/gap": -0.006924061979767027,
"calib/mean_conf": 0.890796812749004,
"calib/mu_c": 0.8870175438596491,
"calib/mu_w": 0.8939416058394162,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.4388446215139442,
"calib/std_conf": 0.10985757961381803,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7327112258064516,
"calib/step_q_c_n": 775.0,
"calib/step_q_gap": 0.016503455536181377,
"calib/step_q_w": 0.7162077702702703,
"calib/step_q_w_n": 1184.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2984.0,
"completions/max_terminated_length": 2984.0,
"completions/mean_length": 463.96484375,
"completions/mean_terminated_length": 463.96484375,
"completions/min_length": 122.0,
"completions/min_terminated_length": 122.0,
"epoch": 0.07466666666666667,
"grad_norm": 0.00585549883544445,
"learning_rate": 3.6111111111111115e-06,
"loss": 0.0599,
"num_tokens": 15474215.0,
"reward": 0.935546875,
"reward_std": 0.23678159713745117,
"rewards/accuracy_reward_step": 0.4453125,
"rewards/format_reward_step": 0.98046875,
"step": 70
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5046290343320046,
"calib/avg_num_step_conf": 8.28125,
"calib/ece": 0.48552941176470577,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.47058823529411764,
"calib/gap": 0.010653208177960893,
"calib/mean_conf": 0.8798039215686273,
"calib/mu_c": 0.8862376237623765,
"calib/mu_w": 0.8755844155844156,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.48462745098039206,
"calib/std_conf": 0.11227225655728326,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7350923076923077,
"calib/step_q_c_n": 715.0,
"calib/step_q_gap": 0.0069001368738023094,
"calib/step_q_w": 0.7281921708185054,
"calib/step_q_w_n": 1405.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2064.0,
"completions/max_terminated_length": 2064.0,
"completions/mean_length": 489.91015625,
"completions/mean_terminated_length": 489.91015625,
"completions/min_length": 142.0,
"completions/min_terminated_length": 142.0,
"epoch": 0.07573333333333333,
"grad_norm": 0.006372661795467138,
"learning_rate": 3.5833333333333335e-06,
"loss": 0.0244,
"num_tokens": 15704040.0,
"reward": 0.890625,
"reward_std": 0.3063148856163025,
"rewards/accuracy_reward_step": 0.39453125,
"rewards/format_reward_step": 0.9921875,
"step": 71
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.39437746062992124,
"calib/avg_num_step_conf": 7.5078125,
"calib/ece": 0.3997254901960786,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.48627450980392156,
"calib/gap": -0.02038939468503942,
"calib/mean_conf": 0.8896078431372548,
"calib/mu_c": 0.8794531250000001,
"calib/mu_w": 0.8998425196850395,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3936862745098041,
"calib/std_conf": 0.08992722077616107,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7377215189873417,
"calib/step_q_c_n": 869.0,
"calib/step_q_gap": 0.028786666185822374,
"calib/step_q_w": 0.7089348528015194,
"calib/step_q_w_n": 1053.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2255.0,
"completions/max_terminated_length": 2255.0,
"completions/mean_length": 410.21484375,
"completions/mean_terminated_length": 410.21484375,
"completions/min_length": 144.0,
"completions/min_terminated_length": 144.0,
"epoch": 0.0768,
"grad_norm": 0.008393477648496628,
"learning_rate": 3.555555555555556e-06,
"loss": 0.0227,
"num_tokens": 15913463.0,
"reward": 0.998046875,
"reward_std": 0.296690434217453,
"rewards/accuracy_reward_step": 0.5,
"rewards/format_reward_step": 0.99609375,
"step": 72
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.4570565797838525,
"calib/avg_num_step_conf": 6.86328125,
"calib/ece": 0.3254545454545452,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.48616600790513836,
"calib/gap": -0.0005804195804196555,
"calib/mean_conf": 0.8906719367588934,
"calib/mu_c": 0.8904195804195802,
"calib/mu_w": 0.8909999999999999,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3254545454545452,
"calib/std_conf": 0.0767930788514829,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7313693901035674,
"calib/step_q_c_n": 869.0,
"calib/step_q_gap": 0.03222524595942333,
"calib/step_q_w": 0.699144144144144,
"calib/step_q_w_n": 888.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2585.0,
"completions/max_terminated_length": 2585.0,
"completions/mean_length": 415.4921875,
"completions/mean_terminated_length": 417.12158203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 113.0,
"epoch": 0.07786666666666667,
"grad_norm": 0.007174800615757704,
"learning_rate": 3.5277777777777784e-06,
"loss": 0.0286,
"num_tokens": 16126861.0,
"reward": 1.052734375,
"reward_std": 0.3073679804801941,
"rewards/accuracy_reward_step": 0.55859375,
"rewards/format_reward_step": 0.98828125,
"step": 73
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.4718685031185031,
"calib/avg_num_step_conf": 7.62890625,
"calib/ece": 0.4692857142857143,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.42063492063492064,
"calib/gap": -0.004851871101870908,
"calib/mean_conf": 0.8819841269841271,
"calib/mu_c": 0.8791346153846155,
"calib/mu_w": 0.8839864864864864,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.4692857142857143,
"calib/std_conf": 0.0748333581638504,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.727112582781457,
"calib/step_q_c_n": 755.0,
"calib/step_q_gap": 0.01272193169631508,
"calib/step_q_w": 0.7143906510851419,
"calib/step_q_w_n": 1198.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2249.0,
"completions/max_terminated_length": 2249.0,
"completions/mean_length": 435.9296875,
"completions/mean_terminated_length": 437.6392517089844,
"completions/min_length": 0.0,
"completions/min_terminated_length": 134.0,
"epoch": 0.07893333333333333,
"grad_norm": 0.008243421092629433,
"learning_rate": 3.5e-06,
"loss": 0.047,
"num_tokens": 16342387.0,
"reward": 0.89453125,
"reward_std": 0.3572673499584198,
"rewards/accuracy_reward_step": 0.40625,
"rewards/format_reward_step": 0.9765625,
"step": 74
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4807967313585291,
"calib/avg_num_step_conf": 7.734375,
"calib/ece": 0.24551181102362202,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.4448818897637795,
"calib/gap": -0.0036036772216547774,
"calib/mean_conf": 0.8865354330708661,
"calib/mu_c": 0.8852727272727272,
"calib/mu_w": 0.888876404494382,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.24122047244094486,
"calib/std_conf": 0.06912134659800193,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7340513557929335,
"calib/step_q_c_n": 1217.0,
"calib/step_q_gap": 0.005912430498044907,
"calib/step_q_w": 0.7281389252948886,
"calib/step_q_w_n": 763.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2545.0,
"completions/max_terminated_length": 2545.0,
"completions/mean_length": 421.14453125,
"completions/mean_terminated_length": 421.14453125,
"completions/min_length": 125.0,
"completions/min_terminated_length": 125.0,
"epoch": 0.08,
"grad_norm": 0.006616497877985239,
"learning_rate": 3.4722222222222224e-06,
"loss": 0.0201,
"num_tokens": 16554952.0,
"reward": 1.140625,
"reward_std": 0.2210792601108551,
"rewards/accuracy_reward_step": 0.64453125,
"rewards/format_reward_step": 0.9921875,
"step": 75
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.49542763157894737,
"calib/avg_num_step_conf": 7.7578125,
"calib/ece": 0.2944444444444444,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.4523809523809524,
"calib/gap": -0.004602631578947247,
"calib/mean_conf": 0.8795238095238096,
"calib/mu_c": 0.8776973684210526,
"calib/mu_w": 0.8822999999999999,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2853968253968253,
"calib/std_conf": 0.09486295162937285,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7230081227436824,
"calib/step_q_c_n": 1108.0,
"calib/step_q_gap": 0.016807667162816675,
"calib/step_q_w": 0.7062004555808657,
"calib/step_q_w_n": 878.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2520.0,
"completions/max_terminated_length": 2520.0,
"completions/mean_length": 445.609375,
"completions/mean_terminated_length": 447.3569030761719,
"completions/min_length": 0.0,
"completions/min_terminated_length": 148.0,
"epoch": 0.08106666666666666,
"grad_norm": 0.008111460134387016,
"learning_rate": 3.444444444444445e-06,
"loss": 0.0276,
"num_tokens": 16772084.0,
"reward": 1.0859375,
"reward_std": 0.34697067737579346,
"rewards/accuracy_reward_step": 0.59375,
"rewards/format_reward_step": 0.984375,
"step": 76
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.45514851485148516,
"calib/avg_num_step_conf": 7.65625,
"calib/ece": 0.27804780876494023,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.41832669322709165,
"calib/gap": -0.010414521452145498,
"calib/mean_conf": 0.8756573705179284,
"calib/mu_c": 0.8714666666666665,
"calib/mu_w": 0.881881188118812,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.27804780876494023,
"calib/std_conf": 0.08224360215285961,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7159497487437188,
"calib/step_q_c_n": 995.0,
"calib/step_q_gap": 0.032706225427656666,
"calib/step_q_w": 0.6832435233160621,
"calib/step_q_w_n": 965.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2512.0,
"completions/max_terminated_length": 2512.0,
"completions/mean_length": 423.62109375,
"completions/mean_terminated_length": 423.62109375,
"completions/min_length": 118.0,
"completions/min_terminated_length": 118.0,
"epoch": 0.08213333333333334,
"grad_norm": 0.007898428477346897,
"learning_rate": 3.416666666666667e-06,
"loss": 0.0292,
"num_tokens": 16985195.0,
"reward": 1.07421875,
"reward_std": 0.32221925258636475,
"rewards/accuracy_reward_step": 0.5859375,
"rewards/format_reward_step": 0.9765625,
"step": 77
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.546784715750233,
"calib/avg_num_step_conf": 7.63671875,
"calib/ece": 0.3200390625,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.41796875,
"calib/gap": 0.017716682199440892,
"calib/mean_conf": 0.8830078124999999,
"calib/mu_c": 0.8906896551724138,
"calib/mu_w": 0.8729729729729729,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.31832031250000004,
"calib/std_conf": 0.08451571563303976,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7521550946798918,
"calib/step_q_c_n": 1109.0,
"calib/step_q_gap": 0.02726502375790607,
"calib/step_q_w": 0.7248900709219858,
"calib/step_q_w_n": 846.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1368.0,
"completions/max_terminated_length": 1368.0,
"completions/mean_length": 451.578125,
"completions/mean_terminated_length": 453.34906005859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 169.0,
"epoch": 0.0832,
"grad_norm": 0.007119826041162014,
"learning_rate": 3.3888888888888893e-06,
"loss": 0.0059,
"num_tokens": 17208823.0,
"reward": 1.06640625,
"reward_std": 0.2874803841114044,
"rewards/accuracy_reward_step": 0.56640625,
"rewards/format_reward_step": 1.0,
"step": 78
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5097412967103161,
"calib/avg_num_step_conf": 8.07421875,
"calib/ece": 0.2846875,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.4296875,
"calib/gap": 0.0012328329607155242,
"calib/mean_conf": 0.877578125,
"calib/mu_c": 0.8780645161290322,
"calib/mu_w": 0.8768316831683167,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.27839843749999993,
"calib/std_conf": 0.08033860075632619,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7329100887812753,
"calib/step_q_c_n": 1239.0,
"calib/step_q_gap": 0.02181105496485025,
"calib/step_q_w": 0.711099033816425,
"calib/step_q_w_n": 828.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1145.0,
"completions/max_terminated_length": 1145.0,
"completions/mean_length": 466.0078125,
"completions/mean_terminated_length": 467.8353271484375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 130.0,
"epoch": 0.08426666666666667,
"grad_norm": 0.007006760220974684,
"learning_rate": 3.3611111111111117e-06,
"loss": 0.0242,
"num_tokens": 17434497.0,
"reward": 1.10546875,
"reward_std": 0.30984586477279663,
"rewards/accuracy_reward_step": 0.60546875,
"rewards/format_reward_step": 1.0,
"step": 79
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.49135483870967744,
"calib/avg_num_step_conf": 7.62890625,
"calib/ece": 0.28654901960784307,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.45098039215686275,
"calib/gap": 0.0019290322580646224,
"calib/mean_conf": 0.8853725490196079,
"calib/mu_c": 0.8861290322580645,
"calib/mu_w": 0.8841999999999999,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.28203921568627444,
"calib/std_conf": 0.07883314739877266,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7555366284201236,
"calib/step_q_c_n": 1133.0,
"calib/step_q_gap": 0.03328784793231887,
"calib/step_q_w": 0.7222487804878047,
"calib/step_q_w_n": 820.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1248.0,
"completions/max_terminated_length": 1248.0,
"completions/mean_length": 422.24609375,
"completions/mean_terminated_length": 423.9019775390625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 160.0,
"epoch": 0.08533333333333333,
"grad_norm": 0.006706281565129757,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.0001,
"num_tokens": 17644752.0,
"reward": 1.107421875,
"reward_std": 0.2589503526687622,
"rewards/accuracy_reward_step": 0.609375,
"rewards/format_reward_step": 0.99609375,
"step": 80
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5105316584189823,
"calib/avg_num_step_conf": 7.81640625,
"calib/ece": 0.33027667984189735,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.45454545454545453,
"calib/gap": 0.007438142367719669,
"calib/mean_conf": 0.8868774703557312,
"calib/mu_c": 0.8901408450704226,
"calib/mu_w": 0.8827027027027029,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3279446640316207,
"calib/std_conf": 0.07486042880397843,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7595820271682341,
"calib/step_q_c_n": 957.0,
"calib/step_q_gap": 0.04866248693834896,
"calib/step_q_w": 0.7109195402298851,
"calib/step_q_w_n": 1044.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 3037.0,
"completions/max_terminated_length": 3037.0,
"completions/mean_length": 473.12109375,
"completions/mean_terminated_length": 473.12109375,
"completions/min_length": 160.0,
"completions/min_terminated_length": 160.0,
"epoch": 0.0864,
"grad_norm": 0.006804213859140873,
"learning_rate": 3.3055555555555558e-06,
"loss": 0.0445,
"num_tokens": 17872119.0,
"reward": 1.048828125,
"reward_std": 0.2571106553077698,
"rewards/accuracy_reward_step": 0.5546875,
"rewards/format_reward_step": 0.98828125,
"step": 81
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.53250062924742,
"calib/avg_num_step_conf": 6.44921875,
"calib/ece": 0.3499604743083004,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.35968379446640314,
"calib/gap": 0.0020922476717843974,
"calib/mean_conf": 0.8741501976284586,
"calib/mu_c": 0.8751094890510948,
"calib/mu_w": 0.8730172413793104,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.341304347826087,
"calib/std_conf": 0.06940578658457372,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7785149752475247,
"calib/step_q_c_n": 808.0,
"calib/step_q_gap": 0.02590524808263739,
"calib/step_q_w": 0.7526097271648873,
"calib/step_q_w_n": 843.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2833.0,
"completions/max_terminated_length": 2833.0,
"completions/mean_length": 410.4453125,
"completions/mean_terminated_length": 410.4453125,
"completions/min_length": 157.0,
"completions/min_terminated_length": 157.0,
"epoch": 0.08746666666666666,
"grad_norm": 0.007754732389003038,
"learning_rate": 3.277777777777778e-06,
"loss": 0.0765,
"num_tokens": 18082745.0,
"reward": 1.02734375,
"reward_std": 0.2921496033668518,
"rewards/accuracy_reward_step": 0.53515625,
"rewards/format_reward_step": 0.984375,
"step": 82
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.4460887296094908,
"calib/avg_num_step_conf": 7.21875,
"calib/ece": 0.3640392156862745,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.4745098039215686,
"calib/gap": -0.010126050420167787,
"calib/mean_conf": 0.8973725490196078,
"calib/mu_c": 0.8926470588235295,
"calib/mu_w": 0.9027731092436972,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3640392156862745,
"calib/std_conf": 0.06317543602565724,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7684606613454961,
"calib/step_q_c_n": 877.0,
"calib/step_q_gap": 0.002765501716247898,
"calib/step_q_w": 0.7656951596292482,
"calib/step_q_w_n": 971.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2606.0,
"completions/max_terminated_length": 2606.0,
"completions/mean_length": 495.95703125,
"completions/mean_terminated_length": 495.95703125,
"completions/min_length": 134.0,
"completions/min_terminated_length": 134.0,
"epoch": 0.08853333333333334,
"grad_norm": 0.005239543970674276,
"learning_rate": 3.2500000000000002e-06,
"loss": 0.017,
"num_tokens": 18316974.0,
"reward": 1.029296875,
"reward_std": 0.19005656242370605,
"rewards/accuracy_reward_step": 0.53125,
"rewards/format_reward_step": 0.99609375,
"step": 83
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4947182004598273,
"calib/avg_num_step_conf": 6.48828125,
"calib/ece": 0.3700393700787401,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.48031496062992124,
"calib/gap": 0.0010520101907661727,
"calib/mean_conf": 0.8897244094488189,
"calib/mu_c": 0.8902255639097744,
"calib/mu_w": 0.8891735537190082,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.36807086614173223,
"calib/std_conf": 0.06928432543249231,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7825955794504182,
"calib/step_q_c_n": 837.0,
"calib/step_q_gap": 0.008748492071777303,
"calib/step_q_w": 0.7738470873786409,
"calib/step_q_w_n": 824.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1358.0,
"completions/max_terminated_length": 1358.0,
"completions/mean_length": 406.80078125,
"completions/mean_terminated_length": 408.3960876464844,
"completions/min_length": 0.0,
"completions/min_terminated_length": 121.0,
"epoch": 0.0896,
"grad_norm": 0.00738998968154192,
"learning_rate": 3.2222222222222227e-06,
"loss": -0.0329,
"num_tokens": 18527035.0,
"reward": 1.015625,
"reward_std": 0.2715874910354614,
"rewards/accuracy_reward_step": 0.51953125,
"rewards/format_reward_step": 0.9921875,
"step": 84
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5067310134620269,
"calib/avg_num_step_conf": 6.69140625,
"calib/ece": 0.3924701195219124,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.5059760956175299,
"calib/gap": 0.0029857759715518073,
"calib/mean_conf": 0.8984462151394423,
"calib/mu_c": 0.8999212598425195,
"calib/mu_w": 0.8969354838709677,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3924701195219124,
"calib/std_conf": 0.06390025917885646,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.782581863979849,
"calib/step_q_c_n": 794.0,
"calib/step_q_gap": 0.020721145807922836,
"calib/step_q_w": 0.7618607181719261,
"calib/step_q_w_n": 919.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2985.0,
"completions/max_terminated_length": 2985.0,
"completions/mean_length": 472.39453125,
"completions/mean_terminated_length": 476.1141662597656,
"completions/min_length": 0.0,
"completions/min_terminated_length": 121.0,
"epoch": 0.09066666666666667,
"grad_norm": 0.0065482864156365395,
"learning_rate": 3.1944444444444443e-06,
"loss": 0.012,
"num_tokens": 18755792.0,
"reward": 0.986328125,
"reward_std": 0.27897346019744873,
"rewards/accuracy_reward_step": 0.49609375,
"rewards/format_reward_step": 0.98046875,
"step": 85
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.47883027240231724,
"calib/avg_num_step_conf": 6.6171875,
"calib/ece": 0.3670196078431373,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.45098039215686275,
"calib/gap": 0.005936768149882643,
"calib/mean_conf": 0.8885882352941177,
"calib/mu_c": 0.8914285714285713,
"calib/mu_w": 0.8854918032786887,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3670196078431373,
"calib/std_conf": 0.08120880288191216,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.8187168141592921,
"calib/step_q_c_n": 904.0,
"calib/step_q_gap": 0.03227377618460858,
"calib/step_q_w": 0.7864430379746835,
"calib/step_q_w_n": 790.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1225.0,
"completions/max_terminated_length": 1225.0,
"completions/mean_length": 435.76953125,
"completions/mean_terminated_length": 437.47845458984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 128.0,
"epoch": 0.09173333333333333,
"grad_norm": 0.0072197201661765575,
"learning_rate": 3.1666666666666667e-06,
"loss": 0.0072,
"num_tokens": 18972861.0,
"reward": 1.017578125,
"reward_std": 0.22306768596172333,
"rewards/accuracy_reward_step": 0.51953125,
"rewards/format_reward_step": 0.99609375,
"step": 86
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.4983501683501683,
"calib/avg_num_step_conf": 5.84375,
"calib/ece": 0.2562352941176471,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.41568627450980394,
"calib/gap": -0.0013737373737373604,
"calib/mean_conf": 0.8946666666666667,
"calib/mu_c": 0.8941818181818183,
"calib/mu_w": 0.8955555555555557,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2519215686274511,
"calib/std_conf": 0.05499898988971424,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7833549083063647,
"calib/step_q_c_n": 927.0,
"calib/step_q_gap": -1.8579502257809466e-06,
"calib/step_q_w": 0.7833567662565905,
"calib/step_q_w_n": 569.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1564.0,
"completions/max_terminated_length": 1564.0,
"completions/mean_length": 395.4375,
"completions/mean_terminated_length": 395.4375,
"completions/min_length": 117.0,
"completions/min_terminated_length": 117.0,
"epoch": 0.0928,
"grad_norm": 0.00887050200253725,
"learning_rate": 3.138888888888889e-06,
"loss": 0.0289,
"num_tokens": 19179589.0,
"reward": 1.142578125,
"reward_std": 0.29103517532348633,
"rewards/accuracy_reward_step": 0.64453125,
"rewards/format_reward_step": 0.99609375,
"step": 87
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.45328185328185333,
"calib/avg_num_step_conf": 6.07421875,
"calib/ece": 0.33593625498007973,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.42231075697211157,
"calib/gap": -0.0045019305019303735,
"calib/mean_conf": 0.8937051792828685,
"calib/mu_c": 0.8917142857142858,
"calib/mu_w": 0.8962162162162162,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.33593625498007973,
"calib/std_conf": 0.04975054037893203,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.8097133027522935,
"calib/step_q_c_n": 872.0,
"calib/step_q_gap": 0.013877578008515856,
"calib/step_q_w": 0.7958357247437776,
"calib/step_q_w_n": 683.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2556.0,
"completions/max_terminated_length": 2556.0,
"completions/mean_length": 467.12890625,
"completions/mean_terminated_length": 467.12890625,
"completions/min_length": 159.0,
"completions/min_terminated_length": 159.0,
"epoch": 0.09386666666666667,
"grad_norm": 0.006494402419775724,
"learning_rate": 3.1111111111111116e-06,
"loss": 0.0539,
"num_tokens": 19409022.0,
"reward": 1.037109375,
"reward_std": 0.25544431805610657,
"rewards/accuracy_reward_step": 0.546875,
"rewards/format_reward_step": 0.98046875,
"step": 88
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4289703965741948,
"calib/avg_num_step_conf": 5.62109375,
"calib/ece": 0.3801574803149607,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.4448818897637795,
"calib/gap": -0.010303481660770775,
"calib/mean_conf": 0.8959055118110236,
"calib/mu_c": 0.8909160305343511,
"calib/mu_w": 0.9012195121951219,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3801574803149607,
"calib/std_conf": 0.0515179707306355,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.8084137931034483,
"calib/step_q_c_n": 725.0,
"calib/step_q_gap": 0.033035641842944274,
"calib/step_q_w": 0.775378151260504,
"calib/step_q_w_n": 714.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1540.0,
"completions/max_terminated_length": 1540.0,
"completions/mean_length": 442.67578125,
"completions/mean_terminated_length": 444.41180419921875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 123.0,
"epoch": 0.09493333333333333,
"grad_norm": 0.007002872880548239,
"learning_rate": 3.0833333333333336e-06,
"loss": 0.0215,
"num_tokens": 19631235.0,
"reward": 1.0078125,
"reward_std": 0.24216635525226593,
"rewards/accuracy_reward_step": 0.51171875,
"rewards/format_reward_step": 0.9921875,
"step": 89
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.41836536594835955,
"calib/avg_num_step_conf": 5.66796875,
"calib/ece": 0.30055118110236223,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.484251968503937,
"calib/gap": -0.016638840354623596,
"calib/mean_conf": 0.8904724409448818,
"calib/mu_c": 0.8838562091503269,
"calib/mu_w": 0.9004950495049505,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.29433070866141736,
"calib/std_conf": 0.08478187174114259,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7948888888888888,
"calib/step_q_c_n": 810.0,
"calib/step_q_gap": -0.0031003466805341606,
"calib/step_q_w": 0.7979892355694229,
"calib/step_q_w_n": 641.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2627.0,
"completions/max_terminated_length": 2627.0,
"completions/mean_length": 428.78515625,
"completions/mean_terminated_length": 428.78515625,
"completions/min_length": 107.0,
"completions/min_terminated_length": 107.0,
"epoch": 0.096,
"grad_norm": 0.006482626777142286,
"learning_rate": 3.055555555555556e-06,
"loss": 0.0323,
"num_tokens": 19844324.0,
"reward": 1.09375,
"reward_std": 0.2187202423810959,
"rewards/accuracy_reward_step": 0.59765625,
"rewards/format_reward_step": 0.9921875,
"step": 90
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.45087365591397854,
"calib/avg_num_step_conf": 5.73046875,
"calib/ece": 0.2769322709163346,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.4262948207171315,
"calib/gap": -0.006774865591397949,
"calib/mean_conf": 0.8944621513944223,
"calib/mu_c": 0.8918709677419355,
"calib/mu_w": 0.8986458333333335,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2769322709163346,
"calib/std_conf": 0.0498284863261445,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.813194130925508,
"calib/step_q_c_n": 886.0,
"calib/step_q_gap": -0.011521875959173533,
"calib/step_q_w": 0.8247160068846815,
"calib/step_q_w_n": 581.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2595.0,
"completions/max_terminated_length": 2595.0,
"completions/mean_length": 440.40625,
"completions/mean_terminated_length": 440.40625,
"completions/min_length": 151.0,
"completions/min_terminated_length": 151.0,
"epoch": 0.09706666666666666,
"grad_norm": 0.007019788958132267,
"learning_rate": 3.0277777777777776e-06,
"loss": 0.0395,
"num_tokens": 20064780.0,
"reward": 1.095703125,
"reward_std": 0.1930152028799057,
"rewards/accuracy_reward_step": 0.60546875,
"rewards/format_reward_step": 0.98046875,
"step": 91
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4741679566563467,
"calib/avg_num_step_conf": 4.79296875,
"calib/ece": 0.2944881889763781,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.35826771653543305,
"calib/gap": -0.00839654282765745,
"calib/mean_conf": 0.8837007874015748,
"calib/mu_c": 0.880328947368421,
"calib/mu_w": 0.8887254901960785,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.28988188976377965,
"calib/std_conf": 0.05577377180563677,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.8058981233243968,
"calib/step_q_c_n": 746.0,
"calib/step_q_gap": -0.020068612642339168,
"calib/step_q_w": 0.825966735966736,
"calib/step_q_w_n": 481.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2160.0,
"completions/max_terminated_length": 2160.0,
"completions/mean_length": 392.453125,
"completions/mean_terminated_length": 392.453125,
"completions/min_length": 133.0,
"completions/min_terminated_length": 133.0,
"epoch": 0.09813333333333334,
"grad_norm": 0.008154223673045635,
"learning_rate": 3e-06,
"loss": -0.0069,
"num_tokens": 20271968.0,
"reward": 1.08984375,
"reward_std": 0.2793588936328888,
"rewards/accuracy_reward_step": 0.59375,
"rewards/format_reward_step": 0.9921875,
"step": 92
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.4661604799400075,
"calib/avg_num_step_conf": 5.87109375,
"calib/ece": 0.3822134387351777,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.3201581027667984,
"calib/gap": -0.0037995250593676477,
"calib/mean_conf": 0.8806324110671937,
"calib/mu_c": 0.8787401574803146,
"calib/mu_w": 0.8825396825396823,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.38043478260869545,
"calib/std_conf": 0.05129542484083523,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.8162957540263542,
"calib/step_q_c_n": 683.0,
"calib/step_q_gap": 0.005393315001963939,
"calib/step_q_w": 0.8109024390243903,
"calib/step_q_w_n": 820.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2888.0,
"completions/max_terminated_length": 2888.0,
"completions/mean_length": 461.25390625,
"completions/mean_terminated_length": 461.25390625,
"completions/min_length": 119.0,
"completions/min_terminated_length": 119.0,
"epoch": 0.0992,
"grad_norm": 0.007273159455507994,
"learning_rate": 2.9722222222222225e-06,
"loss": 0.0276,
"num_tokens": 20495825.0,
"reward": 0.986328125,
"reward_std": 0.2531079649925232,
"rewards/accuracy_reward_step": 0.49609375,
"rewards/format_reward_step": 0.98046875,
"step": 93
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.46136277122192615,
"calib/avg_num_step_conf": 4.94921875,
"calib/ece": 0.32434782608695667,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.31620553359683795,
"calib/gap": 0.0037400076132469984,
"calib/mean_conf": 0.8856126482213439,
"calib/mu_c": 0.8872535211267606,
"calib/mu_w": 0.8835135135135136,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.32434782608695667,
"calib/std_conf": 0.05223956822413295,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.8169861111111112,
"calib/step_q_c_n": 720.0,
"calib/step_q_gap": 0.0012822719886249612,
"calib/step_q_w": 0.8157038391224862,
"calib/step_q_w_n": 547.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2696.0,
"completions/max_terminated_length": 2696.0,
"completions/mean_length": 408.25,
"completions/mean_terminated_length": 409.85101318359375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 146.0,
"epoch": 0.10026666666666667,
"grad_norm": 0.007511666510254145,
"learning_rate": 2.944444444444445e-06,
"loss": 0.0165,
"num_tokens": 20709017.0,
"reward": 1.046875,
"reward_std": 0.26490265130996704,
"rewards/accuracy_reward_step": 0.5546875,
"rewards/format_reward_step": 0.984375,
"step": 94
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.4770461020461021,
"calib/avg_num_step_conf": 5.0078125,
"calib/ece": 0.26949019607843144,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.28627450980392155,
"calib/gap": -0.004545454545454741,
"calib/mean_conf": 0.8784313725490196,
"calib/mu_c": 0.8766666666666666,
"calib/mu_w": 0.8812121212121213,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.2680784313725491,
"calib/std_conf": 0.05112738988209085,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.8057089552238805,
"calib/step_q_c_n": 804.0,
"calib/step_q_gap": 0.015708955223880494,
"calib/step_q_w": 0.79,
"calib/step_q_w_n": 478.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1246.0,
"completions/max_terminated_length": 1246.0,
"completions/mean_length": 412.42578125,
"completions/mean_terminated_length": 414.04315185546875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 124.0,
"epoch": 0.10133333333333333,
"grad_norm": 0.006043555215001106,
"learning_rate": 2.916666666666667e-06,
"loss": -0.0196,
"num_tokens": 20920726.0,
"reward": 1.10546875,
"reward_std": 0.20898544788360596,
"rewards/accuracy_reward_step": 0.609375,
"rewards/format_reward_step": 0.9921875,
"step": 95
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.41727759763146766,
"calib/avg_num_step_conf": 5.1171875,
"calib/ece": 0.20921568627450987,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.2980392156862745,
"calib/gap": -0.012831665021852623,
"calib/mean_conf": 0.8823921568627451,
"calib/mu_c": 0.8782658959537573,
"calib/mu_w": 0.8910975609756099,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2065882352941177,
"calib/std_conf": 0.047396565785815324,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.8122093023255814,
"calib/step_q_c_n": 860.0,
"calib/step_q_gap": 0.006564857881136921,
"calib/step_q_w": 0.8056444444444445,
"calib/step_q_w_n": 450.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1030.0,
"completions/max_terminated_length": 1030.0,
"completions/mean_length": 386.23828125,
"completions/mean_terminated_length": 387.7529602050781,
"completions/min_length": 0.0,
"completions/min_terminated_length": 156.0,
"epoch": 0.1024,
"grad_norm": 0.0067080045118927956,
"learning_rate": 2.888888888888889e-06,
"loss": 0.0224,
"num_tokens": 21125419.0,
"reward": 1.177734375,
"reward_std": 0.19021794199943542,
"rewards/accuracy_reward_step": 0.6796875,
"rewards/format_reward_step": 0.99609375,
"step": 96
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5540832715591797,
"calib/avg_num_step_conf": 5.14453125,
"calib/ece": 0.3196874999999999,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.2265625,
"calib/gap": 0.00978502594514441,
"calib/mean_conf": 0.874375,
"calib/mu_c": 0.878732394366197,
"calib/mu_w": 0.8689473684210526,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3196874999999999,
"calib/std_conf": 0.04965112662367291,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.8144895104895103,
"calib/step_q_c_n": 715.0,
"calib/step_q_gap": 0.008011105173895694,
"calib/step_q_w": 0.8064784053156147,
"calib/step_q_w_n": 602.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 922.0,
"completions/max_terminated_length": 922.0,
"completions/mean_length": 406.0859375,
"completions/mean_terminated_length": 407.678466796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 137.0,
"epoch": 0.10346666666666667,
"grad_norm": 0.007084716111421585,
"learning_rate": 2.861111111111111e-06,
"loss": 0.016,
"num_tokens": 21334449.0,
"reward": 1.0546875,
"reward_std": 0.2218756079673767,
"rewards/accuracy_reward_step": 0.5546875,
"rewards/format_reward_step": 1.0,
"step": 97
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.4697322081201408,
"calib/avg_num_step_conf": 4.5703125,
"calib/ece": 0.2829600000000001,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.272,
"calib/gap": -0.0010804704631537376,
"calib/mean_conf": 0.8789600000000001,
"calib/mu_c": 0.8785234899328858,
"calib/mu_w": 0.8796039603960395,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.2829600000000001,
"calib/std_conf": 0.04746913102217061,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.808751950078003,
"calib/step_q_c_n": 641.0,
"calib/step_q_gap": -0.0006620385798418171,
"calib/step_q_w": 0.8094139886578449,
"calib/step_q_w_n": 529.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2911.0,
"completions/max_terminated_length": 2911.0,
"completions/mean_length": 439.12890625,
"completions/mean_terminated_length": 440.85101318359375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 125.0,
"epoch": 0.10453333333333334,
"grad_norm": 0.008098619990050793,
"learning_rate": 2.8333333333333335e-06,
"loss": 0.0601,
"num_tokens": 21553050.0,
"reward": 1.0703125,
"reward_std": 0.2985646426677704,
"rewards/accuracy_reward_step": 0.58203125,
"rewards/format_reward_step": 0.9765625,
"step": 98
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.44878862120593443,
"calib/avg_num_step_conf": 5.13671875,
"calib/ece": 0.5012749003984065,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.2788844621513944,
"calib/gap": -0.0076310058527289115,
"calib/mean_conf": 0.871792828685259,
"calib/mu_c": 0.866989247311828,
"calib/mu_w": 0.8746202531645569,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.5012749003984065,
"calib/std_conf": 0.05820328529122533,
"calib/step_conf_rate": 0.984375,
"calib/step_q_c": 0.8003992015968064,
"calib/step_q_c_n": 501.0,
"calib/step_q_gap": 0.00536234655995127,
"calib/step_q_w": 0.7950368550368552,
"calib/step_q_w_n": 814.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2612.0,
"completions/max_terminated_length": 2612.0,
"completions/mean_length": 507.890625,
"completions/mean_terminated_length": 507.890625,
"completions/min_length": 133.0,
"completions/min_terminated_length": 133.0,
"epoch": 0.1056,
"grad_norm": 0.008391114883124828,
"learning_rate": 2.805555555555556e-06,
"loss": 0.0662,
"num_tokens": 21788870.0,
"reward": 0.8515625,
"reward_std": 0.3123432695865631,
"rewards/accuracy_reward_step": 0.36328125,
"rewards/format_reward_step": 0.9765625,
"step": 99
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.48122427983539096,
"calib/avg_num_step_conf": 4.62109375,
"calib/ece": 0.2957142857142858,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.2261904761904762,
"calib/gap": 0.0043981481481480955,
"calib/mean_conf": 0.8671428571428571,
"calib/mu_c": 0.8690277777777777,
"calib/mu_w": 0.8646296296296296,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.2957142857142858,
"calib/std_conf": 0.06583839212458807,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.806955345060893,
"calib/step_q_c_n": 739.0,
"calib/step_q_gap": 0.005649038754586666,
"calib/step_q_w": 0.8013063063063063,
"calib/step_q_w_n": 444.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2405.0,
"completions/max_terminated_length": 2405.0,
"completions/mean_length": 455.484375,
"completions/mean_terminated_length": 459.07086181640625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 150.0,
"epoch": 0.10666666666666667,
"grad_norm": 0.007559601683169603,
"learning_rate": 2.7777777777777783e-06,
"loss": 0.0124,
"num_tokens": 22012882.0,
"reward": 1.052734375,
"reward_std": 0.26982903480529785,
"rewards/accuracy_reward_step": 0.5625,
"rewards/format_reward_step": 0.98046875,
"step": 100
},
{
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.5526965562053282,
"calib/avg_num_step_conf": 4.859375,
"calib/ece": 0.41590361445783147,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.24497991967871485,
"calib/gap": 0.010423001949317778,
"calib/mean_conf": 0.8737349397590362,
"calib/mu_c": 0.8793859649122807,
"calib/mu_w": 0.8689629629629629,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.9765625,
"calib/nonempty_step_conf_rate": 0.97265625,
"calib/pce": 0.41590361445783147,
"calib/std_conf": 0.05195095408766677,
"calib/step_conf_rate": 0.97265625,
"calib/step_q_c": 0.8095379537953795,
"calib/step_q_c_n": 606.0,
"calib/step_q_gap": 0.01179500708691561,
"calib/step_q_w": 0.7977429467084639,
"calib/step_q_w_n": 638.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2611.0,
"completions/max_terminated_length": 2611.0,
"completions/mean_length": 503.3828125,
"completions/mean_terminated_length": 503.3828125,
"completions/min_length": 129.0,
"completions/min_terminated_length": 129.0,
"epoch": 0.10773333333333333,
"grad_norm": 0.0077116601169109344,
"learning_rate": 2.7500000000000004e-06,
"loss": 0.0604,
"num_tokens": 22248740.0,
"reward": 0.9296875,
"reward_std": 0.32882392406463623,
"rewards/accuracy_reward_step": 0.4453125,
"rewards/format_reward_step": 0.96875,
"step": 101
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.48478319418260163,
"calib/avg_num_step_conf": 4.6171875,
"calib/ece": 0.2480555555555557,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.24206349206349206,
"calib/gap": -0.005557500673309734,
"calib/mean_conf": 0.8725793650793652,
"calib/mu_c": 0.8705063291139242,
"calib/mu_w": 0.876063829787234,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.24682539682539695,
"calib/std_conf": 0.05092101776664059,
"calib/step_conf_rate": 0.984375,
"calib/step_q_c": 0.8058095238095238,
"calib/step_q_c_n": 735.0,
"calib/step_q_gap": 0.0016931927133267966,
"calib/step_q_w": 0.804116331096197,
"calib/step_q_w_n": 447.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2261.0,
"completions/max_terminated_length": 2261.0,
"completions/mean_length": 381.67578125,
"completions/mean_terminated_length": 386.20159912109375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 97.0,
"epoch": 0.1088,
"grad_norm": 0.008189848624169827,
"learning_rate": 2.7222222222222224e-06,
"loss": -0.0209,
"num_tokens": 22453145.0,
"reward": 1.1015625,
"reward_std": 0.21460118889808655,
"rewards/accuracy_reward_step": 0.6171875,
"rewards/format_reward_step": 0.96875,
"step": 102
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5132780349794239,
"calib/avg_num_step_conf": 4.6328125,
"calib/ece": 0.30587301587301574,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.23015873015873015,
"calib/gap": -0.001481481481481306,
"calib/mean_conf": 0.8773015873015872,
"calib/mu_c": 0.8766666666666666,
"calib/mu_w": 0.8781481481481479,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.30587301587301574,
"calib/std_conf": 0.04898027945263534,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.8151098096632504,
"calib/step_q_c_n": 683.0,
"calib/step_q_gap": 0.013976608868021767,
"calib/step_q_w": 0.8011332007952287,
"calib/step_q_w_n": 503.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2695.0,
"completions/max_terminated_length": 2695.0,
"completions/mean_length": 451.765625,
"completions/mean_terminated_length": 453.53729248046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 154.0,
"epoch": 0.10986666666666667,
"grad_norm": 0.0077002933248877525,
"learning_rate": 2.6944444444444444e-06,
"loss": 0.013,
"num_tokens": 22673349.0,
"reward": 1.0546875,
"reward_std": 0.19874930381774902,
"rewards/accuracy_reward_step": 0.5625,
"rewards/format_reward_step": 0.984375,
"step": 103
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.4478870509175498,
"calib/avg_num_step_conf": 4.921875,
"calib/ece": 0.4536507936507937,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.23015873015873015,
"calib/gap": -0.0038123546135953035,
"calib/mean_conf": 0.8707936507936508,
"calib/mu_c": 0.8685849056603773,
"calib/mu_w": 0.8723972602739726,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.4519047619047619,
"calib/std_conf": 0.07719901280933066,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.8096183206106871,
"calib/step_q_c_n": 524.0,
"calib/step_q_gap": 0.020759624958513245,
"calib/step_q_w": 0.7888586956521738,
"calib/step_q_w_n": 736.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2125.0,
"completions/max_terminated_length": 2125.0,
"completions/mean_length": 457.78125,
"completions/mean_terminated_length": 459.5765075683594,
"completions/min_length": 0.0,
"completions/min_terminated_length": 135.0,
"epoch": 0.11093333333333333,
"grad_norm": 0.008258271962404251,
"learning_rate": 2.666666666666667e-06,
"loss": 0.0147,
"num_tokens": 22897221.0,
"reward": 0.91015625,
"reward_std": 0.312614381313324,
"rewards/accuracy_reward_step": 0.41796875,
"rewards/format_reward_step": 0.984375,
"step": 104
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5020491803278688,
"calib/avg_num_step_conf": 4.62890625,
"calib/ece": 0.3603200000000001,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.268,
"calib/gap": 0.0016495901639342891,
"calib/mean_conf": 0.87232,
"calib/mu_c": 0.8731249999999999,
"calib/mu_w": 0.8714754098360656,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.98046875,
"calib/nonempty_step_conf_rate": 0.96875,
"calib/pce": 0.3603200000000001,
"calib/std_conf": 0.06705980614347166,
"calib/step_conf_rate": 0.96875,
"calib/step_q_c": 0.7889141856392294,
"calib/step_q_c_n": 571.0,
"calib/step_q_gap": 0.011187801274408549,
"calib/step_q_w": 0.7777263843648209,
"calib/step_q_w_n": 614.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2944.0,
"completions/max_terminated_length": 2944.0,
"completions/mean_length": 473.08984375,
"completions/mean_terminated_length": 476.8149719238281,
"completions/min_length": 0.0,
"completions/min_terminated_length": 162.0,
"epoch": 0.112,
"grad_norm": 0.0080982381477952,
"learning_rate": 2.6388888888888893e-06,
"loss": -0.0084,
"num_tokens": 23124092.0,
"reward": 0.98046875,
"reward_std": 0.3512883186340332,
"rewards/accuracy_reward_step": 0.5,
"rewards/format_reward_step": 0.9609375,
"step": 105
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.4590505998956703,
"calib/avg_num_step_conf": 4.87890625,
"calib/ece": 0.31031999999999993,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.304,
"calib/gap": -0.005239958268127176,
"calib/mean_conf": 0.87832,
"calib/mu_c": 0.8760563380281691,
"calib/mu_w": 0.8812962962962962,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.31031999999999993,
"calib/std_conf": 0.05275962092358132,
"calib/step_conf_rate": 0.984375,
"calib/step_q_c": 0.7902427921092564,
"calib/step_q_c_n": 659.0,
"calib/step_q_gap": 0.010836012448239396,
"calib/step_q_w": 0.779406779661017,
"calib/step_q_w_n": 590.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2299.0,
"completions/max_terminated_length": 2299.0,
"completions/mean_length": 443.265625,
"completions/mean_terminated_length": 443.265625,
"completions/min_length": 166.0,
"completions/min_terminated_length": 166.0,
"epoch": 0.11306666666666666,
"grad_norm": 0.006772839929908514,
"learning_rate": 2.6111111111111113e-06,
"loss": 0.1007,
"num_tokens": 23342152.0,
"reward": 1.041015625,
"reward_std": 0.2168501913547516,
"rewards/accuracy_reward_step": 0.5546875,
"rewards/format_reward_step": 0.97265625,
"step": 106
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5359669811320754,
"calib/avg_num_step_conf": 5.42578125,
"calib/ece": 0.2570196078431372,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.2784313725490196,
"calib/gap": 0.007295597484276772,
"calib/mean_conf": 0.8778823529411766,
"calib/mu_c": 0.8806289308176102,
"calib/mu_w": 0.8733333333333334,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.25568627450980386,
"calib/std_conf": 0.05434300902377219,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.7773294117647059,
"calib/step_q_c_n": 850.0,
"calib/step_q_gap": -0.009201200480192018,
"calib/step_q_w": 0.786530612244898,
"calib/step_q_w_n": 539.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1286.0,
"completions/max_terminated_length": 1286.0,
"completions/mean_length": 432.13671875,
"completions/mean_terminated_length": 433.8313903808594,
"completions/min_length": 0.0,
"completions/min_terminated_length": 142.0,
"epoch": 0.11413333333333334,
"grad_norm": 0.006940098479390144,
"learning_rate": 2.5833333333333337e-06,
"loss": 0.0097,
"num_tokens": 23557395.0,
"reward": 1.1171875,
"reward_std": 0.2205488383769989,
"rewards/accuracy_reward_step": 0.62109375,
"rewards/format_reward_step": 0.9921875,
"step": 107
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5335861321776815,
"calib/avg_num_step_conf": 5.7109375,
"calib/ece": 0.1599604743083005,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.30039525691699603,
"calib/gap": 0.00709332920600525,
"calib/mean_conf": 0.8793280632411067,
"calib/mu_c": 0.8813186813186812,
"calib/mu_w": 0.8742253521126759,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.1599604743083005,
"calib/std_conf": 0.04983711693419079,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7930469530469529,
"calib/step_q_c_n": 1001.0,
"calib/step_q_gap": 0.009077321810510464,
"calib/step_q_w": 0.7839696312364425,
"calib/step_q_w_n": 461.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2736.0,
"completions/max_terminated_length": 2736.0,
"completions/mean_length": 477.35546875,
"completions/mean_terminated_length": 477.35546875,
"completions/min_length": 135.0,
"completions/min_terminated_length": 135.0,
"epoch": 0.1152,
"grad_norm": 0.006124243140220642,
"learning_rate": 2.5555555555555557e-06,
"loss": 0.0207,
"num_tokens": 23782830.0,
"reward": 1.205078125,
"reward_std": 0.20648230612277985,
"rewards/accuracy_reward_step": 0.7109375,
"rewards/format_reward_step": 0.98828125,
"step": 108
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.501492252984506,
"calib/avg_num_step_conf": 6.2265625,
"calib/ece": 0.3770916334661354,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.3466135458167331,
"calib/gap": -0.0004718059436117361,
"calib/mean_conf": 0.8830677290836654,
"calib/mu_c": 0.8828346456692916,
"calib/mu_w": 0.8833064516129033,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3770916334661354,
"calib/std_conf": 0.0537271344343267,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7836856010568032,
"calib/step_q_c_n": 757.0,
"calib/step_q_gap": 0.026465768320841243,
"calib/step_q_w": 0.7572198327359619,
"calib/step_q_w_n": 837.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2959.0,
"completions/max_terminated_length": 2959.0,
"completions/mean_length": 494.6328125,
"completions/mean_terminated_length": 496.57257080078125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 131.0,
"epoch": 0.11626666666666667,
"grad_norm": 0.0049357945099473,
"learning_rate": 2.5277777777777778e-06,
"loss": 0.056,
"num_tokens": 24014056.0,
"reward": 0.986328125,
"reward_std": 0.14231275022029877,
"rewards/accuracy_reward_step": 0.49609375,
"rewards/format_reward_step": 0.98046875,
"step": 109
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4638297872340426,
"calib/avg_num_step_conf": 4.5234375,
"calib/ece": 0.3464453125,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.3203125,
"calib/gap": -0.013190872648782048,
"calib/mean_conf": 0.8800390625000001,
"calib/mu_c": 0.874113475177305,
"calib/mu_w": 0.887304347826087,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3378515625,
"calib/std_conf": 0.06806188617075709,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7839516129032257,
"calib/step_q_c_n": 620.0,
"calib/step_q_gap": 0.0028177839069432054,
"calib/step_q_w": 0.7811338289962825,
"calib/step_q_w_n": 538.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1209.0,
"completions/max_terminated_length": 1209.0,
"completions/mean_length": 416.0546875,
"completions/mean_terminated_length": 417.6863098144531,
"completions/min_length": 0.0,
"completions/min_terminated_length": 145.0,
"epoch": 0.11733333333333333,
"grad_norm": 0.007653168402612209,
"learning_rate": 2.5e-06,
"loss": 0.0085,
"num_tokens": 24225486.0,
"reward": 1.05078125,
"reward_std": 0.25460559129714966,
"rewards/accuracy_reward_step": 0.55078125,
"rewards/format_reward_step": 1.0,
"step": 110
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5320876042528808,
"calib/avg_num_step_conf": 5.453125,
"calib/ece": 0.3301587301587301,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.32936507936507936,
"calib/gap": 0.007657732221302727,
"calib/mean_conf": 0.8817460317460318,
"calib/mu_c": 0.885179856115108,
"calib/mu_w": 0.8775221238938052,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.3301587301587301,
"calib/std_conf": 0.0544536990607233,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.7849710982658958,
"calib/step_q_c_n": 692.0,
"calib/step_q_gap": 0.016732461902259632,
"calib/step_q_w": 0.7682386363636362,
"calib/step_q_w_n": 704.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2085.0,
"completions/max_terminated_length": 2085.0,
"completions/mean_length": 484.6015625,
"completions/mean_terminated_length": 486.5019836425781,
"completions/min_length": 0.0,
"completions/min_terminated_length": 139.0,
"epoch": 0.1184,
"grad_norm": 0.006669084075838327,
"learning_rate": 2.4722222222222226e-06,
"loss": 0.0063,
"num_tokens": 24456952.0,
"reward": 1.029296875,
"reward_std": 0.2437652051448822,
"rewards/accuracy_reward_step": 0.54296875,
"rewards/format_reward_step": 0.97265625,
"step": 111
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5491143317230274,
"calib/avg_num_step_conf": 5.05859375,
"calib/ece": 0.35539999999999994,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.344,
"calib/gap": 0.012634460547503945,
"calib/mean_conf": 0.8840399999999999,
"calib/mu_c": 0.8898518518518518,
"calib/mu_w": 0.8772173913043478,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.98046875,
"calib/nonempty_step_conf_rate": 0.9765625,
"calib/pce": 0.34972,
"calib/std_conf": 0.07832801797568989,
"calib/step_conf_rate": 0.9765625,
"calib/step_q_c": 0.7576998597475456,
"calib/step_q_c_n": 713.0,
"calib/step_q_gap": 0.012270306482940696,
"calib/step_q_w": 0.7454295532646049,
"calib/step_q_w_n": 582.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2601.0,
"completions/max_terminated_length": 2601.0,
"completions/mean_length": 502.12890625,
"completions/mean_terminated_length": 510.0992431640625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 135.0,
"epoch": 0.11946666666666667,
"grad_norm": 0.0062360563315451145,
"learning_rate": 2.4444444444444447e-06,
"loss": 0.0064,
"num_tokens": 24693417.0,
"reward": 1.013671875,
"reward_std": 0.22822797298431396,
"rewards/accuracy_reward_step": 0.52734375,
"rewards/format_reward_step": 0.97265625,
"step": 112
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5737267080745341,
"calib/avg_num_step_conf": 5.56640625,
"calib/ece": 0.33831372549019617,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.30980392156862746,
"calib/gap": 0.008329192546583708,
"calib/mean_conf": 0.881529411764706,
"calib/mu_c": 0.8852857142857142,
"calib/mu_w": 0.8769565217391305,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3354117647058824,
"calib/std_conf": 0.054639766076630326,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.779700272479564,
"calib/step_q_c_n": 734.0,
"calib/step_q_gap": 0.03318797146653929,
"calib/step_q_w": 0.7465123010130247,
"calib/step_q_w_n": 691.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1569.0,
"completions/max_terminated_length": 1569.0,
"completions/mean_length": 428.05078125,
"completions/mean_terminated_length": 429.72943115234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 152.0,
"epoch": 0.12053333333333334,
"grad_norm": 0.007974677719175816,
"learning_rate": 2.4166666666666667e-06,
"loss": 0.0155,
"num_tokens": 24908198.0,
"reward": 1.044921875,
"reward_std": 0.26487648487091064,
"rewards/accuracy_reward_step": 0.546875,
"rewards/format_reward_step": 0.99609375,
"step": 113
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.44065556436690456,
"calib/avg_num_step_conf": 6.0546875,
"calib/ece": 0.2709881422924902,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.37549407114624506,
"calib/gap": -0.008441712926249068,
"calib/mean_conf": 0.8859288537549408,
"calib/mu_c": 0.8826923076923077,
"calib/mu_w": 0.8911340206185567,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2701581027667985,
"calib/std_conf": 0.053468867945726924,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7469487750556792,
"calib/step_q_c_n": 898.0,
"calib/step_q_gap": 0.00016963395138480664,
"calib/step_q_w": 0.7467791411042944,
"calib/step_q_w_n": 652.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 3037.0,
"completions/max_terminated_length": 3037.0,
"completions/mean_length": 483.28125,
"completions/mean_terminated_length": 483.28125,
"completions/min_length": 155.0,
"completions/min_terminated_length": 155.0,
"epoch": 0.1216,
"grad_norm": 0.007670039776712656,
"learning_rate": 2.388888888888889e-06,
"loss": 0.0323,
"num_tokens": 25136942.0,
"reward": 1.103515625,
"reward_std": 0.3302323520183563,
"rewards/accuracy_reward_step": 0.609375,
"rewards/format_reward_step": 0.98828125,
"step": 114
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5204283887468031,
"calib/avg_num_step_conf": 5.76171875,
"calib/ece": 0.3450199203187251,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.3944223107569721,
"calib/gap": 0.0028574168797955712,
"calib/mean_conf": 0.8868525896414343,
"calib/mu_c": 0.8881617647058824,
"calib/mu_w": 0.8853043478260868,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.3450199203187251,
"calib/std_conf": 0.05376700308538497,
"calib/step_conf_rate": 0.984375,
"calib/step_q_c": 0.7592137592137592,
"calib/step_q_c_n": 814.0,
"calib/step_q_gap": -0.013388358789266475,
"calib/step_q_w": 0.7726021180030257,
"calib/step_q_w_n": 661.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 3033.0,
"completions/max_terminated_length": 3033.0,
"completions/mean_length": 452.8203125,
"completions/mean_terminated_length": 454.5960998535156,
"completions/min_length": 0.0,
"completions/min_terminated_length": 145.0,
"epoch": 0.12266666666666666,
"grad_norm": 0.006985951215028763,
"learning_rate": 2.361111111111111e-06,
"loss": 0.0417,
"num_tokens": 25358128.0,
"reward": 1.017578125,
"reward_std": 0.2832874059677124,
"rewards/accuracy_reward_step": 0.53125,
"rewards/format_reward_step": 0.97265625,
"step": 115
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.3871982537236774,
"calib/avg_num_step_conf": 6.40234375,
"calib/ece": 0.3520800000000001,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.34,
"calib/gap": -0.019911402157165026,
"calib/mean_conf": 0.88008,
"calib/mu_c": 0.8706818181818182,
"calib/mu_w": 0.8905932203389832,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.3520800000000001,
"calib/std_conf": 0.052786301253260774,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.7537380627557981,
"calib/step_q_c_n": 733.0,
"calib/step_q_gap": 0.022038283506349954,
"calib/step_q_w": 0.7316997792494482,
"calib/step_q_w_n": 906.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 3024.0,
"completions/max_terminated_length": 3024.0,
"completions/mean_length": 497.71484375,
"completions/mean_terminated_length": 499.66668701171875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 127.0,
"epoch": 0.12373333333333333,
"grad_norm": 0.005950558930635452,
"learning_rate": 2.3333333333333336e-06,
"loss": 0.0413,
"num_tokens": 25590063.0,
"reward": 1.0,
"reward_std": 0.24267561733722687,
"rewards/accuracy_reward_step": 0.515625,
"rewards/format_reward_step": 0.96875,
"step": 116
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5050012503125781,
"calib/avg_num_step_conf": 5.98046875,
"calib/ece": 0.39525691699604737,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.3715415019762846,
"calib/gap": 0.005590147536883916,
"calib/mean_conf": 0.8853754940711462,
"calib/mu_c": 0.8882258064516128,
"calib/mu_w": 0.8826356589147288,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.39525691699604737,
"calib/std_conf": 0.057183199392304755,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.765679758308157,
"calib/step_q_c_n": 662.0,
"calib/step_q_gap": 0.05223902182944584,
"calib/step_q_w": 0.7134407364787112,
"calib/step_q_w_n": 869.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2601.0,
"completions/max_terminated_length": 2601.0,
"completions/mean_length": 485.38671875,
"completions/mean_terminated_length": 485.38671875,
"completions/min_length": 167.0,
"completions/min_terminated_length": 167.0,
"epoch": 0.1248,
"grad_norm": 0.005563353653997183,
"learning_rate": 2.305555555555556e-06,
"loss": 0.0457,
"num_tokens": 25820922.0,
"reward": 0.974609375,
"reward_std": 0.21814069151878357,
"rewards/accuracy_reward_step": 0.484375,
"rewards/format_reward_step": 0.98046875,
"step": 117
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4824248120300752,
"calib/avg_num_step_conf": 6.9453125,
"calib/ece": 0.32771653543307083,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.37401574803149606,
"calib/gap": 0.0061165413533834645,
"calib/mean_conf": 0.8788976377952756,
"calib/mu_c": 0.8816428571428572,
"calib/mu_w": 0.8755263157894737,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.32771653543307083,
"calib/std_conf": 0.08366344601523654,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7382222222222223,
"calib/step_q_c_n": 855.0,
"calib/step_q_gap": 0.03433273143132298,
"calib/step_q_w": 0.7038894907908994,
"calib/step_q_w_n": 923.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2218.0,
"completions/max_terminated_length": 2218.0,
"completions/mean_length": 532.171875,
"completions/mean_terminated_length": 534.2588500976562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 148.0,
"epoch": 0.12586666666666665,
"grad_norm": 0.004880652762949467,
"learning_rate": 2.277777777777778e-06,
"loss": 0.0038,
"num_tokens": 26061166.0,
"reward": 1.04296875,
"reward_std": 0.18309026956558228,
"rewards/accuracy_reward_step": 0.546875,
"rewards/format_reward_step": 0.9921875,
"step": 118
},
{
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.5308506518652382,
"calib/avg_num_step_conf": 6.3359375,
"calib/ece": 0.37538152610441766,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.42168674698795183,
"calib/gap": 0.007094359106751047,
"calib/mean_conf": 0.885421686746988,
"calib/mu_c": 0.8888976377952758,
"calib/mu_w": 0.8818032786885247,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.37538152610441766,
"calib/std_conf": 0.057294445678343194,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7466408268733848,
"calib/step_q_c_n": 774.0,
"calib/step_q_gap": 0.035933279703573406,
"calib/step_q_w": 0.7107075471698114,
"calib/step_q_w_n": 848.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 3053.0,
"completions/max_terminated_length": 3053.0,
"completions/mean_length": 557.765625,
"completions/mean_terminated_length": 562.157470703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 157.0,
"epoch": 0.12693333333333334,
"grad_norm": 0.005895211827009916,
"learning_rate": 2.25e-06,
"loss": 0.0334,
"num_tokens": 26309018.0,
"reward": 0.982421875,
"reward_std": 0.28925490379333496,
"rewards/accuracy_reward_step": 0.49609375,
"rewards/format_reward_step": 0.97265625,
"step": 119
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5175073373831138,
"calib/avg_num_step_conf": 5.99609375,
"calib/ece": 0.2411507936507938,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.3253968253968254,
"calib/gap": 0.0009698996655516101,
"calib/mean_conf": 0.8728174603174603,
"calib/mu_c": 0.8731677018633539,
"calib/mu_w": 0.8721978021978023,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.2375396825396827,
"calib/std_conf": 0.08035387020574726,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7378947368421053,
"calib/step_q_c_n": 912.0,
"calib/step_q_gap": -0.014609275998986204,
"calib/step_q_w": 0.7525040128410915,
"calib/step_q_w_n": 623.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2748.0,
"completions/max_terminated_length": 2748.0,
"completions/mean_length": 460.84765625,
"completions/mean_terminated_length": 460.84765625,
"completions/min_length": 142.0,
"completions/min_terminated_length": 142.0,
"epoch": 0.128,
"grad_norm": 0.006268102675676346,
"learning_rate": 2.222222222222222e-06,
"loss": 0.0838,
"num_tokens": 26533683.0,
"reward": 1.12109375,
"reward_std": 0.24319830536842346,
"rewards/accuracy_reward_step": 0.62890625,
"rewards/format_reward_step": 0.984375,
"step": 120
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5452300575143786,
"calib/avg_num_step_conf": 6.93359375,
"calib/ece": 0.3722924901185771,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.35177865612648224,
"calib/gap": 0.01538322080520138,
"calib/mean_conf": 0.8769565217391304,
"calib/mu_c": 0.8844961240310077,
"calib/mu_w": 0.8691129032258064,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3696837944664032,
"calib/std_conf": 0.07015951677025219,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7287597911227154,
"calib/step_q_c_n": 766.0,
"calib/step_q_gap": 0.00698575742598595,
"calib/step_q_w": 0.7217740336967294,
"calib/step_q_w_n": 1009.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2467.0,
"completions/max_terminated_length": 2467.0,
"completions/mean_length": 513.14453125,
"completions/mean_terminated_length": 515.1569213867188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 170.0,
"epoch": 0.12906666666666666,
"grad_norm": 0.005872685927897692,
"learning_rate": 2.1944444444444445e-06,
"loss": 0.042,
"num_tokens": 26770104.0,
"reward": 0.998046875,
"reward_std": 0.3006698489189148,
"rewards/accuracy_reward_step": 0.50390625,
"rewards/format_reward_step": 0.98828125,
"step": 121
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5214,
"calib/avg_num_step_conf": 6.5546875,
"calib/ece": 0.2801600000000001,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.316,
"calib/gap": 0.002466666666666728,
"calib/mean_conf": 0.87168,
"calib/mu_c": 0.8726666666666667,
"calib/mu_w": 0.8702,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.27592000000000005,
"calib/std_conf": 0.0606133450652577,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7503354978354978,
"calib/step_q_c_n": 924.0,
"calib/step_q_gap": 0.04221878696016634,
"calib/step_q_w": 0.7081167108753315,
"calib/step_q_w_n": 754.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2853.0,
"completions/max_terminated_length": 2853.0,
"completions/mean_length": 462.7890625,
"completions/mean_terminated_length": 470.13494873046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 169.0,
"epoch": 0.13013333333333332,
"grad_norm": 0.0056983535178005695,
"learning_rate": 2.166666666666667e-06,
"loss": 0.0162,
"num_tokens": 26995922.0,
"reward": 1.07421875,
"reward_std": 0.2015603482723236,
"rewards/accuracy_reward_step": 0.5859375,
"rewards/format_reward_step": 0.9765625,
"step": 122
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4684514831573655,
"calib/avg_num_step_conf": 6.671875,
"calib/ece": 0.33980237154150195,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.31620553359683795,
"calib/gap": -0.008157365510306547,
"calib/mean_conf": 0.8734782608695653,
"calib/mu_c": 0.8697058823529412,
"calib/mu_w": 0.8778632478632478,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.33786561264822135,
"calib/std_conf": 0.06231816649450058,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.7276886792452829,
"calib/step_q_c_n": 848.0,
"calib/step_q_gap": 0.031502632733654945,
"calib/step_q_w": 0.696186046511628,
"calib/step_q_w_n": 860.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2663.0,
"completions/max_terminated_length": 2663.0,
"completions/mean_length": 544.2890625,
"completions/mean_terminated_length": 546.423583984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 179.0,
"epoch": 0.1312,
"grad_norm": 0.006151233799755573,
"learning_rate": 2.138888888888889e-06,
"loss": -0.0215,
"num_tokens": 27240548.0,
"reward": 1.0234375,
"reward_std": 0.29736945033073425,
"rewards/accuracy_reward_step": 0.53125,
"rewards/format_reward_step": 0.984375,
"step": 123
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5681663258350375,
"calib/avg_num_step_conf": 6.296875,
"calib/ece": 0.23648221343873532,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.2964426877470356,
"calib/gap": 0.011232447171097615,
"calib/mean_conf": 0.8743478260869565,
"calib/mu_c": 0.8783435582822087,
"calib/mu_w": 0.867111111111111,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.23328063241106733,
"calib/std_conf": 0.05411221656373088,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7459311740890688,
"calib/step_q_c_n": 988.0,
"calib/step_q_gap": -0.0011521592442643724,
"calib/step_q_w": 0.7470833333333332,
"calib/step_q_w_n": 624.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2217.0,
"completions/max_terminated_length": 2217.0,
"completions/mean_length": 493.08984375,
"completions/mean_terminated_length": 493.08984375,
"completions/min_length": 163.0,
"completions/min_terminated_length": 163.0,
"epoch": 0.13226666666666667,
"grad_norm": 0.006515982560813427,
"learning_rate": 2.1111111111111114e-06,
"loss": 0.0049,
"num_tokens": 27473595.0,
"reward": 1.12890625,
"reward_std": 0.26251235604286194,
"rewards/accuracy_reward_step": 0.63671875,
"rewards/format_reward_step": 0.984375,
"step": 124
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.4942148760330579,
"calib/avg_num_step_conf": 6.15625,
"calib/ece": 0.3582470119521912,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.26693227091633465,
"calib/gap": -0.00013477431659247863,
"calib/mean_conf": 0.8729880478087649,
"calib/mu_c": 0.8729230769230768,
"calib/mu_w": 0.8730578512396693,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3566533864541832,
"calib/std_conf": 0.05457766001472556,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7396488764044944,
"calib/step_q_c_n": 712.0,
"calib/step_q_gap": 0.020678968997086877,
"calib/step_q_w": 0.7189699074074075,
"calib/step_q_w_n": 864.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2858.0,
"completions/max_terminated_length": 2858.0,
"completions/mean_length": 502.80078125,
"completions/mean_terminated_length": 504.7725830078125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 164.0,
"epoch": 0.13333333333333333,
"grad_norm": 0.00571054220199585,
"learning_rate": 2.0833333333333334e-06,
"loss": 0.0216,
"num_tokens": 27707120.0,
"reward": 0.99609375,
"reward_std": 0.3074162006378174,
"rewards/accuracy_reward_step": 0.5078125,
"rewards/format_reward_step": 0.9765625,
"step": 125
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.4624029551715502,
"calib/avg_num_step_conf": 6.62109375,
"calib/ece": 0.35237154150197625,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.33992094861660077,
"calib/gap": -0.004318181818181777,
"calib/mean_conf": 0.8741106719367588,
"calib/mu_c": 0.8720454545454546,
"calib/mu_w": 0.8763636363636363,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.35237154150197625,
"calib/std_conf": 0.06917711990051564,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7486817102137767,
"calib/step_q_c_n": 842.0,
"calib/step_q_gap": 0.0361529880566841,
"calib/step_q_w": 0.7125287221570926,
"calib/step_q_w_n": 853.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 1862.0,
"completions/max_terminated_length": 1862.0,
"completions/mean_length": 481.08203125,
"completions/mean_terminated_length": 484.8700866699219,
"completions/min_length": 0.0,
"completions/min_terminated_length": 144.0,
"epoch": 0.1344,
"grad_norm": 0.0058242930099368095,
"learning_rate": 2.0555555555555555e-06,
"loss": -0.0112,
"num_tokens": 27935741.0,
"reward": 1.0078125,
"reward_std": 0.25029534101486206,
"rewards/accuracy_reward_step": 0.515625,
"rewards/format_reward_step": 0.984375,
"step": 126
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.48636220472440944,
"calib/avg_num_step_conf": 6.2421875,
"calib/ece": 0.3589682539682541,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.25396825396825395,
"calib/gap": 0.005033070866141798,
"calib/mean_conf": 0.862936507936508,
"calib/mu_c": 0.8654330708661417,
"calib/mu_w": 0.8603999999999999,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3589682539682541,
"calib/std_conf": 0.08610225980403213,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7256891891891892,
"calib/step_q_c_n": 740.0,
"calib/step_q_gap": 0.007122755622755661,
"calib/step_q_w": 0.7185664335664336,
"calib/step_q_w_n": 858.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2770.0,
"completions/max_terminated_length": 2770.0,
"completions/mean_length": 481.63671875,
"completions/mean_terminated_length": 481.63671875,
"completions/min_length": 118.0,
"completions/min_terminated_length": 118.0,
"epoch": 0.13546666666666668,
"grad_norm": 0.006309483200311661,
"learning_rate": 2.027777777777778e-06,
"loss": 0.0508,
"num_tokens": 28162712.0,
"reward": 0.986328125,
"reward_std": 0.2884799540042877,
"rewards/accuracy_reward_step": 0.49609375,
"rewards/format_reward_step": 0.98046875,
"step": 127
},
{
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.5073977936404932,
"calib/avg_num_step_conf": 5.19140625,
"calib/ece": 0.32823293172690754,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.28112449799196787,
"calib/gap": 0.008704088254380404,
"calib/mean_conf": 0.8627710843373495,
"calib/mu_c": 0.8667910447761195,
"calib/mu_w": 0.858086956521739,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.3264257028112449,
"calib/std_conf": 0.08950080100620286,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.7451987281399046,
"calib/step_q_c_n": 629.0,
"calib/step_q_gap": 0.04137015671133315,
"calib/step_q_w": 0.7038285714285715,
"calib/step_q_w_n": 700.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2523.0,
"completions/max_terminated_length": 2523.0,
"completions/mean_length": 489.09765625,
"completions/mean_terminated_length": 489.09765625,
"completions/min_length": 122.0,
"completions/min_terminated_length": 122.0,
"epoch": 0.13653333333333334,
"grad_norm": 0.0059307715855538845,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.0333,
"num_tokens": 28394585.0,
"reward": 1.009765625,
"reward_std": 0.26397642493247986,
"rewards/accuracy_reward_step": 0.5234375,
"rewards/format_reward_step": 0.97265625,
"step": 128
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4389801375095493,
"calib/avg_num_step_conf": 5.80859375,
"calib/ece": 0.27578125,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.21875,
"calib/gap": -0.013776419658772743,
"calib/mean_conf": 0.86328125,
"calib/mu_c": 0.8577922077922076,
"calib/mu_w": 0.8715686274509803,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.26875000000000004,
"calib/std_conf": 0.05657005301780705,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7613248847926267,
"calib/step_q_c_n": 868.0,
"calib/step_q_gap": 0.009208568152885288,
"calib/step_q_w": 0.7521163166397414,
"calib/step_q_w_n": 619.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1591.0,
"completions/max_terminated_length": 1591.0,
"completions/mean_length": 422.0625,
"completions/mean_terminated_length": 423.7176818847656,
"completions/min_length": 0.0,
"completions/min_terminated_length": 121.0,
"epoch": 0.1376,
"grad_norm": 0.00600614957511425,
"learning_rate": 1.9722222222222224e-06,
"loss": 0.0002,
"num_tokens": 28605017.0,
"reward": 1.1015625,
"reward_std": 0.22764956951141357,
"rewards/accuracy_reward_step": 0.6015625,
"rewards/format_reward_step": 1.0,
"step": 129
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.544435099523409,
"calib/avg_num_step_conf": 4.87890625,
"calib/ece": 0.221394422310757,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.2549800796812749,
"calib/gap": 0.006085646201289596,
"calib/mean_conf": 0.8747808764940239,
"calib/mu_c": 0.8768902439024391,
"calib/mu_w": 0.8708045977011495,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.221394422310757,
"calib/std_conf": 0.04967074652737102,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7580371352785145,
"calib/step_q_c_n": 754.0,
"calib/step_q_gap": 0.012420973662352841,
"calib/step_q_w": 0.7456161616161616,
"calib/step_q_w_n": 495.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2772.0,
"completions/max_terminated_length": 2772.0,
"completions/mean_length": 445.734375,
"completions/mean_terminated_length": 445.734375,
"completions/min_length": 157.0,
"completions/min_terminated_length": 157.0,
"epoch": 0.13866666666666666,
"grad_norm": 0.00482236547395587,
"learning_rate": 1.944444444444445e-06,
"loss": 0.0876,
"num_tokens": 28824413.0,
"reward": 1.130859375,
"reward_std": 0.17365878820419312,
"rewards/accuracy_reward_step": 0.640625,
"rewards/format_reward_step": 0.98046875,
"step": 130
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4551726350917726,
"calib/avg_num_step_conf": 5.05078125,
"calib/ece": 0.4493280632411065,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.25691699604743085,
"calib/gap": -0.008606725709151664,
"calib/mean_conf": 0.8658498023715415,
"calib/mu_c": 0.8608490566037735,
"calib/mu_w": 0.8694557823129252,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.4481027667984188,
"calib/std_conf": 0.05317367667620221,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7563003663003663,
"calib/step_q_c_n": 546.0,
"calib/step_q_gap": 0.012913485443606043,
"calib/step_q_w": 0.7433868808567603,
"calib/step_q_w_n": 747.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2368.0,
"completions/max_terminated_length": 2368.0,
"completions/mean_length": 425.23828125,
"completions/mean_terminated_length": 425.23828125,
"completions/min_length": 165.0,
"completions/min_terminated_length": 165.0,
"epoch": 0.13973333333333332,
"grad_norm": 0.00542412931099534,
"learning_rate": 1.916666666666667e-06,
"loss": 0.0357,
"num_tokens": 29039482.0,
"reward": 0.908203125,
"reward_std": 0.19030889868736267,
"rewards/accuracy_reward_step": 0.4140625,
"rewards/format_reward_step": 0.98828125,
"step": 131
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5115197022205109,
"calib/avg_num_step_conf": 5.26953125,
"calib/ece": 0.29276679841897235,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.30039525691699603,
"calib/gap": 0.002339237581825304,
"calib/mean_conf": 0.8712648221343874,
"calib/mu_c": 0.8722448979591837,
"calib/mu_w": 0.8699056603773584,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.291501976284585,
"calib/std_conf": 0.053847804000900085,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7725869894099849,
"calib/step_q_c_n": 661.0,
"calib/step_q_gap": 0.04151140801463604,
"calib/step_q_w": 0.7310755813953489,
"calib/step_q_w_n": 688.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2904.0,
"completions/max_terminated_length": 2904.0,
"completions/mean_length": 468.3046875,
"completions/mean_terminated_length": 468.3046875,
"completions/min_length": 150.0,
"completions/min_terminated_length": 150.0,
"epoch": 0.1408,
"grad_norm": 0.005643435753881931,
"learning_rate": 1.888888888888889e-06,
"loss": 0.055,
"num_tokens": 29264960.0,
"reward": 1.068359375,
"reward_std": 0.2384309619665146,
"rewards/accuracy_reward_step": 0.57421875,
"rewards/format_reward_step": 0.98828125,
"step": 132
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5193548387096774,
"calib/avg_num_step_conf": 6.1796875,
"calib/ece": 0.48519685039370086,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.28346456692913385,
"calib/gap": 0.004450961225154648,
"calib/mean_conf": 0.8749606299212599,
"calib/mu_c": 0.8776767676767675,
"calib/mu_w": 0.8732258064516129,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.48519685039370086,
"calib/std_conf": 0.056476237296147466,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7284842883548983,
"calib/step_q_c_n": 541.0,
"calib/step_q_gap": -0.017836556986119967,
"calib/step_q_w": 0.7463208453410183,
"calib/step_q_w_n": 1041.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2932.0,
"completions/max_terminated_length": 2932.0,
"completions/mean_length": 541.546875,
"completions/mean_terminated_length": 541.546875,
"completions/min_length": 188.0,
"completions/min_terminated_length": 188.0,
"epoch": 0.14186666666666667,
"grad_norm": 0.0053438725881278515,
"learning_rate": 1.8611111111111113e-06,
"loss": 0.0547,
"num_tokens": 29509940.0,
"reward": 0.8828125,
"reward_std": 0.28567051887512207,
"rewards/accuracy_reward_step": 0.38671875,
"rewards/format_reward_step": 0.9921875,
"step": 133
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.49387676999617297,
"calib/avg_num_step_conf": 5.70703125,
"calib/ece": 0.3452988047808765,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.30677290836653387,
"calib/gap": -0.0009267763745375301,
"calib/mean_conf": 0.8791633466135459,
"calib/mu_c": 0.878731343283582,
"calib/mu_w": 0.8796581196581196,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3452988047808765,
"calib/std_conf": 0.05131436984845087,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7596148359486448,
"calib/step_q_c_n": 701.0,
"calib/step_q_gap": 0.05404904647496056,
"calib/step_q_w": 0.7055657894736842,
"calib/step_q_w_n": 760.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2605.0,
"completions/max_terminated_length": 2605.0,
"completions/mean_length": 543.71484375,
"completions/mean_terminated_length": 545.8471069335938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 185.0,
"epoch": 0.14293333333333333,
"grad_norm": 0.005967985838651657,
"learning_rate": 1.8333333333333333e-06,
"loss": 0.0729,
"num_tokens": 29758083.0,
"reward": 1.01171875,
"reward_std": 0.32466161251068115,
"rewards/accuracy_reward_step": 0.5234375,
"rewards/format_reward_step": 0.9765625,
"step": 134
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.4150937339496661,
"calib/avg_num_step_conf": 5.01171875,
"calib/ece": 0.34711999999999993,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.312,
"calib/gap": -0.016304571135079438,
"calib/mean_conf": 0.87512,
"calib/mu_c": 0.8674242424242424,
"calib/mu_w": 0.8837288135593219,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.34711999999999993,
"calib/std_conf": 0.052262659710351524,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.7503671328671329,
"calib/step_q_c_n": 572.0,
"calib/step_q_gap": 0.00573984735377131,
"calib/step_q_w": 0.7446272855133615,
"calib/step_q_w_n": 711.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2582.0,
"completions/max_terminated_length": 2582.0,
"completions/mean_length": 504.8828125,
"completions/mean_terminated_length": 506.8627624511719,
"completions/min_length": 0.0,
"completions/min_terminated_length": 115.0,
"epoch": 0.144,
"grad_norm": 0.0058227465488016605,
"learning_rate": 1.8055555555555557e-06,
"loss": 0.0757,
"num_tokens": 29993213.0,
"reward": 1.00390625,
"reward_std": 0.2614578604698181,
"rewards/accuracy_reward_step": 0.515625,
"rewards/format_reward_step": 0.9765625,
"step": 135
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.507595648912228,
"calib/avg_num_step_conf": 5.7109375,
"calib/ece": 0.38924901185770755,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.3201581027667984,
"calib/gap": 0.007725056264066166,
"calib/mean_conf": 0.8793675889328064,
"calib/mu_c": 0.8833064516129033,
"calib/mu_w": 0.8755813953488372,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.38924901185770755,
"calib/std_conf": 0.055442838641589665,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.7597941176470587,
"calib/step_q_c_n": 680.0,
"calib/step_q_gap": 0.05562531969309448,
"calib/step_q_w": 0.7041687979539643,
"calib/step_q_w_n": 782.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2522.0,
"completions/max_terminated_length": 2522.0,
"completions/mean_length": 474.08984375,
"completions/mean_terminated_length": 474.08984375,
"completions/min_length": 168.0,
"completions/min_terminated_length": 168.0,
"epoch": 0.14506666666666668,
"grad_norm": 0.005732369609177113,
"learning_rate": 1.777777777777778e-06,
"loss": 0.0432,
"num_tokens": 30223068.0,
"reward": 0.9765625,
"reward_std": 0.25726181268692017,
"rewards/accuracy_reward_step": 0.484375,
"rewards/format_reward_step": 0.984375,
"step": 136
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5126005530417295,
"calib/avg_num_step_conf": 6.0078125,
"calib/ece": 0.3438339920948619,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.2885375494071146,
"calib/gap": -0.006987179487179751,
"calib/mean_conf": 0.8657312252964426,
"calib/mu_c": 0.8624999999999999,
"calib/mu_w": 0.8694871794871797,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3360079051383401,
"calib/std_conf": 0.07381714731970884,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7409504132231405,
"calib/step_q_c_n": 726.0,
"calib/step_q_gap": 0.031492285144322785,
"calib/step_q_w": 0.7094581280788177,
"calib/step_q_w_n": 812.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2778.0,
"completions/max_terminated_length": 2778.0,
"completions/mean_length": 459.9375,
"completions/mean_terminated_length": 463.5590515136719,
"completions/min_length": 0.0,
"completions/min_terminated_length": 171.0,
"epoch": 0.14613333333333334,
"grad_norm": 0.004957827739417553,
"learning_rate": 1.75e-06,
"loss": 0.0157,
"num_tokens": 30447796.0,
"reward": 1.0234375,
"reward_std": 0.21421602368354797,
"rewards/accuracy_reward_step": 0.53125,
"rewards/format_reward_step": 0.984375,
"step": 137
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5012825445684237,
"calib/avg_num_step_conf": 5.49609375,
"calib/ece": 0.3270517928286852,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.3107569721115538,
"calib/gap": 0.003289085545722714,
"calib/mean_conf": 0.8768525896414343,
"calib/mu_c": 0.8783333333333334,
"calib/mu_w": 0.8750442477876107,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.3270517928286852,
"calib/std_conf": 0.054947042444279365,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.7471188251001336,
"calib/step_q_c_n": 749.0,
"calib/step_q_gap": -0.002957162741811681,
"calib/step_q_w": 0.7500759878419453,
"calib/step_q_w_n": 658.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 3010.0,
"completions/max_terminated_length": 3010.0,
"completions/mean_length": 473.421875,
"completions/mean_terminated_length": 475.2784729003906,
"completions/min_length": 0.0,
"completions/min_terminated_length": 151.0,
"epoch": 0.1472,
"grad_norm": 0.006393305957317352,
"learning_rate": 1.7222222222222224e-06,
"loss": 0.0518,
"num_tokens": 30673328.0,
"reward": 1.029296875,
"reward_std": 0.34001946449279785,
"rewards/accuracy_reward_step": 0.5390625,
"rewards/format_reward_step": 0.98046875,
"step": 138
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5215581426830919,
"calib/avg_num_step_conf": 5.0859375,
"calib/ece": 0.22631372549019618,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.27450980392156865,
"calib/gap": 0.0017117909841616319,
"calib/mean_conf": 0.8772941176470588,
"calib/mu_c": 0.8778915662650604,
"calib/mu_w": 0.8761797752808987,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.22631372549019618,
"calib/std_conf": 0.05181539407779448,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7570445859872611,
"calib/step_q_c_n": 785.0,
"calib/step_q_gap": 0.003988493143934235,
"calib/step_q_w": 0.7530560928433269,
"calib/step_q_w_n": 517.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2453.0,
"completions/max_terminated_length": 2453.0,
"completions/mean_length": 426.875,
"completions/mean_terminated_length": 426.875,
"completions/min_length": 140.0,
"completions/min_terminated_length": 140.0,
"epoch": 0.14826666666666666,
"grad_norm": 0.0056905848905444145,
"learning_rate": 1.6944444444444446e-06,
"loss": 0.0293,
"num_tokens": 30885704.0,
"reward": 1.146484375,
"reward_std": 0.21988742053508759,
"rewards/accuracy_reward_step": 0.6484375,
"rewards/format_reward_step": 0.99609375,
"step": 139
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5948792071030353,
"calib/avg_num_step_conf": 4.7578125,
"calib/ece": 0.22039370078740164,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.2874015748031496,
"calib/gap": 0.01896895863445558,
"calib/mean_conf": 0.8778740157480315,
"calib/mu_c": 0.8843712574850299,
"calib/mu_w": 0.8654022988505743,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.22039370078740164,
"calib/std_conf": 0.05324331197155695,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7770304568527918,
"calib/step_q_c_n": 788.0,
"calib/step_q_gap": 0.021379294062094267,
"calib/step_q_w": 0.7556511627906976,
"calib/step_q_w_n": 430.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 3034.0,
"completions/max_terminated_length": 3034.0,
"completions/mean_length": 429.625,
"completions/mean_terminated_length": 429.625,
"completions/min_length": 138.0,
"completions/min_terminated_length": 138.0,
"epoch": 0.14933333333333335,
"grad_norm": 0.00584077462553978,
"learning_rate": 1.6666666666666667e-06,
"loss": 0.0172,
"num_tokens": 31100704.0,
"reward": 1.146484375,
"reward_std": 0.2521313428878784,
"rewards/accuracy_reward_step": 0.65234375,
"rewards/format_reward_step": 0.98828125,
"step": 140
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.590432098765432,
"calib/avg_num_step_conf": 4.83984375,
"calib/ece": 0.24349206349206354,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.32936507936507936,
"calib/gap": 0.015925925925925788,
"calib/mean_conf": 0.8863492063492064,
"calib/mu_c": 0.892037037037037,
"calib/mu_w": 0.8761111111111112,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.24349206349206354,
"calib/std_conf": 0.046103802387744704,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.7677184466019417,
"calib/step_q_c_n": 824.0,
"calib/step_q_gap": 0.0450437478067609,
"calib/step_q_w": 0.7226746987951808,
"calib/step_q_w_n": 415.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2650.0,
"completions/max_terminated_length": 2650.0,
"completions/mean_length": 484.71875,
"completions/mean_terminated_length": 484.71875,
"completions/min_length": 163.0,
"completions/min_terminated_length": 163.0,
"epoch": 0.1504,
"grad_norm": 0.005316486582159996,
"learning_rate": 1.638888888888889e-06,
"loss": 0.0347,
"num_tokens": 31331888.0,
"reward": 1.12109375,
"reward_std": 0.24240916967391968,
"rewards/accuracy_reward_step": 0.6328125,
"rewards/format_reward_step": 0.9765625,
"step": 141
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5043939002326182,
"calib/avg_num_step_conf": 5.578125,
"calib/ece": 0.3069444444444446,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.34523809523809523,
"calib/gap": 0.0011592142672524064,
"calib/mean_conf": 0.8836904761904761,
"calib/mu_c": 0.8841780821917806,
"calib/mu_w": 0.8830188679245282,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3056349206349207,
"calib/std_conf": 0.04964428455182058,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7646995994659546,
"calib/step_q_c_n": 749.0,
"calib/step_q_gap": 0.020855711395262322,
"calib/step_q_w": 0.7438438880706922,
"calib/step_q_w_n": 679.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2772.0,
"completions/max_terminated_length": 2772.0,
"completions/mean_length": 493.76953125,
"completions/mean_terminated_length": 495.7059020996094,
"completions/min_length": 0.0,
"completions/min_terminated_length": 138.0,
"epoch": 0.15146666666666667,
"grad_norm": 0.004341771826148033,
"learning_rate": 1.6111111111111113e-06,
"loss": 0.054,
"num_tokens": 31563453.0,
"reward": 1.060546875,
"reward_std": 0.21372465789318085,
"rewards/accuracy_reward_step": 0.5703125,
"rewards/format_reward_step": 0.98046875,
"step": 142
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.4680171964564877,
"calib/avg_num_step_conf": 5.47265625,
"calib/ece": 0.2893280632411067,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.31620553359683795,
"calib/gap": -0.013604090672225122,
"calib/mean_conf": 0.8774703557312253,
"calib/mu_c": 0.8720394736842106,
"calib/mu_w": 0.8856435643564358,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9765625,
"calib/pce": 0.28300395256916994,
"calib/std_conf": 0.07470577406418852,
"calib/step_conf_rate": 0.9765625,
"calib/step_q_c": 0.7722969543147208,
"calib/step_q_c_n": 788.0,
"calib/step_q_gap": 0.03821865088894594,
"calib/step_q_w": 0.7340783034257748,
"calib/step_q_w_n": 613.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2137.0,
"completions/max_terminated_length": 2137.0,
"completions/mean_length": 479.63671875,
"completions/mean_terminated_length": 481.5176696777344,
"completions/min_length": 0.0,
"completions/min_terminated_length": 163.0,
"epoch": 0.15253333333333333,
"grad_norm": 0.005102099850773811,
"learning_rate": 1.5833333333333333e-06,
"loss": -0.0289,
"num_tokens": 31793576.0,
"reward": 1.080078125,
"reward_std": 0.18172350525856018,
"rewards/accuracy_reward_step": 0.59375,
"rewards/format_reward_step": 0.97265625,
"step": 143
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.41367079889807157,
"calib/avg_num_step_conf": 4.70703125,
"calib/ece": 0.23885375494071143,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.34782608695652173,
"calib/gap": -0.01407575757575752,
"calib/mean_conf": 0.8878656126482213,
"calib/mu_c": 0.882969696969697,
"calib/mu_w": 0.8970454545454545,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.23727272727272725,
"calib/std_conf": 0.04735900358614075,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7752658064516129,
"calib/step_q_c_n": 775.0,
"calib/step_q_gap": -0.012548147036759105,
"calib/step_q_w": 0.787813953488372,
"calib/step_q_w_n": 430.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2418.0,
"completions/max_terminated_length": 2418.0,
"completions/mean_length": 463.890625,
"completions/mean_terminated_length": 465.7098388671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 130.0,
"epoch": 0.1536,
"grad_norm": 0.00613579573109746,
"learning_rate": 1.5555555555555558e-06,
"loss": 0.0353,
"num_tokens": 32016460.0,
"reward": 1.138671875,
"reward_std": 0.2852437198162079,
"rewards/accuracy_reward_step": 0.64453125,
"rewards/format_reward_step": 0.98828125,
"step": 144
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5328118209474142,
"calib/avg_num_step_conf": 5.17578125,
"calib/ece": 0.1929803921568627,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.403921568627451,
"calib/gap": 0.006162538026944886,
"calib/mean_conf": 0.8870980392156863,
"calib/mu_c": 0.8889830508474575,
"calib/mu_w": 0.8828205128205127,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.1929803921568627,
"calib/std_conf": 0.048630360444270194,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7572655367231639,
"calib/step_q_c_n": 885.0,
"calib/step_q_gap": -0.01821173600410897,
"calib/step_q_w": 0.7754772727272728,
"calib/step_q_w_n": 440.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2712.0,
"completions/max_terminated_length": 2712.0,
"completions/mean_length": 447.33984375,
"completions/mean_terminated_length": 447.33984375,
"completions/min_length": 152.0,
"completions/min_terminated_length": 152.0,
"epoch": 0.15466666666666667,
"grad_norm": 0.0053616659715771675,
"learning_rate": 1.527777777777778e-06,
"loss": 0.0454,
"num_tokens": 32233683.0,
"reward": 1.189453125,
"reward_std": 0.24315764009952545,
"rewards/accuracy_reward_step": 0.69140625,
"rewards/format_reward_step": 0.99609375,
"step": 145
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.4438788577282623,
"calib/avg_num_step_conf": 4.47265625,
"calib/ece": 0.4364940239043825,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.3426294820717131,
"calib/gap": -0.010026251760788751,
"calib/mean_conf": 0.8874900398406375,
"calib/mu_c": 0.8820175438596493,
"calib/mu_w": 0.892043795620438,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.4349003984063745,
"calib/std_conf": 0.04589606850245974,
"calib/step_conf_rate": 0.984375,
"calib/step_q_c": 0.7788316831683167,
"calib/step_q_c_n": 505.0,
"calib/step_q_gap": 0.033253558168316766,
"calib/step_q_w": 0.745578125,
"calib/step_q_w_n": 640.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2706.0,
"completions/max_terminated_length": 2706.0,
"completions/mean_length": 451.91796875,
"completions/mean_terminated_length": 455.47637939453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 172.0,
"epoch": 0.15573333333333333,
"grad_norm": 0.0055298893712460995,
"learning_rate": 1.5e-06,
"loss": 0.0213,
"num_tokens": 32456590.0,
"reward": 0.93359375,
"reward_std": 0.22286482155323029,
"rewards/accuracy_reward_step": 0.4453125,
"rewards/format_reward_step": 0.9765625,
"step": 146
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.47230100207978826,
"calib/avg_num_step_conf": 5.1328125,
"calib/ece": 0.41333333333333333,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.4365079365079365,
"calib/gap": -0.014029117035356364,
"calib/mean_conf": 0.8855555555555554,
"calib/mu_c": 0.8783739837398374,
"calib/mu_w": 0.8924031007751938,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.4053968253968254,
"calib/std_conf": 0.09434869095545957,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.7628910614525138,
"calib/step_q_c_n": 716.0,
"calib/step_q_gap": 0.0005164795127144961,
"calib/step_q_w": 0.7623745819397993,
"calib/step_q_w_n": 598.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2790.0,
"completions/max_terminated_length": 2790.0,
"completions/mean_length": 482.1640625,
"completions/mean_terminated_length": 485.96063232421875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 193.0,
"epoch": 0.1568,
"grad_norm": 0.0051113637164235115,
"learning_rate": 1.4722222222222225e-06,
"loss": -0.0019,
"num_tokens": 32683704.0,
"reward": 0.970703125,
"reward_std": 0.2150426059961319,
"rewards/accuracy_reward_step": 0.48046875,
"rewards/format_reward_step": 0.98046875,
"step": 147
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.41023061445596654,
"calib/avg_num_step_conf": 4.71484375,
"calib/ece": 0.1826482213438736,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.3675889328063241,
"calib/gap": -0.01767373471598821,
"calib/mean_conf": 0.883201581027668,
"calib/mu_c": 0.8782417582417582,
"calib/mu_w": 0.8959154929577464,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.17324110671936765,
"calib/std_conf": 0.055343223691501256,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.7595911214953271,
"calib/step_q_c_n": 856.0,
"calib/step_q_gap": 0.004690836595042325,
"calib/step_q_w": 0.7549002849002848,
"calib/step_q_w_n": 351.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2324.0,
"completions/max_terminated_length": 2324.0,
"completions/mean_length": 454.93359375,
"completions/mean_terminated_length": 454.93359375,
"completions/min_length": 118.0,
"completions/min_terminated_length": 118.0,
"epoch": 0.15786666666666666,
"grad_norm": 0.005389039404690266,
"learning_rate": 1.4444444444444445e-06,
"loss": 0.0286,
"num_tokens": 32905279.0,
"reward": 1.203125,
"reward_std": 0.22485414147377014,
"rewards/accuracy_reward_step": 0.7109375,
"rewards/format_reward_step": 0.984375,
"step": 148
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.45914851728805217,
"calib/avg_num_step_conf": 5.22265625,
"calib/ece": 0.39027450980392164,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.3607843137254902,
"calib/gap": -0.003661867847914335,
"calib/mean_conf": 0.884392156862745,
"calib/mu_c": 0.8825396825396825,
"calib/mu_w": 0.8862015503875968,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.39027450980392164,
"calib/std_conf": 0.05129234871327704,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7555536912751678,
"calib/step_q_c_n": 596.0,
"calib/step_q_gap": 0.014028725013359389,
"calib/step_q_w": 0.7415249662618084,
"calib/step_q_w_n": 741.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2449.0,
"completions/max_terminated_length": 2449.0,
"completions/mean_length": 514.65234375,
"completions/mean_terminated_length": 514.65234375,
"completions/min_length": 101.0,
"completions/min_terminated_length": 101.0,
"epoch": 0.15893333333333334,
"grad_norm": 0.00547590758651495,
"learning_rate": 1.4166666666666667e-06,
"loss": 0.0431,
"num_tokens": 33141486.0,
"reward": 0.990234375,
"reward_std": 0.24513350427150726,
"rewards/accuracy_reward_step": 0.4921875,
"rewards/format_reward_step": 0.99609375,
"step": 149
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5161447505197505,
"calib/avg_num_step_conf": 5.109375,
"calib/ece": 0.3001587301587302,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.39285714285714285,
"calib/gap": 0.00910083160083175,
"calib/mean_conf": 0.8874603174603175,
"calib/mu_c": 0.8912162162162163,
"calib/mu_w": 0.8821153846153845,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.3001587301587302,
"calib/std_conf": 0.05608859924243593,
"calib/step_conf_rate": 0.984375,
"calib/step_q_c": 0.778169398907104,
"calib/step_q_c_n": 732.0,
"calib/step_q_gap": 0.019957593351548453,
"calib/step_q_w": 0.7582118055555556,
"calib/step_q_w_n": 576.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2796.0,
"completions/max_terminated_length": 2796.0,
"completions/mean_length": 446.85546875,
"completions/mean_terminated_length": 448.6078796386719,
"completions/min_length": 0.0,
"completions/min_terminated_length": 148.0,
"epoch": 0.16,
"grad_norm": 0.0062665254808962345,
"learning_rate": 1.3888888888888892e-06,
"loss": 0.0104,
"num_tokens": 33360841.0,
"reward": 1.06640625,
"reward_std": 0.3003883957862854,
"rewards/accuracy_reward_step": 0.578125,
"rewards/format_reward_step": 0.9765625,
"step": 150
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.4230597680642284,
"calib/avg_num_step_conf": 4.65625,
"calib/ece": 0.42529880478087667,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.46613545816733065,
"calib/gap": -0.011984834968778002,
"calib/mean_conf": 0.8954183266932271,
"calib/mu_c": 0.8890677966101694,
"calib/mu_w": 0.9010526315789474,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.42529880478087667,
"calib/std_conf": 0.05261417392298819,
"calib/step_conf_rate": 0.984375,
"calib/step_q_c": 0.7676481481481481,
"calib/step_q_c_n": 540.0,
"calib/step_q_gap": 0.006076062258577597,
"calib/step_q_w": 0.7615720858895705,
"calib/step_q_w_n": 652.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2803.0,
"completions/max_terminated_length": 2803.0,
"completions/mean_length": 503.8828125,
"completions/mean_terminated_length": 503.8828125,
"completions/min_length": 176.0,
"completions/min_terminated_length": 176.0,
"epoch": 0.16106666666666666,
"grad_norm": 0.005633654538542032,
"learning_rate": 1.3611111111111112e-06,
"loss": 0.0533,
"num_tokens": 33596859.0,
"reward": 0.94921875,
"reward_std": 0.2380410134792328,
"rewards/accuracy_reward_step": 0.4609375,
"rewards/format_reward_step": 0.9765625,
"step": 151
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.4950817536316853,
"calib/avg_num_step_conf": 5.109375,
"calib/ece": 0.37423387096774186,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.40725806451612906,
"calib/gap": -0.0002742492345777503,
"calib/mean_conf": 0.8943951612903226,
"calib/mu_c": 0.8942635658914727,
"calib/mu_w": 0.8945378151260505,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.37423387096774186,
"calib/std_conf": 0.04693463514918262,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.7593402777777778,
"calib/step_q_c_n": 576.0,
"calib/step_q_gap": 0.010323884335154854,
"calib/step_q_w": 0.7490163934426229,
"calib/step_q_w_n": 732.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2812.0,
"completions/max_terminated_length": 2812.0,
"completions/mean_length": 480.80859375,
"completions/mean_terminated_length": 482.69415283203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 177.0,
"epoch": 0.16213333333333332,
"grad_norm": 0.0060722678899765015,
"learning_rate": 1.3333333333333334e-06,
"loss": 0.03,
"num_tokens": 33825338.0,
"reward": 0.984375,
"reward_std": 0.27242544293403625,
"rewards/accuracy_reward_step": 0.50390625,
"rewards/format_reward_step": 0.9609375,
"step": 152
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.4812564968814969,
"calib/avg_num_step_conf": 4.9921875,
"calib/ece": 0.30067460317460315,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.38492063492063494,
"calib/gap": -0.003783783783783745,
"calib/mean_conf": 0.8852777777777778,
"calib/mu_c": 0.8837162162162163,
"calib/mu_w": 0.8875000000000001,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.29932539682539683,
"calib/std_conf": 0.04716908775359317,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.7728005865102638,
"calib/step_q_c_n": 682.0,
"calib/step_q_gap": 0.029981794563955222,
"calib/step_q_w": 0.7428187919463086,
"calib/step_q_w_n": 596.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2887.0,
"completions/max_terminated_length": 2887.0,
"completions/mean_length": 478.51171875,
"completions/mean_terminated_length": 480.3882751464844,
"completions/min_length": 0.0,
"completions/min_terminated_length": 150.0,
"epoch": 0.1632,
"grad_norm": 0.005258821416646242,
"learning_rate": 1.3055555555555556e-06,
"loss": 0.0326,
"num_tokens": 34055157.0,
"reward": 1.068359375,
"reward_std": 0.23543894290924072,
"rewards/accuracy_reward_step": 0.578125,
"rewards/format_reward_step": 0.98046875,
"step": 153
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.4706623810424151,
"calib/avg_num_step_conf": 4.31640625,
"calib/ece": 0.3987698412698413,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.3531746031746032,
"calib/gap": -0.004539610512384051,
"calib/mean_conf": 0.8830555555555556,
"calib/mu_c": 0.8807317073170733,
"calib/mu_w": 0.8852713178294573,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.39686507936507937,
"calib/std_conf": 0.05169919184464851,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.7663198458574182,
"calib/step_q_c_n": 519.0,
"calib/step_q_gap": 0.030193565993936966,
"calib/step_q_w": 0.7361262798634812,
"calib/step_q_w_n": 586.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2977.0,
"completions/max_terminated_length": 2977.0,
"completions/mean_length": 450.375,
"completions/mean_terminated_length": 452.1412048339844,
"completions/min_length": 0.0,
"completions/min_terminated_length": 177.0,
"epoch": 0.16426666666666667,
"grad_norm": 0.00625656358897686,
"learning_rate": 1.2777777777777779e-06,
"loss": 0.0556,
"num_tokens": 34274893.0,
"reward": 0.96875,
"reward_std": 0.26861146092414856,
"rewards/accuracy_reward_step": 0.48046875,
"rewards/format_reward_step": 0.9765625,
"step": 154
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.49847333000997013,
"calib/avg_num_step_conf": 4.8359375,
"calib/ece": 0.34374015748031495,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.3228346456692913,
"calib/gap": 0.004805583250249157,
"calib/mean_conf": 0.8757086614173228,
"calib/mu_c": 0.8779411764705882,
"calib/mu_w": 0.8731355932203391,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.342007874015748,
"calib/std_conf": 0.08050584272201257,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.719593984962406,
"calib/step_q_c_n": 665.0,
"calib/step_q_gap": -0.028154706137070495,
"calib/step_q_w": 0.7477486910994765,
"calib/step_q_w_n": 573.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 878.0,
"completions/max_terminated_length": 878.0,
"completions/mean_length": 428.171875,
"completions/mean_terminated_length": 429.85101318359375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 166.0,
"epoch": 0.16533333333333333,
"grad_norm": 0.006096397992223501,
"learning_rate": 1.25e-06,
"loss": -0.0035,
"num_tokens": 34491721.0,
"reward": 1.0234375,
"reward_std": 0.2608669400215149,
"rewards/accuracy_reward_step": 0.53125,
"rewards/format_reward_step": 0.984375,
"step": 155
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5112410071942446,
"calib/avg_num_step_conf": 5.046875,
"calib/ece": 0.3293227091633467,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.3745019920318725,
"calib/gap": 0.009480986639260114,
"calib/mean_conf": 0.8831075697211156,
"calib/mu_c": 0.8873381294964029,
"calib/mu_w": 0.8778571428571428,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.3293227091633467,
"calib/std_conf": 0.06063988456510979,
"calib/step_conf_rate": 0.98046875,
"calib/step_q_c": 0.7347422680412372,
"calib/step_q_c_n": 679.0,
"calib/step_q_gap": -0.008879265400850933,
"calib/step_q_w": 0.7436215334420881,
"calib/step_q_w_n": 613.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 3031.0,
"completions/max_terminated_length": 3031.0,
"completions/mean_length": 496.66015625,
"completions/mean_terminated_length": 496.66015625,
"completions/min_length": 121.0,
"completions/min_terminated_length": 121.0,
"epoch": 0.1664,
"grad_norm": 0.005818387493491173,
"learning_rate": 1.2222222222222223e-06,
"loss": 0.0341,
"num_tokens": 34723626.0,
"reward": 1.029296875,
"reward_std": 0.2707667052745819,
"rewards/accuracy_reward_step": 0.54296875,
"rewards/format_reward_step": 0.97265625,
"step": 156
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.4583667200854701,
"calib/avg_num_step_conf": 5.09375,
"calib/ece": 0.2544444444444445,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.2896825396825397,
"calib/gap": 0.006145833333333295,
"calib/mean_conf": 0.8734920634920635,
"calib/mu_c": 0.8758333333333334,
"calib/mu_w": 0.8696875000000001,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.2544444444444445,
"calib/std_conf": 0.07091037782330305,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.7502247191011236,
"calib/step_q_c_n": 801.0,
"calib/step_q_gap": 0.003962293653807514,
"calib/step_q_w": 0.7462624254473161,
"calib/step_q_w_n": 503.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2289.0,
"completions/max_terminated_length": 2289.0,
"completions/mean_length": 470.64453125,
"completions/mean_terminated_length": 474.35040283203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 148.0,
"epoch": 0.16746666666666668,
"grad_norm": 0.006078007165342569,
"learning_rate": 1.1944444444444446e-06,
"loss": -0.01,
"num_tokens": 34947839.0,
"reward": 1.1015625,
"reward_std": 0.28636425733566284,
"rewards/accuracy_reward_step": 0.61328125,
"rewards/format_reward_step": 0.9765625,
"step": 157
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5060596026490066,
"calib/avg_num_step_conf": 4.87109375,
"calib/ece": 0.2841832669322709,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.3545816733067729,
"calib/gap": 0.001956291390728504,
"calib/mean_conf": 0.8857768924302788,
"calib/mu_c": 0.8865562913907286,
"calib/mu_w": 0.8846,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.2841832669322709,
"calib/std_conf": 0.046095497009212054,
"calib/step_conf_rate": 0.984375,
"calib/step_q_c": 0.7688340192043897,
"calib/step_q_c_n": 729.0,
"calib/step_q_gap": 0.04169116206153256,
"calib/step_q_w": 0.7271428571428571,
"calib/step_q_w_n": 518.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2510.0,
"completions/max_terminated_length": 2510.0,
"completions/mean_length": 497.6953125,
"completions/mean_terminated_length": 497.6953125,
"completions/min_length": 164.0,
"completions/min_terminated_length": 164.0,
"epoch": 0.16853333333333334,
"grad_norm": 0.006711493246257305,
"learning_rate": 1.1666666666666668e-06,
"loss": 0.1127,
"num_tokens": 35180489.0,
"reward": 1.07421875,
"reward_std": 0.2854154706001282,
"rewards/accuracy_reward_step": 0.58984375,
"rewards/format_reward_step": 0.96875,
"step": 158
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.4503373313343328,
"calib/avg_num_step_conf": 4.3515625,
"calib/ece": 0.3520866141732284,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.39763779527559057,
"calib/gap": -0.008489505247376239,
"calib/mean_conf": 0.8890944881889764,
"calib/mu_c": 0.885217391304348,
"calib/mu_w": 0.8937068965517242,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.34893700787401577,
"calib/std_conf": 0.04508486015603499,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.767580919931857,
"calib/step_q_c_n": 587.0,
"calib/step_q_gap": 0.0019452463075685378,
"calib/step_q_w": 0.7656356736242884,
"calib/step_q_w_n": 527.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 3026.0,
"completions/max_terminated_length": 3026.0,
"completions/mean_length": 436.96875,
"completions/mean_terminated_length": 436.96875,
"completions/min_length": 165.0,
"completions/min_terminated_length": 165.0,
"epoch": 0.1696,
"grad_norm": 0.007543331012129784,
"learning_rate": 1.138888888888889e-06,
"loss": 0.0626,
"num_tokens": 35397137.0,
"reward": 1.029296875,
"reward_std": 0.3060174286365509,
"rewards/accuracy_reward_step": 0.5390625,
"rewards/format_reward_step": 0.98046875,
"step": 159
},
{
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.5002294329727959,
"calib/avg_num_step_conf": 4.4453125,
"calib/ece": 0.3379435483870967,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.3225806451612903,
"calib/gap": -0.0024431333988854664,
"calib/mean_conf": 0.8822983870967742,
"calib/mu_c": 0.8811851851851853,
"calib/mu_w": 0.8836283185840708,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.3379435483870967,
"calib/std_conf": 0.0479915113190746,
"calib/step_conf_rate": 0.98046875,
"calib/step_q_c": 0.7541725352112676,
"calib/step_q_c_n": 568.0,
"calib/step_q_gap": 0.012453236965653591,
"calib/step_q_w": 0.741719298245614,
"calib/step_q_w_n": 570.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 3014.0,
"completions/max_terminated_length": 3014.0,
"completions/mean_length": 490.06640625,
"completions/mean_terminated_length": 493.9252014160156,
"completions/min_length": 0.0,
"completions/min_terminated_length": 143.0,
"epoch": 0.17066666666666666,
"grad_norm": 0.0057596382685005665,
"learning_rate": 1.111111111111111e-06,
"loss": 0.0433,
"num_tokens": 35627434.0,
"reward": 1.0078125,
"reward_std": 0.289905846118927,
"rewards/accuracy_reward_step": 0.52734375,
"rewards/format_reward_step": 0.9609375,
"step": 160
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.46350392512077293,
"calib/avg_num_step_conf": 4.6328125,
"calib/ece": 0.16726562500000008,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.37109375,
"calib/gap": -0.00421497584541064,
"calib/mean_conf": 0.883359375,
"calib/mu_c": 0.8821739130434783,
"calib/mu_w": 0.8863888888888889,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.1659375000000001,
"calib/std_conf": 0.05558323352962992,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7384870848708487,
"calib/step_q_c_n": 813.0,
"calib/step_q_gap": -0.02151291512915121,
"calib/step_q_w": 0.7599999999999999,
"calib/step_q_w_n": 373.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2291.0,
"completions/max_terminated_length": 2291.0,
"completions/mean_length": 427.3203125,
"completions/mean_terminated_length": 428.99609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 132.0,
"epoch": 0.17173333333333332,
"grad_norm": 0.005399015732109547,
"learning_rate": 1.0833333333333335e-06,
"loss": 0.0224,
"num_tokens": 35840748.0,
"reward": 1.216796875,
"reward_std": 0.19859883189201355,
"rewards/accuracy_reward_step": 0.71875,
"rewards/format_reward_step": 0.99609375,
"step": 161
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5375821467688937,
"calib/avg_num_step_conf": 4.3125,
"calib/ece": 0.2282677165354331,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.3188976377952756,
"calib/gap": 0.0099000547645125,
"calib/mean_conf": 0.8818110236220473,
"calib/mu_c": 0.8852409638554216,
"calib/mu_w": 0.8753409090909091,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.2282677165354331,
"calib/std_conf": 0.0501009582763847,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.7815418502202643,
"calib/step_q_c_n": 681.0,
"calib/step_q_gap": 0.026459107903479495,
"calib/step_q_w": 0.7550827423167848,
"calib/step_q_w_n": 423.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2126.0,
"completions/max_terminated_length": 2126.0,
"completions/mean_length": 413.90625,
"completions/mean_terminated_length": 413.90625,
"completions/min_length": 159.0,
"completions/min_terminated_length": 159.0,
"epoch": 0.1728,
"grad_norm": 0.005997710861265659,
"learning_rate": 1.0555555555555557e-06,
"loss": 0.0414,
"num_tokens": 36050852.0,
"reward": 1.142578125,
"reward_std": 0.2104301154613495,
"rewards/accuracy_reward_step": 0.6484375,
"rewards/format_reward_step": 0.98828125,
"step": 162
},
{
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.4081221572449643,
"calib/avg_num_step_conf": 5.125,
"calib/ece": 0.34650602409638553,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.40160642570281124,
"calib/gap": -0.01474074074074061,
"calib/mean_conf": 0.8886746987951807,
"calib/mu_c": 0.8819259259259259,
"calib/mu_w": 0.8966666666666665,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.984375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.34650602409638553,
"calib/std_conf": 0.04568367976418741,
"calib/step_conf_rate": 0.984375,
"calib/step_q_c": 0.7717492711370262,
"calib/step_q_c_n": 686.0,
"calib/step_q_gap": 0.03189304110507729,
"calib/step_q_w": 0.7398562300319489,
"calib/step_q_w_n": 626.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2585.0,
"completions/max_terminated_length": 2585.0,
"completions/mean_length": 520.79296875,
"completions/mean_terminated_length": 520.79296875,
"completions/min_length": 166.0,
"completions/min_terminated_length": 166.0,
"epoch": 0.17386666666666667,
"grad_norm": 0.00483884708955884,
"learning_rate": 1.0277777777777777e-06,
"loss": 0.09,
"num_tokens": 36289007.0,
"reward": 1.013671875,
"reward_std": 0.21470002830028534,
"rewards/accuracy_reward_step": 0.52734375,
"rewards/format_reward_step": 0.97265625,
"step": 163
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.46222222222222226,
"calib/avg_num_step_conf": 4.8828125,
"calib/ece": 0.34212,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.304,
"calib/gap": -0.005896940418679386,
"calib/mean_conf": 0.88212,
"calib/mu_c": 0.8794074074074074,
"calib/mu_w": 0.8853043478260868,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.34212,
"calib/std_conf": 0.044306947536475585,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7618051118210863,
"calib/step_q_c_n": 626.0,
"calib/step_q_gap": 0.016949342590317107,
"calib/step_q_w": 0.7448557692307692,
"calib/step_q_w_n": 624.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2787.0,
"completions/max_terminated_length": 2787.0,
"completions/mean_length": 536.5625,
"completions/mean_terminated_length": 536.5625,
"completions/min_length": 175.0,
"completions/min_terminated_length": 175.0,
"epoch": 0.17493333333333333,
"grad_norm": 0.005693615879863501,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.0656,
"num_tokens": 36532503.0,
"reward": 1.013671875,
"reward_std": 0.23496907949447632,
"rewards/accuracy_reward_step": 0.52734375,
"rewards/format_reward_step": 0.97265625,
"step": 164
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.508505315822389,
"calib/avg_num_step_conf": 4.3359375,
"calib/ece": 0.3952964426877471,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.35968379446640314,
"calib/gap": 0.0014258911819887699,
"calib/mean_conf": 0.8814624505928854,
"calib/mu_c": 0.8821951219512196,
"calib/mu_w": 0.8807692307692309,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.3952964426877471,
"calib/std_conf": 0.051197474511602,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.7888933601609658,
"calib/step_q_c_n": 497.0,
"calib/step_q_gap": 0.027278352004358908,
"calib/step_q_w": 0.7616150081566069,
"calib/step_q_w_n": 613.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 1421.0,
"completions/max_terminated_length": 1421.0,
"completions/mean_length": 478.30078125,
"completions/mean_terminated_length": 482.0669250488281,
"completions/min_length": 0.0,
"completions/min_terminated_length": 165.0,
"epoch": 0.176,
"grad_norm": 0.006250341422855854,
"learning_rate": 9.722222222222224e-07,
"loss": -0.0322,
"num_tokens": 36760524.0,
"reward": 0.96875,
"reward_std": 0.25991344451904297,
"rewards/accuracy_reward_step": 0.48046875,
"rewards/format_reward_step": 0.9765625,
"step": 165
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.4818796068796068,
"calib/avg_num_step_conf": 4.625,
"calib/ece": 0.18104,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.953125,
"calib/frac_conf_gt_0.9": 0.324,
"calib/gap": 0.0003578009828011375,
"calib/mean_conf": 0.88336,
"calib/mu_c": 0.8834659090909092,
"calib/mu_w": 0.883108108108108,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.9609375,
"calib/pce": 0.1802,
"calib/std_conf": 0.051829628592147944,
"calib/step_conf_rate": 0.9609375,
"calib/step_q_c": 0.782295673076923,
"calib/step_q_c_n": 832.0,
"calib/step_q_gap": 0.004539991258741272,
"calib/step_q_w": 0.7777556818181818,
"calib/step_q_w_n": 352.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2726.0,
"completions/max_terminated_length": 2726.0,
"completions/mean_length": 509.8828125,
"completions/mean_terminated_length": 509.8828125,
"completions/min_length": 162.0,
"completions/min_terminated_length": 162.0,
"epoch": 0.17706666666666668,
"grad_norm": 0.00509639410302043,
"learning_rate": 9.444444444444445e-07,
"loss": 0.0401,
"num_tokens": 36997238.0,
"reward": 1.1640625,
"reward_std": 0.2525672912597656,
"rewards/accuracy_reward_step": 0.6875,
"rewards/format_reward_step": 0.953125,
"step": 166
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.47980446728038256,
"calib/avg_num_step_conf": 4.55078125,
"calib/ece": 0.19767716535433077,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.2874015748031496,
"calib/gap": -0.004148290872760918,
"calib/mean_conf": 0.8787795275590552,
"calib/mu_c": 0.877456647398844,
"calib/mu_w": 0.8816049382716049,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.19767716535433077,
"calib/std_conf": 0.04546875943784006,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7828692493946733,
"calib/step_q_c_n": 826.0,
"calib/step_q_gap": 0.02437367417343439,
"calib/step_q_w": 0.7584955752212389,
"calib/step_q_w_n": 339.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1583.0,
"completions/max_terminated_length": 1583.0,
"completions/mean_length": 444.015625,
"completions/mean_terminated_length": 444.015625,
"completions/min_length": 158.0,
"completions/min_terminated_length": 158.0,
"epoch": 0.17813333333333334,
"grad_norm": 0.005671403370797634,
"learning_rate": 9.166666666666666e-07,
"loss": 0.0097,
"num_tokens": 37216514.0,
"reward": 1.173828125,
"reward_std": 0.2115381509065628,
"rewards/accuracy_reward_step": 0.6796875,
"rewards/format_reward_step": 0.98828125,
"step": 167
},
{
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.48184084630415747,
"calib/avg_num_step_conf": 4.67578125,
"calib/ece": 0.2668800000000001,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.344,
"calib/gap": -0.0004824472744423547,
"calib/mean_conf": 0.8788800000000001,
"calib/mu_c": 0.8786928104575165,
"calib/mu_w": 0.8791752577319588,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.2668800000000001,
"calib/std_conf": 0.0542028191148763,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.7740029985007496,
"calib/step_q_c_n": 667.0,
"calib/step_q_gap": 0.09147469661395724,
"calib/step_q_w": 0.6825283018867924,
"calib/step_q_w_n": 530.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 1516.0,
"completions/max_terminated_length": 1516.0,
"completions/mean_length": 486.69921875,
"completions/mean_terminated_length": 490.531494140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 139.0,
"epoch": 0.1792,
"grad_norm": 0.005674854852259159,
"learning_rate": 8.88888888888889e-07,
"loss": -0.0191,
"num_tokens": 37445781.0,
"reward": 1.08203125,
"reward_std": 0.29917871952056885,
"rewards/accuracy_reward_step": 0.59765625,
"rewards/format_reward_step": 0.96875,
"step": 168
},
{
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.42087472406181015,
"calib/avg_num_step_conf": 4.07421875,
"calib/ece": 0.27582995951417005,
"calib/final_conf_rate": 0.96484375,
"calib/format_rate": 0.94921875,
"calib/frac_conf_gt_0.9": 0.3441295546558704,
"calib/gap": -0.012168184326710851,
"calib/mean_conf": 0.884331983805668,
"calib/mu_c": 0.8796026490066224,
"calib/mu_w": 0.8917708333333333,
"calib/nonempty_final_conf_rate": 0.96484375,
"calib/nonempty_reasoning_rate": 0.98046875,
"calib/nonempty_step_conf_rate": 0.96484375,
"calib/pce": 0.274412955465587,
"calib/std_conf": 0.05110379916187314,
"calib/step_conf_rate": 0.96484375,
"calib/step_q_c": 0.7932115677321159,
"calib/step_q_c_n": 657.0,
"calib/step_q_gap": 0.02049136047822986,
"calib/step_q_w": 0.772720207253886,
"calib/step_q_w_n": 386.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2452.0,
"completions/max_terminated_length": 2452.0,
"completions/mean_length": 460.453125,
"completions/mean_terminated_length": 464.0787353515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 162.0,
"epoch": 0.18026666666666666,
"grad_norm": 0.005321961361914873,
"learning_rate": 8.611111111111112e-07,
"loss": 0.005,
"num_tokens": 37667841.0,
"reward": 1.064453125,
"reward_std": 0.213637113571167,
"rewards/accuracy_reward_step": 0.58984375,
"rewards/format_reward_step": 0.94921875,
"step": 169
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5366047745358091,
"calib/avg_num_step_conf": 4.78125,
"calib/ece": 0.3043373493975903,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.3815261044176707,
"calib/gap": 0.008641246684350179,
"calib/mean_conf": 0.8866666666666666,
"calib/mu_c": 0.8902758620689656,
"calib/mu_w": 0.8816346153846154,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.984375,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.3043373493975903,
"calib/std_conf": 0.046926984784205795,
"calib/step_conf_rate": 0.98046875,
"calib/step_q_c": 0.7943703703703704,
"calib/step_q_c_n": 675.0,
"calib/step_q_gap": 0.03692046144505168,
"calib/step_q_w": 0.7574499089253187,
"calib/step_q_w_n": 549.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2410.0,
"completions/max_terminated_length": 2410.0,
"completions/mean_length": 479.66796875,
"completions/mean_terminated_length": 485.3557434082031,
"completions/min_length": 0.0,
"completions/min_terminated_length": 162.0,
"epoch": 0.18133333333333335,
"grad_norm": 0.005628409795463085,
"learning_rate": 8.333333333333333e-07,
"loss": -0.0135,
"num_tokens": 37894788.0,
"reward": 1.0546875,
"reward_std": 0.22039489448070526,
"rewards/accuracy_reward_step": 0.5703125,
"rewards/format_reward_step": 0.96875,
"step": 170
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5163370326740654,
"calib/avg_num_step_conf": 4.51953125,
"calib/ece": 0.3821653543307087,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.28346456692913385,
"calib/gap": 0.0019685039370079815,
"calib/mean_conf": 0.8821653543307086,
"calib/mu_c": 0.8831496062992126,
"calib/mu_w": 0.8811811023622046,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.3821653543307087,
"calib/std_conf": 0.044761355622433004,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.7756476964769647,
"calib/step_q_c_n": 615.0,
"calib/step_q_gap": 0.018378323783237782,
"calib/step_q_w": 0.757269372693727,
"calib/step_q_w_n": 542.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2429.0,
"completions/max_terminated_length": 2429.0,
"completions/mean_length": 481.66796875,
"completions/mean_terminated_length": 481.66796875,
"completions/min_length": 176.0,
"completions/min_terminated_length": 176.0,
"epoch": 0.1824,
"grad_norm": 0.00556573923677206,
"learning_rate": 8.055555555555557e-07,
"loss": 0.0295,
"num_tokens": 38124991.0,
"reward": 0.990234375,
"reward_std": 0.2234094738960266,
"rewards/accuracy_reward_step": 0.49609375,
"rewards/format_reward_step": 0.98828125,
"step": 171
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5799045915324985,
"calib/avg_num_step_conf": 4.5703125,
"calib/ece": 0.19524000000000002,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.312,
"calib/gap": 0.011417710196780284,
"calib/mean_conf": 0.88324,
"calib/mu_c": 0.8868023255813955,
"calib/mu_w": 0.8753846153846152,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.19524000000000002,
"calib/std_conf": 0.04096220697179292,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7867217280813216,
"calib/step_q_c_n": 787.0,
"calib/step_q_gap": 0.006538960457300624,
"calib/step_q_w": 0.780182767624021,
"calib/step_q_w_n": 383.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2475.0,
"completions/max_terminated_length": 2475.0,
"completions/mean_length": 464.640625,
"completions/mean_terminated_length": 464.640625,
"completions/min_length": 190.0,
"completions/min_terminated_length": 190.0,
"epoch": 0.18346666666666667,
"grad_norm": 0.005885588005185127,
"learning_rate": 7.777777777777779e-07,
"loss": 0.0429,
"num_tokens": 38347291.0,
"reward": 1.16015625,
"reward_std": 0.24455931782722473,
"rewards/accuracy_reward_step": 0.671875,
"rewards/format_reward_step": 0.9765625,
"step": 172
},
{
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.4641386410432395,
"calib/avg_num_step_conf": 4.94921875,
"calib/ece": 0.2740562248995985,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.357429718875502,
"calib/gap": -0.00984694577899814,
"calib/mean_conf": 0.8861044176706827,
"calib/mu_c": 0.8823870967741936,
"calib/mu_w": 0.8922340425531917,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.97265625,
"calib/nonempty_step_conf_rate": 0.97265625,
"calib/pce": 0.26883534136546194,
"calib/std_conf": 0.04761463803899582,
"calib/step_conf_rate": 0.97265625,
"calib/step_q_c": 0.7915049751243782,
"calib/step_q_c_n": 804.0,
"calib/step_q_gap": -0.00011489528598884746,
"calib/step_q_w": 0.7916198704103671,
"calib/step_q_w_n": 463.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1610.0,
"completions/max_terminated_length": 1610.0,
"completions/mean_length": 512.10546875,
"completions/mean_terminated_length": 514.11376953125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 134.0,
"epoch": 0.18453333333333333,
"grad_norm": 0.005655062850564718,
"learning_rate": 7.5e-07,
"loss": 0.0318,
"num_tokens": 38581550.0,
"reward": 1.091796875,
"reward_std": 0.23605836927890778,
"rewards/accuracy_reward_step": 0.60546875,
"rewards/format_reward_step": 0.97265625,
"step": 173
},
{
"calib/answer_extract_rate": 0.953125,
"calib/auroc": 0.4939894815927873,
"calib/avg_num_step_conf": 4.7109375,
"calib/ece": 0.38590909090909087,
"calib/final_conf_rate": 0.9453125,
"calib/format_rate": 0.9453125,
"calib/frac_conf_gt_0.9": 0.32644628099173556,
"calib/gap": 0.004710743801653039,
"calib/mean_conf": 0.885909090909091,
"calib/mu_c": 0.8882644628099174,
"calib/mu_w": 0.8835537190082644,
"calib/nonempty_final_conf_rate": 0.9453125,
"calib/nonempty_reasoning_rate": 0.984375,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.38590909090909087,
"calib/std_conf": 0.05268219075874214,
"calib/step_conf_rate": 0.98046875,
"calib/step_q_c": 0.7820869565217391,
"calib/step_q_c_n": 575.0,
"calib/step_q_gap": -0.00025852683800731135,
"calib/step_q_w": 0.7823454833597464,
"calib/step_q_w_n": 631.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 3044.0,
"completions/max_terminated_length": 3044.0,
"completions/mean_length": 597.12890625,
"completions/mean_terminated_length": 597.12890625,
"completions/min_length": 179.0,
"completions/min_terminated_length": 179.0,
"epoch": 0.1856,
"grad_norm": 0.0053953444585204124,
"learning_rate": 7.222222222222222e-07,
"loss": 0.0696,
"num_tokens": 38838647.0,
"reward": 0.94921875,
"reward_std": 0.33002573251724243,
"rewards/accuracy_reward_step": 0.4765625,
"rewards/format_reward_step": 0.9453125,
"step": 174
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.4530963001836788,
"calib/avg_num_step_conf": 5.26953125,
"calib/ece": 0.4718326693227094,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.3187250996015936,
"calib/gap": -0.004855024927840179,
"calib/mean_conf": 0.8795617529880477,
"calib/mu_c": 0.8766990291262139,
"calib/mu_w": 0.8815540540540541,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.4705179282868528,
"calib/std_conf": 0.05811966611190145,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.7687124463519314,
"calib/step_q_c_n": 466.0,
"calib/step_q_gap": 0.03280078157276933,
"calib/step_q_w": 0.735911664779162,
"calib/step_q_w_n": 883.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2460.0,
"completions/max_terminated_length": 2460.0,
"completions/mean_length": 524.8984375,
"completions/mean_terminated_length": 526.9569091796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 149.0,
"epoch": 0.18666666666666668,
"grad_norm": 0.005566365551203489,
"learning_rate": 6.944444444444446e-07,
"loss": 0.0265,
"num_tokens": 39078845.0,
"reward": 0.888671875,
"reward_std": 0.26503777503967285,
"rewards/accuracy_reward_step": 0.40234375,
"rewards/format_reward_step": 0.97265625,
"step": 175
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4482153846153846,
"calib/avg_num_step_conf": 4.9453125,
"calib/ece": 0.38505882352941173,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.28627450980392155,
"calib/gap": 0.00020615384615396692,
"calib/mean_conf": 0.8752549019607843,
"calib/mu_c": 0.8753600000000001,
"calib/mu_w": 0.8751538461538462,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.38505882352941173,
"calib/std_conf": 0.07246252395501805,
"calib/step_conf_rate": 0.984375,
"calib/step_q_c": 0.7838860971524289,
"calib/step_q_c_n": 597.0,
"calib/step_q_gap": -0.007773095672683139,
"calib/step_q_w": 0.791659192825112,
"calib/step_q_w_n": 669.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1648.0,
"completions/max_terminated_length": 1648.0,
"completions/mean_length": 473.9375,
"completions/mean_terminated_length": 475.7961120605469,
"completions/min_length": 0.0,
"completions/min_terminated_length": 154.0,
"epoch": 0.18773333333333334,
"grad_norm": 0.005453173536807299,
"learning_rate": 6.666666666666667e-07,
"loss": -0.0133,
"num_tokens": 39304237.0,
"reward": 0.978515625,
"reward_std": 0.23507677018642426,
"rewards/accuracy_reward_step": 0.48828125,
"rewards/format_reward_step": 0.98046875,
"step": 176
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.472159090909091,
"calib/avg_num_step_conf": 5.16015625,
"calib/ece": 0.3151181102362206,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.35826771653543305,
"calib/gap": -0.0011994949494948948,
"calib/mean_conf": 0.8820472440944882,
"calib/mu_c": 0.8815277777777778,
"calib/mu_w": 0.8827272727272727,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3151181102362206,
"calib/std_conf": 0.04767571862015603,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7901402524544181,
"calib/step_q_c_n": 713.0,
"calib/step_q_gap": 0.03043630508599704,
"calib/step_q_w": 0.7597039473684211,
"calib/step_q_w_n": 608.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 3057.0,
"completions/max_terminated_length": 3057.0,
"completions/mean_length": 487.078125,
"completions/mean_terminated_length": 487.078125,
"completions/min_length": 140.0,
"completions/min_terminated_length": 140.0,
"epoch": 0.1888,
"grad_norm": 0.00511728972196579,
"learning_rate": 6.388888888888889e-07,
"loss": 0.0236,
"num_tokens": 39532761.0,
"reward": 1.05859375,
"reward_std": 0.22648221254348755,
"rewards/accuracy_reward_step": 0.5625,
"rewards/format_reward_step": 0.9921875,
"step": 177
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4563706563706563,
"calib/avg_num_step_conf": 4.55859375,
"calib/ece": 0.2911857707509883,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.25691699604743085,
"calib/gap": -0.00883333333333336,
"calib/mean_conf": 0.8761660079051384,
"calib/mu_c": 0.8724999999999999,
"calib/mu_w": 0.8813333333333333,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.2911857707509883,
"calib/std_conf": 0.046352363600316075,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7920760233918128,
"calib/step_q_c_n": 684.0,
"calib/step_q_gap": 0.005471468526388246,
"calib/step_q_w": 0.7866045548654246,
"calib/step_q_w_n": 483.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2268.0,
"completions/max_terminated_length": 2268.0,
"completions/mean_length": 440.015625,
"completions/mean_terminated_length": 441.7412109375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 156.0,
"epoch": 0.18986666666666666,
"grad_norm": 0.006147797219455242,
"learning_rate": 6.111111111111112e-07,
"loss": 0.0062,
"num_tokens": 39751477.0,
"reward": 1.076171875,
"reward_std": 0.24575549364089966,
"rewards/accuracy_reward_step": 0.58203125,
"rewards/format_reward_step": 0.98828125,
"step": 178
},
{
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.48160511363636366,
"calib/avg_num_step_conf": 5.31640625,
"calib/ece": 0.23641129032258065,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.2661290322580645,
"calib/gap": -0.003903409090908805,
"calib/mean_conf": 0.8790725806451613,
"calib/mu_c": 0.8776875000000001,
"calib/mu_w": 0.8815909090909089,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.23516129032258065,
"calib/std_conf": 0.04278069985913093,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7918959537572254,
"calib/step_q_c_n": 865.0,
"calib/step_q_gap": 0.05364998601528981,
"calib/step_q_w": 0.7382459677419356,
"calib/step_q_w_n": 496.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2973.0,
"completions/max_terminated_length": 2973.0,
"completions/mean_length": 496.18359375,
"completions/mean_terminated_length": 500.0905456542969,
"completions/min_length": 0.0,
"completions/min_terminated_length": 187.0,
"epoch": 0.19093333333333334,
"grad_norm": 0.0067147728987038136,
"learning_rate": 5.833333333333334e-07,
"loss": 0.0132,
"num_tokens": 39984764.0,
"reward": 1.107421875,
"reward_std": 0.3205963373184204,
"rewards/accuracy_reward_step": 0.625,
"rewards/format_reward_step": 0.96484375,
"step": 179
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4240544731342277,
"calib/avg_num_step_conf": 5.625,
"calib/ece": 0.24299212598425196,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.32677165354330706,
"calib/gap": -0.009077057911413555,
"calib/mean_conf": 0.8847244094488189,
"calib/mu_c": 0.8814723926380369,
"calib/mu_w": 0.8905494505494504,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.24299212598425196,
"calib/std_conf": 0.04676674859617193,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.7938191489361701,
"calib/step_q_c_n": 940.0,
"calib/step_q_gap": 0.010999148936170045,
"calib/step_q_w": 0.7828200000000001,
"calib/step_q_w_n": 500.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2602.0,
"completions/max_terminated_length": 2602.0,
"completions/mean_length": 564.0390625,
"completions/mean_terminated_length": 566.2510375976562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 196.0,
"epoch": 0.192,
"grad_norm": 0.00554621359333396,
"learning_rate": 5.555555555555555e-07,
"loss": -0.0037,
"num_tokens": 40233014.0,
"reward": 1.12890625,
"reward_std": 0.1938880831003189,
"rewards/accuracy_reward_step": 0.63671875,
"rewards/format_reward_step": 0.984375,
"step": 180
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.578968929224176,
"calib/avg_num_step_conf": 4.6875,
"calib/ece": 0.3655952380952382,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.2777777777777778,
"calib/gap": 0.012110039705048092,
"calib/mean_conf": 0.8775,
"calib/mu_c": 0.8834108527131783,
"calib/mu_w": 0.8713008130081302,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3655952380952382,
"calib/std_conf": 0.04577051972821017,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.8016199376947041,
"calib/step_q_c_n": 642.0,
"calib/step_q_gap": 0.015024955615851154,
"calib/step_q_w": 0.7865949820788529,
"calib/step_q_w_n": 558.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2540.0,
"completions/max_terminated_length": 2540.0,
"completions/mean_length": 478.40234375,
"completions/mean_terminated_length": 478.40234375,
"completions/min_length": 186.0,
"completions/min_terminated_length": 186.0,
"epoch": 0.19306666666666666,
"grad_norm": 0.006133066024631262,
"learning_rate": 5.277777777777779e-07,
"loss": 0.0607,
"num_tokens": 40461749.0,
"reward": 0.99609375,
"reward_std": 0.2409520447254181,
"rewards/accuracy_reward_step": 0.50390625,
"rewards/format_reward_step": 0.984375,
"step": 181
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4225753061900033,
"calib/avg_num_step_conf": 4.8515625,
"calib/ece": 0.2615354330708661,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.3346456692913386,
"calib/gap": -0.010688513737173166,
"calib/mean_conf": 0.8875196850393701,
"calib/mu_c": 0.8835220125786163,
"calib/mu_w": 0.8942105263157895,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.2615354330708661,
"calib/std_conf": 0.042710221239487825,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7977588466579293,
"calib/step_q_c_n": 763.0,
"calib/step_q_gap": 0.011099138933503228,
"calib/step_q_w": 0.7866597077244261,
"calib/step_q_w_n": 479.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2375.0,
"completions/max_terminated_length": 2375.0,
"completions/mean_length": 490.37890625,
"completions/mean_terminated_length": 490.37890625,
"completions/min_length": 217.0,
"completions/min_terminated_length": 217.0,
"epoch": 0.19413333333333332,
"grad_norm": 0.004917373415082693,
"learning_rate": 5.000000000000001e-07,
"loss": 0.0375,
"num_tokens": 40693446.0,
"reward": 1.1171875,
"reward_std": 0.21607306599617004,
"rewards/accuracy_reward_step": 0.62109375,
"rewards/format_reward_step": 0.9921875,
"step": 182
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5172730148687125,
"calib/avg_num_step_conf": 4.93359375,
"calib/ece": 0.30925196850393716,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.3228346456692913,
"calib/gap": 0.012420752926288858,
"calib/mean_conf": 0.8801181102362206,
"calib/mu_c": 0.8854482758620689,
"calib/mu_w": 0.87302752293578,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.30925196850393716,
"calib/std_conf": 0.05927038194542323,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.7826462395543177,
"calib/step_q_c_n": 718.0,
"calib/step_q_gap": 0.018627890930464353,
"calib/step_q_w": 0.7640183486238533,
"calib/step_q_w_n": 545.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2744.0,
"completions/max_terminated_length": 2744.0,
"completions/mean_length": 527.984375,
"completions/mean_terminated_length": 527.984375,
"completions/min_length": 158.0,
"completions/min_terminated_length": 158.0,
"epoch": 0.1952,
"grad_norm": 0.006127191707491875,
"learning_rate": 4.7222222222222226e-07,
"loss": 0.0204,
"num_tokens": 40935290.0,
"reward": 1.060546875,
"reward_std": 0.2705356776714325,
"rewards/accuracy_reward_step": 0.56640625,
"rewards/format_reward_step": 0.98828125,
"step": 183
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5093615494978478,
"calib/avg_num_step_conf": 5.1171875,
"calib/ece": 0.21194444444444455,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.36904761904761907,
"calib/gap": 0.0013041606886654566,
"calib/mean_conf": 0.8839285714285714,
"calib/mu_c": 0.8843529411764705,
"calib/mu_w": 0.883048780487805,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.21063492063492073,
"calib/std_conf": 0.048542679400255695,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.7907167630057802,
"calib/step_q_c_n": 865.0,
"calib/step_q_gap": 0.014334740533870072,
"calib/step_q_w": 0.7763820224719101,
"calib/step_q_w_n": 445.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2571.0,
"completions/max_terminated_length": 2571.0,
"completions/mean_length": 497.1640625,
"completions/mean_terminated_length": 499.11376953125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 172.0,
"epoch": 0.19626666666666667,
"grad_norm": 0.006099920254200697,
"learning_rate": 4.444444444444445e-07,
"loss": 0.029,
"num_tokens": 41167844.0,
"reward": 1.15234375,
"reward_std": 0.28207772970199585,
"rewards/accuracy_reward_step": 0.6640625,
"rewards/format_reward_step": 0.9765625,
"step": 184
},
{
"calib/answer_extract_rate": 0.953125,
"calib/auroc": 0.5506459593183067,
"calib/avg_num_step_conf": 5.12109375,
"calib/ece": 0.32098765432098775,
"calib/final_conf_rate": 0.94921875,
"calib/format_rate": 0.94921875,
"calib/frac_conf_gt_0.9": 0.32098765432098764,
"calib/gap": 0.011863661352391586,
"calib/mean_conf": 0.8806584362139918,
"calib/mu_c": 0.8858823529411766,
"calib/mu_w": 0.874018691588785,
"calib/nonempty_final_conf_rate": 0.94921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.32098765432098775,
"calib/std_conf": 0.05225352293657308,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.7920152671755725,
"calib/step_q_c_n": 655.0,
"calib/step_q_gap": 0.04745734034630411,
"calib/step_q_w": 0.7445579268292684,
"calib/step_q_w_n": 656.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 3017.0,
"completions/max_terminated_length": 3017.0,
"completions/mean_length": 560.9609375,
"completions/mean_terminated_length": 567.6126708984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 178.0,
"epoch": 0.19733333333333333,
"grad_norm": 0.005671345628798008,
"learning_rate": 4.1666666666666667e-07,
"loss": 0.0715,
"num_tokens": 41418370.0,
"reward": 1.005859375,
"reward_std": 0.28620392084121704,
"rewards/accuracy_reward_step": 0.53125,
"rewards/format_reward_step": 0.94921875,
"step": 185
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.3939333598831807,
"calib/avg_num_step_conf": 5.234375,
"calib/ece": 0.2565882352941176,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.33725490196078434,
"calib/gap": -0.0162206292313819,
"calib/mean_conf": 0.8827058823529411,
"calib/mu_c": 0.8767901234567903,
"calib/mu_w": 0.8930107526881722,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.25199999999999995,
"calib/std_conf": 0.04411362073130446,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7841351660939291,
"calib/step_q_c_n": 873.0,
"calib/step_q_gap": 0.002336450890502939,
"calib/step_q_w": 0.7817987152034261,
"calib/step_q_w_n": 467.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1585.0,
"completions/max_terminated_length": 1585.0,
"completions/mean_length": 492.640625,
"completions/mean_terminated_length": 494.57257080078125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 169.0,
"epoch": 0.1984,
"grad_norm": 0.005457146093249321,
"learning_rate": 3.8888888888888895e-07,
"loss": -0.011,
"num_tokens": 41649526.0,
"reward": 1.134765625,
"reward_std": 0.23541419208049774,
"rewards/accuracy_reward_step": 0.63671875,
"rewards/format_reward_step": 0.99609375,
"step": 186
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4931908396946565,
"calib/avg_num_step_conf": 5.265625,
"calib/ece": 0.3692968750000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.29296875,
"calib/gap": -0.0005166412213740701,
"calib/mean_conf": 0.8810156250000001,
"calib/mu_c": 0.8807633587786259,
"calib/mu_w": 0.88128,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3692968750000001,
"calib/std_conf": 0.04707606616805801,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7949239999999999,
"calib/step_q_c_n": 750.0,
"calib/step_q_gap": 0.022532695652173862,
"calib/step_q_w": 0.772391304347826,
"calib/step_q_w_n": 598.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1139.0,
"completions/max_terminated_length": 1139.0,
"completions/mean_length": 497.23046875,
"completions/mean_terminated_length": 499.180419921875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 192.0,
"epoch": 0.19946666666666665,
"grad_norm": 0.0053702532313764095,
"learning_rate": 3.611111111111111e-07,
"loss": 0.0113,
"num_tokens": 41878361.0,
"reward": 1.009765625,
"reward_std": 0.26010939478874207,
"rewards/accuracy_reward_step": 0.51171875,
"rewards/format_reward_step": 0.99609375,
"step": 187
},
{
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.4917698580489278,
"calib/avg_num_step_conf": 5.23046875,
"calib/ece": 0.2009236947791166,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.3172690763052209,
"calib/gap": -0.0022689519782542744,
"calib/mean_conf": 0.8862248995983936,
"calib/mu_c": 0.8855232558139535,
"calib/mu_w": 0.8877922077922078,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.19819277108433747,
"calib/std_conf": 0.043077002391416236,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.7952840909090909,
"calib/step_q_c_n": 880.0,
"calib/step_q_gap": 0.0007307140027730252,
"calib/step_q_w": 0.7945533769063179,
"calib/step_q_w_n": 459.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2943.0,
"completions/max_terminated_length": 2943.0,
"completions/mean_length": 548.7421875,
"completions/mean_terminated_length": 555.2490234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 174.0,
"epoch": 0.20053333333333334,
"grad_norm": 0.004811764694750309,
"learning_rate": 3.3333333333333335e-07,
"loss": -0.0023,
"num_tokens": 42122911.0,
"reward": 1.158203125,
"reward_std": 0.26997411251068115,
"rewards/accuracy_reward_step": 0.671875,
"rewards/format_reward_step": 0.97265625,
"step": 188
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.542660054676076,
"calib/avg_num_step_conf": 4.82421875,
"calib/ece": 0.30200787401574825,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.2755905511811024,
"calib/gap": 0.0077519231991860504,
"calib/mean_conf": 0.8807480314960632,
"calib/mu_c": 0.8840136054421768,
"calib/mu_w": 0.8762616822429907,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.30200787401574825,
"calib/std_conf": 0.0403116732481052,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.8018431911966989,
"calib/step_q_c_n": 727.0,
"calib/step_q_gap": 0.00928413607858869,
"calib/step_q_w": 0.7925590551181102,
"calib/step_q_w_n": 508.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1324.0,
"completions/max_terminated_length": 1324.0,
"completions/mean_length": 479.453125,
"completions/mean_terminated_length": 481.3333740234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 164.0,
"epoch": 0.2016,
"grad_norm": 0.004866011440753937,
"learning_rate": 3.055555555555556e-07,
"loss": 0.0141,
"num_tokens": 42353419.0,
"reward": 1.0703125,
"reward_std": 0.19307208061218262,
"rewards/accuracy_reward_step": 0.57421875,
"rewards/format_reward_step": 0.9921875,
"step": 189
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5403007227671658,
"calib/avg_num_step_conf": 5.1796875,
"calib/ece": 0.2946640316205533,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.3241106719367589,
"calib/gap": 0.005454310789881234,
"calib/mean_conf": 0.8835968379446639,
"calib/mu_c": 0.8858389261744967,
"calib/mu_w": 0.8803846153846154,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.2946640316205533,
"calib/std_conf": 0.042844828831381035,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.7854931335830213,
"calib/step_q_c_n": 801.0,
"calib/step_q_gap": 0.01404551453540226,
"calib/step_q_w": 0.771447619047619,
"calib/step_q_w_n": 525.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2600.0,
"completions/max_terminated_length": 2600.0,
"completions/mean_length": 550.4921875,
"completions/mean_terminated_length": 552.6510009765625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 178.0,
"epoch": 0.20266666666666666,
"grad_norm": 0.005678132176399231,
"learning_rate": 2.7777777777777776e-07,
"loss": 0.024,
"num_tokens": 42599953.0,
"reward": 1.076171875,
"reward_std": 0.2409372329711914,
"rewards/accuracy_reward_step": 0.58203125,
"rewards/format_reward_step": 0.98828125,
"step": 190
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4537071535022355,
"calib/avg_num_step_conf": 5.7578125,
"calib/ece": 0.36531496062992125,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.35826771653543305,
"calib/gap": -0.007728514654744112,
"calib/mean_conf": 0.8850000000000001,
"calib/mu_c": 0.8812878787878788,
"calib/mu_w": 0.889016393442623,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.36531496062992125,
"calib/std_conf": 0.04383891414828597,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.7853507565337002,
"calib/step_q_c_n": 727.0,
"calib/step_q_gap": -0.01036544159213637,
"calib/step_q_w": 0.7957161981258366,
"calib/step_q_w_n": 747.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2096.0,
"completions/max_terminated_length": 2096.0,
"completions/mean_length": 509.91796875,
"completions/mean_terminated_length": 511.91766357421875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 156.0,
"epoch": 0.20373333333333332,
"grad_norm": 0.005636806599795818,
"learning_rate": 2.5000000000000004e-07,
"loss": -0.0047,
"num_tokens": 42834660.0,
"reward": 1.01171875,
"reward_std": 0.2590974271297455,
"rewards/accuracy_reward_step": 0.515625,
"rewards/format_reward_step": 0.9921875,
"step": 191
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.47105037301115726,
"calib/avg_num_step_conf": 4.734375,
"calib/ece": 0.2821825396825397,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.3055555555555556,
"calib/gap": -0.006280451574569024,
"calib/mean_conf": 0.8830555555555557,
"calib/mu_c": 0.8805882352941178,
"calib/mu_w": 0.8868686868686868,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.2790476190476191,
"calib/std_conf": 0.04528337680835594,
"calib/step_conf_rate": 0.984375,
"calib/step_q_c": 0.7964315352697097,
"calib/step_q_c_n": 723.0,
"calib/step_q_gap": 0.05088961297932104,
"calib/step_q_w": 0.7455419222903886,
"calib/step_q_w_n": 489.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2667.0,
"completions/max_terminated_length": 2667.0,
"completions/mean_length": 519.4609375,
"completions/mean_terminated_length": 521.498046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 163.0,
"epoch": 0.2048,
"grad_norm": 0.006023888476192951,
"learning_rate": 2.2222222222222224e-07,
"loss": 0.0452,
"num_tokens": 43072618.0,
"reward": 1.083984375,
"reward_std": 0.2951584458351135,
"rewards/accuracy_reward_step": 0.59765625,
"rewards/format_reward_step": 0.97265625,
"step": 192
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.4333597516676574,
"calib/avg_num_step_conf": 5.1015625,
"calib/ece": 0.29244,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.292,
"calib/gap": -0.006681857208902908,
"calib/mean_conf": 0.8804399999999999,
"calib/mu_c": 0.877687074829932,
"calib/mu_w": 0.8843689320388349,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.984375,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.29244,
"calib/std_conf": 0.04479516045288822,
"calib/step_conf_rate": 0.98046875,
"calib/step_q_c": 0.8042016806722688,
"calib/step_q_c_n": 714.0,
"calib/step_q_gap": 0.004809788780376967,
"calib/step_q_w": 0.7993918918918919,
"calib/step_q_w_n": 592.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2853.0,
"completions/max_terminated_length": 2853.0,
"completions/mean_length": 509.51953125,
"completions/mean_terminated_length": 513.531494140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 210.0,
"epoch": 0.20586666666666667,
"grad_norm": 0.005428059492260218,
"learning_rate": 1.9444444444444447e-07,
"loss": 0.0513,
"num_tokens": 43308767.0,
"reward": 1.060546875,
"reward_std": 0.3096548914909363,
"rewards/accuracy_reward_step": 0.57421875,
"rewards/format_reward_step": 0.97265625,
"step": 193
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.4667326732673268,
"calib/avg_num_step_conf": 5.37109375,
"calib/ece": 0.28350597609561756,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.30677290836653387,
"calib/gap": -0.003932013201320195,
"calib/mean_conf": 0.8811155378486056,
"calib/mu_c": 0.8795333333333332,
"calib/mu_w": 0.8834653465346534,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.28350597609561756,
"calib/std_conf": 0.04544983879206464,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.78445,
"calib/step_q_c_n": 800.0,
"calib/step_q_gap": 0.00851956521739139,
"calib/step_q_w": 0.7759304347826086,
"calib/step_q_w_n": 575.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2880.0,
"completions/max_terminated_length": 2880.0,
"completions/mean_length": 490.671875,
"completions/mean_terminated_length": 494.5354309082031,
"completions/min_length": 0.0,
"completions/min_terminated_length": 189.0,
"epoch": 0.20693333333333333,
"grad_norm": 0.0054902262054383755,
"learning_rate": 1.6666666666666668e-07,
"loss": 0.0639,
"num_tokens": 43540323.0,
"reward": 1.07421875,
"reward_std": 0.25600647926330566,
"rewards/accuracy_reward_step": 0.5859375,
"rewards/format_reward_step": 0.9765625,
"step": 194
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5666205168776371,
"calib/avg_num_step_conf": 5.2109375,
"calib/ece": 0.26287401574803143,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.37401574803149606,
"calib/gap": 0.012432753164556831,
"calib/mean_conf": 0.8849212598425196,
"calib/mu_c": 0.889620253164557,
"calib/mu_w": 0.8771875000000001,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.26287401574803143,
"calib/std_conf": 0.04395543668490655,
"calib/step_conf_rate": 0.98046875,
"calib/step_q_c": 0.7980699638118215,
"calib/step_q_c_n": 829.0,
"calib/step_q_gap": 0.009218478663306606,
"calib/step_q_w": 0.7888514851485149,
"calib/step_q_w_n": 505.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2845.0,
"completions/max_terminated_length": 2845.0,
"completions/mean_length": 508.24609375,
"completions/mean_terminated_length": 508.24609375,
"completions/min_length": 162.0,
"completions/min_terminated_length": 162.0,
"epoch": 0.208,
"grad_norm": 0.005612097214907408,
"learning_rate": 1.3888888888888888e-07,
"loss": 0.015,
"num_tokens": 43776418.0,
"reward": 1.10546875,
"reward_std": 0.22084440290927887,
"rewards/accuracy_reward_step": 0.6171875,
"rewards/format_reward_step": 0.9765625,
"step": 195
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.49657142857142855,
"calib/avg_num_step_conf": 4.94921875,
"calib/ece": 0.2945882352941178,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.23921568627450981,
"calib/gap": -0.0015619047619047643,
"calib/mean_conf": 0.8771764705882353,
"calib/mu_c": 0.8765333333333335,
"calib/mu_w": 0.8780952380952383,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.2917647058823531,
"calib/std_conf": 0.041281770122004194,
"calib/step_conf_rate": 0.99609375,
"calib/step_q_c": 0.8079945054945056,
"calib/step_q_c_n": 728.0,
"calib/step_q_gap": 0.004580776366490791,
"calib/step_q_w": 0.8034137291280148,
"calib/step_q_w_n": 539.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2710.0,
"completions/max_terminated_length": 2710.0,
"completions/mean_length": 435.8515625,
"completions/mean_terminated_length": 435.8515625,
"completions/min_length": 170.0,
"completions/min_terminated_length": 170.0,
"epoch": 0.20906666666666668,
"grad_norm": 0.005549365654587746,
"learning_rate": 1.1111111111111112e-07,
"loss": 0.0249,
"num_tokens": 43990540.0,
"reward": 1.080078125,
"reward_std": 0.21700042486190796,
"rewards/accuracy_reward_step": 0.5859375,
"rewards/format_reward_step": 0.98828125,
"step": 196
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.510432,
"calib/avg_num_step_conf": 5.94140625,
"calib/ece": 0.3899200000000001,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.352,
"calib/gap": 0.0014399999999999968,
"calib/mean_conf": 0.8881600000000001,
"calib/mu_c": 0.88888,
"calib/mu_w": 0.88744,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.3890400000000001,
"calib/std_conf": 0.042263629754198825,
"calib/step_conf_rate": 0.9921875,
"calib/step_q_c": 0.8044175491679274,
"calib/step_q_c_n": 661.0,
"calib/step_q_gap": 0.03278964219118319,
"calib/step_q_w": 0.7716279069767442,
"calib/step_q_w_n": 860.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 3069.0,
"completions/max_terminated_length": 3069.0,
"completions/mean_length": 549.078125,
"completions/mean_terminated_length": 551.2313842773438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 202.0,
"epoch": 0.21013333333333334,
"grad_norm": 0.005433402024209499,
"learning_rate": 8.333333333333334e-08,
"loss": 0.1145,
"num_tokens": 44236160.0,
"reward": 0.97265625,
"reward_std": 0.26813995838165283,
"rewards/accuracy_reward_step": 0.48828125,
"rewards/format_reward_step": 0.96875,
"step": 197
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.4489684074790457,
"calib/avg_num_step_conf": 5.6328125,
"calib/ece": 0.3200796812749005,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.2908366533864542,
"calib/gap": -0.007256608639587214,
"calib/mean_conf": 0.8818326693227093,
"calib/mu_c": 0.8786524822695035,
"calib/mu_w": 0.8859090909090908,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3200796812749005,
"calib/std_conf": 0.0451979696733806,
"calib/step_conf_rate": 1.0,
"calib/step_q_c": 0.8025434243176177,
"calib/step_q_c_n": 806.0,
"calib/step_q_gap": 0.005530845701265497,
"calib/step_q_w": 0.7970125786163522,
"calib/step_q_w_n": 636.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2820.0,
"completions/max_terminated_length": 2820.0,
"completions/mean_length": 488.9453125,
"completions/mean_terminated_length": 490.8627624511719,
"completions/min_length": 0.0,
"completions/min_terminated_length": 144.0,
"epoch": 0.2112,
"grad_norm": 0.005839452147483826,
"learning_rate": 5.555555555555556e-08,
"loss": 0.0501,
"num_tokens": 44466714.0,
"reward": 1.041015625,
"reward_std": 0.25976496934890747,
"rewards/accuracy_reward_step": 0.55078125,
"rewards/format_reward_step": 0.98046875,
"step": 198
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.49778464017185825,
"calib/avg_num_step_conf": 5.3671875,
"calib/ece": 0.27671999999999997,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.344,
"calib/gap": -0.0021388292158970312,
"calib/mean_conf": 0.8847200000000001,
"calib/mu_c": 0.8838815789473684,
"calib/mu_w": 0.8860204081632654,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.27671999999999997,
"calib/std_conf": 0.050184874215245374,
"calib/step_conf_rate": 0.984375,
"calib/step_q_c": 0.8068908629441623,
"calib/step_q_c_n": 788.0,
"calib/step_q_gap": 0.04632772301242172,
"calib/step_q_w": 0.7605631399317406,
"calib/step_q_w_n": 586.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2923.0,
"completions/max_terminated_length": 2923.0,
"completions/mean_length": 576.2734375,
"completions/mean_terminated_length": 576.2734375,
"completions/min_length": 141.0,
"completions/min_terminated_length": 141.0,
"epoch": 0.21226666666666666,
"grad_norm": 0.005713994614779949,
"learning_rate": 2.777777777777778e-08,
"loss": 0.057,
"num_tokens": 44718440.0,
"reward": 1.076171875,
"reward_std": 0.2899458706378937,
"rewards/accuracy_reward_step": 0.59375,
"rewards/format_reward_step": 0.96484375,
"step": 199
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.4710925039872408,
"calib/avg_num_step_conf": 4.77734375,
"calib/ece": 0.2786454183266933,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.30278884462151395,
"calib/gap": -0.00536948431685258,
"calib/mean_conf": 0.8842231075697211,
"calib/mu_c": 0.8821052631578948,
"calib/mu_w": 0.8874747474747474,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.2786454183266933,
"calib/std_conf": 0.04506407593797755,
"calib/step_conf_rate": 0.98828125,
"calib/step_q_c": 0.8096626180836708,
"calib/step_q_c_n": 741.0,
"calib/step_q_gap": 0.04057548115421017,
"calib/step_q_w": 0.7690871369294606,
"calib/step_q_w_n": 482.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2292.0,
"completions/max_terminated_length": 2292.0,
"completions/mean_length": 498.34375,
"completions/mean_terminated_length": 504.25299072265625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 142.0,
"epoch": 0.21333333333333335,
"grad_norm": 0.006134728901088238,
"learning_rate": 0.0,
"loss": -0.0127,
"num_tokens": 44954064.0,
"reward": 1.0859375,
"reward_std": 0.2796323895454407,
"rewards/accuracy_reward_step": 0.59765625,
"rewards/format_reward_step": 0.9765625,
"step": 200
},
{
"epoch": 0.21333333333333335,
"step": 200,
"total_flos": 0.0,
"train_loss": 0.0328808896424016,
"train_runtime": 8801.639,
"train_samples_per_second": 5.817,
"train_steps_per_second": 0.023
}
],
"logging_steps": 1,
"max_steps": 200,
"num_input_tokens_seen": 44954064,
"num_train_epochs": 1,
"save_steps": 25,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}