9244 lines
340 KiB
JSON
9244 lines
340 KiB
JSON
|
|
{
|
||
|
|
"best_global_step": null,
|
||
|
|
"best_metric": null,
|
||
|
|
"best_model_checkpoint": null,
|
||
|
|
"epoch": 0.32,
|
||
|
|
"eval_steps": 500,
|
||
|
|
"global_step": 200,
|
||
|
|
"is_hyper_param_search": false,
|
||
|
|
"is_local_process_zero": true,
|
||
|
|
"is_world_process_zero": true,
|
||
|
|
"log_history": [
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7847598522167488,
|
||
|
|
"calib/avg_num_step_conf": 3.85546875,
|
||
|
|
"calib/ece": 0.31390625000000005,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 1.0,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.53515625,
|
||
|
|
"calib/gap": 0.16190147783251252,
|
||
|
|
"calib/mean_conf": 0.86078125,
|
||
|
|
"calib/mu_c": 0.9341428571428574,
|
||
|
|
"calib/mu_w": 0.7722413793103449,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
||
|
|
"calib/pce": 0.31390625000000005,
|
||
|
|
"calib/std_conf": 0.18074762003533407,
|
||
|
|
"calib/step_conf_rate": 1.0,
|
||
|
|
"calib/step_q_c": 0.8456213017751478,
|
||
|
|
"calib/step_q_c_n": 507.0,
|
||
|
|
"calib/step_q_gap": 0.12451713510848117,
|
||
|
|
"calib/step_q_w": 0.7211041666666667,
|
||
|
|
"calib/step_q_w_n": 480.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0016,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 0.0,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 252032.0,
|
||
|
|
"reward": 1.046875,
|
||
|
|
"reward_std": 0.15702980756759644,
|
||
|
|
"rewards/accuracy_reward_step": 0.546875,
|
||
|
|
"rewards/format_reward_step": 1.0,
|
||
|
|
"step": 1
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.6568917668825163,
|
||
|
|
"calib/avg_num_step_conf": 4.16015625,
|
||
|
|
"calib/ece": 0.4085156250000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.9921875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.51953125,
|
||
|
|
"calib/gap": 0.10610299105766285,
|
||
|
|
"calib/mean_conf": 0.8577343749999999,
|
||
|
|
"calib/mu_c": 0.9161739130434784,
|
||
|
|
"calib/mu_w": 0.8100709219858155,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
||
|
|
"calib/pce": 0.4085156250000001,
|
||
|
|
"calib/std_conf": 0.1895294949166471,
|
||
|
|
"calib/step_conf_rate": 0.99609375,
|
||
|
|
"calib/step_q_c": 0.7985778781038374,
|
||
|
|
"calib/step_q_c_n": 443.0,
|
||
|
|
"calib/step_q_gap": 0.05455858549933579,
|
||
|
|
"calib/step_q_w": 0.7440192926045016,
|
||
|
|
"calib/step_q_w_n": 622.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0032,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 5e-08,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 511736.0,
|
||
|
|
"reward": 0.9453125,
|
||
|
|
"reward_std": 0.17519709467887878,
|
||
|
|
"rewards/accuracy_reward_step": 0.44921875,
|
||
|
|
"rewards/format_reward_step": 0.9921875,
|
||
|
|
"step": 2
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.6860458015267176,
|
||
|
|
"calib/avg_num_step_conf": 3.94921875,
|
||
|
|
"calib/ece": 0.40558593750000005,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.99609375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.625,
|
||
|
|
"calib/gap": 0.08264854961832058,
|
||
|
|
"calib/mean_conf": 0.8938671874999999,
|
||
|
|
"calib/mu_c": 0.93616,
|
||
|
|
"calib/mu_w": 0.8535114503816794,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
||
|
|
"calib/pce": 0.40558593750000005,
|
||
|
|
"calib/std_conf": 0.14239951891014183,
|
||
|
|
"calib/step_conf_rate": 0.99609375,
|
||
|
|
"calib/step_q_c": 0.8439232409381665,
|
||
|
|
"calib/step_q_c_n": 469.0,
|
||
|
|
"calib/step_q_gap": 0.05460589776473479,
|
||
|
|
"calib/step_q_w": 0.7893173431734317,
|
||
|
|
"calib/step_q_w_n": 542.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0048,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 1e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 767128.0,
|
||
|
|
"reward": 0.986328125,
|
||
|
|
"reward_std": 0.15006008744239807,
|
||
|
|
"rewards/accuracy_reward_step": 0.48828125,
|
||
|
|
"rewards/format_reward_step": 0.99609375,
|
||
|
|
"step": 3
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.9921875,
|
||
|
|
"calib/auroc": 0.7235286798556676,
|
||
|
|
"calib/avg_num_step_conf": 4.0,
|
||
|
|
"calib/ece": 0.3897647058823531,
|
||
|
|
"calib/final_conf_rate": 0.99609375,
|
||
|
|
"calib/format_rate": 0.9921875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.5529411764705883,
|
||
|
|
"calib/gap": 0.17866368047779035,
|
||
|
|
"calib/mean_conf": 0.8368235294117646,
|
||
|
|
"calib/mu_c": 0.9356140350877193,
|
||
|
|
"calib/mu_w": 0.756950354609929,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
||
|
|
"calib/pce": 0.3897647058823531,
|
||
|
|
"calib/std_conf": 0.2336053909096557,
|
||
|
|
"calib/step_conf_rate": 1.0,
|
||
|
|
"calib/step_q_c": 0.8339447236180904,
|
||
|
|
"calib/step_q_c_n": 398.0,
|
||
|
|
"calib/step_q_gap": 0.12412044246793053,
|
||
|
|
"calib/step_q_w": 0.7098242811501598,
|
||
|
|
"calib/step_q_w_n": 626.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0064,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 1.5e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 1024896.0,
|
||
|
|
"reward": 0.94140625,
|
||
|
|
"reward_std": 0.16598647832870483,
|
||
|
|
"rewards/accuracy_reward_step": 0.4453125,
|
||
|
|
"rewards/format_reward_step": 0.9921875,
|
||
|
|
"step": 4
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7077480042081812,
|
||
|
|
"calib/avg_num_step_conf": 4.08203125,
|
||
|
|
"calib/ece": 0.4300390625,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 1.0,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.640625,
|
||
|
|
"calib/gap": 0.1347676217587721,
|
||
|
|
"calib/mean_conf": 0.8714453124999999,
|
||
|
|
"calib/mu_c": 0.9467256637168141,
|
||
|
|
"calib/mu_w": 0.811958041958042,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
||
|
|
"calib/pce": 0.4300390625,
|
||
|
|
"calib/std_conf": 0.20415379116435076,
|
||
|
|
"calib/step_conf_rate": 1.0,
|
||
|
|
"calib/step_q_c": 0.8484027777777777,
|
||
|
|
"calib/step_q_c_n": 432.0,
|
||
|
|
"calib/step_q_gap": 0.07176329980061613,
|
||
|
|
"calib/step_q_w": 0.7766394779771616,
|
||
|
|
"calib/step_q_w_n": 613.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.008,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 2e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 1285536.0,
|
||
|
|
"reward": 0.94140625,
|
||
|
|
"reward_std": 0.12164628505706787,
|
||
|
|
"rewards/accuracy_reward_step": 0.44140625,
|
||
|
|
"rewards/format_reward_step": 1.0,
|
||
|
|
"step": 5
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.6461634047840944,
|
||
|
|
"calib/avg_num_step_conf": 4.109375,
|
||
|
|
"calib/ece": 0.35417968750000006,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.99609375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.71875,
|
||
|
|
"calib/gap": 0.04493383038210619,
|
||
|
|
"calib/mean_conf": 0.9205859374999998,
|
||
|
|
"calib/mu_c": 0.9400689655172414,
|
||
|
|
"calib/mu_w": 0.8951351351351352,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
||
|
|
"calib/pce": 0.35417968750000006,
|
||
|
|
"calib/std_conf": 0.10330877771150955,
|
||
|
|
"calib/step_conf_rate": 0.99609375,
|
||
|
|
"calib/step_q_c": 0.8379241877256318,
|
||
|
|
"calib/step_q_c_n": 554.0,
|
||
|
|
"calib/step_q_gap": 0.04374748089832259,
|
||
|
|
"calib/step_q_w": 0.7941767068273092,
|
||
|
|
"calib/step_q_w_n": 498.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0096,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 2.5e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 1547088.0,
|
||
|
|
"reward": 1.064453125,
|
||
|
|
"reward_std": 0.23002484440803528,
|
||
|
|
"rewards/accuracy_reward_step": 0.56640625,
|
||
|
|
"rewards/format_reward_step": 0.99609375,
|
||
|
|
"step": 6
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.7235908921239268,
|
||
|
|
"calib/avg_num_step_conf": 3.8828125,
|
||
|
|
"calib/ece": 0.28709803921568644,
|
||
|
|
"calib/final_conf_rate": 0.99609375,
|
||
|
|
"calib/format_rate": 0.98828125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.6235294117647059,
|
||
|
|
"calib/gap": 0.21042926465098943,
|
||
|
|
"calib/mean_conf": 0.8400392156862744,
|
||
|
|
"calib/mu_c": 0.9341134751773053,
|
||
|
|
"calib/mu_w": 0.7236842105263158,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
||
|
|
"calib/pce": 0.28709803921568644,
|
||
|
|
"calib/std_conf": 0.24094177644941636,
|
||
|
|
"calib/step_conf_rate": 0.9921875,
|
||
|
|
"calib/step_q_c": 0.8277351247600768,
|
||
|
|
"calib/step_q_c_n": 521.0,
|
||
|
|
"calib/step_q_gap": 0.17913047359728618,
|
||
|
|
"calib/step_q_w": 0.6486046511627906,
|
||
|
|
"calib/step_q_w_n": 473.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 87.0,
|
||
|
|
"completions/max_terminated_length": 87.0,
|
||
|
|
"completions/mean_length": 0.33984375,
|
||
|
|
"completions/mean_terminated_length": 87.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 87.0,
|
||
|
|
"epoch": 0.0112,
|
||
|
|
"grad_norm": 2.3416316509246826,
|
||
|
|
"learning_rate": 3e-07,
|
||
|
|
"loss": 0.0211,
|
||
|
|
"num_tokens": 1807055.0,
|
||
|
|
"reward": 1.044921875,
|
||
|
|
"reward_std": 0.19384272396564484,
|
||
|
|
"rewards/accuracy_reward_step": 0.55078125,
|
||
|
|
"rewards/format_reward_step": 0.98828125,
|
||
|
|
"step": 7
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.5795014208214931,
|
||
|
|
"calib/avg_num_step_conf": 3.76953125,
|
||
|
|
"calib/ece": 0.3051953124999999,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 1.0,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.73046875,
|
||
|
|
"calib/gap": 0.04023378971841929,
|
||
|
|
"calib/mean_conf": 0.9223828124999999,
|
||
|
|
"calib/mu_c": 0.9377848101265823,
|
||
|
|
"calib/mu_w": 0.897551020408163,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
||
|
|
"calib/pce": 0.3051953124999999,
|
||
|
|
"calib/std_conf": 0.08503782440531885,
|
||
|
|
"calib/step_conf_rate": 1.0,
|
||
|
|
"calib/step_q_c": 0.8459363957597174,
|
||
|
|
"calib/step_q_c_n": 566.0,
|
||
|
|
"calib/step_q_gap": 0.05295393961936656,
|
||
|
|
"calib/step_q_w": 0.7929824561403508,
|
||
|
|
"calib/step_q_w_n": 399.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0128,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 3.5e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 2068111.0,
|
||
|
|
"reward": 1.1171875,
|
||
|
|
"reward_std": 0.13743899762630463,
|
||
|
|
"rewards/accuracy_reward_step": 0.6171875,
|
||
|
|
"rewards/format_reward_step": 1.0,
|
||
|
|
"step": 8
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.6410148584244462,
|
||
|
|
"calib/avg_num_step_conf": 4.30078125,
|
||
|
|
"calib/ece": 0.4741406250000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.99609375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.45703125,
|
||
|
|
"calib/gap": 0.16461732548359975,
|
||
|
|
"calib/mean_conf": 0.794453125,
|
||
|
|
"calib/mu_c": 0.9063414634146343,
|
||
|
|
"calib/mu_w": 0.7417241379310345,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
||
|
|
"calib/pce": 0.4741406250000001,
|
||
|
|
"calib/std_conf": 0.26864331776118006,
|
||
|
|
"calib/step_conf_rate": 0.99609375,
|
||
|
|
"calib/step_q_c": 0.8122873900293255,
|
||
|
|
"calib/step_q_c_n": 341.0,
|
||
|
|
"calib/step_q_gap": 0.11049791634511497,
|
||
|
|
"calib/step_q_w": 0.7017894736842105,
|
||
|
|
"calib/step_q_w_n": 760.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0144,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 4e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 2329039.0,
|
||
|
|
"reward": 0.818359375,
|
||
|
|
"reward_std": 0.13821910321712494,
|
||
|
|
"rewards/accuracy_reward_step": 0.3203125,
|
||
|
|
"rewards/format_reward_step": 0.99609375,
|
||
|
|
"step": 9
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.6659653195966044,
|
||
|
|
"calib/avg_num_step_conf": 4.1171875,
|
||
|
|
"calib/ece": 0.5887500000000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.99609375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.59375,
|
||
|
|
"calib/gap": 0.06976420227816882,
|
||
|
|
"calib/mean_conf": 0.88953125,
|
||
|
|
"calib/mu_c": 0.9383116883116885,
|
||
|
|
"calib/mu_w": 0.8685474860335197,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
||
|
|
"calib/pce": 0.5887500000000001,
|
||
|
|
"calib/std_conf": 0.14614504447102372,
|
||
|
|
"calib/step_conf_rate": 0.99609375,
|
||
|
|
"calib/step_q_c": 0.8195847750865053,
|
||
|
|
"calib/step_q_c_n": 289.0,
|
||
|
|
"calib/step_q_gap": 0.056578239138792785,
|
||
|
|
"calib/step_q_w": 0.7630065359477125,
|
||
|
|
"calib/step_q_w_n": 765.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.016,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 4.5e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 2588767.0,
|
||
|
|
"reward": 0.798828125,
|
||
|
|
"reward_std": 0.20692811906337738,
|
||
|
|
"rewards/accuracy_reward_step": 0.30078125,
|
||
|
|
"rewards/format_reward_step": 0.99609375,
|
||
|
|
"step": 10
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.6704088050314465,
|
||
|
|
"calib/avg_num_step_conf": 4.36328125,
|
||
|
|
"calib/ece": 0.4537890625,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.99609375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.5703125,
|
||
|
|
"calib/gap": 0.11170188679245274,
|
||
|
|
"calib/mean_conf": 0.8678515625,
|
||
|
|
"calib/mu_c": 0.9333018867924529,
|
||
|
|
"calib/mu_w": 0.8216000000000001,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
||
|
|
"calib/pce": 0.4537890625,
|
||
|
|
"calib/std_conf": 0.1801704743883098,
|
||
|
|
"calib/step_conf_rate": 1.0,
|
||
|
|
"calib/step_q_c": 0.8169249394673123,
|
||
|
|
"calib/step_q_c_n": 413.0,
|
||
|
|
"calib/step_q_gap": 0.05183971219458494,
|
||
|
|
"calib/step_q_w": 0.7650852272727273,
|
||
|
|
"calib/step_q_w_n": 704.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0176,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 5e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 2845111.0,
|
||
|
|
"reward": 0.912109375,
|
||
|
|
"reward_std": 0.1373104453086853,
|
||
|
|
"rewards/accuracy_reward_step": 0.4140625,
|
||
|
|
"rewards/format_reward_step": 0.99609375,
|
||
|
|
"step": 11
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.7028461019444274,
|
||
|
|
"calib/avg_num_step_conf": 4.05078125,
|
||
|
|
"calib/ece": 0.315859375,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.99609375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.62109375,
|
||
|
|
"calib/gap": 0.1911145188002208,
|
||
|
|
"calib/mean_conf": 0.8510156249999998,
|
||
|
|
"calib/mu_c": 0.9398540145985402,
|
||
|
|
"calib/mu_w": 0.7487394957983194,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
||
|
|
"calib/pce": 0.315859375,
|
||
|
|
"calib/std_conf": 0.24001249916589634,
|
||
|
|
"calib/step_conf_rate": 1.0,
|
||
|
|
"calib/step_q_c": 0.8274908424908425,
|
||
|
|
"calib/step_q_c_n": 546.0,
|
||
|
|
"calib/step_q_gap": 0.10502648403870396,
|
||
|
|
"calib/step_q_w": 0.7224643584521385,
|
||
|
|
"calib/step_q_w_n": 491.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0192,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 5.5e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 3104999.0,
|
||
|
|
"reward": 1.033203125,
|
||
|
|
"reward_std": 0.2238508015871048,
|
||
|
|
"rewards/accuracy_reward_step": 0.53515625,
|
||
|
|
"rewards/format_reward_step": 0.99609375,
|
||
|
|
"step": 12
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.6854516386890488,
|
||
|
|
"calib/avg_num_step_conf": 3.96484375,
|
||
|
|
"calib/ece": 0.37953125000000004,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 1.0,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.65625,
|
||
|
|
"calib/gap": 0.19506856053618626,
|
||
|
|
"calib/mean_conf": 0.8365624999999999,
|
||
|
|
"calib/mu_c": 0.9424786324786323,
|
||
|
|
"calib/mu_w": 0.747410071942446,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
||
|
|
"calib/pce": 0.37953125000000004,
|
||
|
|
"calib/std_conf": 0.27608857698888956,
|
||
|
|
"calib/step_conf_rate": 1.0,
|
||
|
|
"calib/step_q_c": 0.8363274336283185,
|
||
|
|
"calib/step_q_c_n": 452.0,
|
||
|
|
"calib/step_q_gap": 0.12551038211854937,
|
||
|
|
"calib/step_q_w": 0.7108170515097691,
|
||
|
|
"calib/step_q_w_n": 563.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0208,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 6e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 3366951.0,
|
||
|
|
"reward": 0.95703125,
|
||
|
|
"reward_std": 0.16584046185016632,
|
||
|
|
"rewards/accuracy_reward_step": 0.45703125,
|
||
|
|
"rewards/format_reward_step": 1.0,
|
||
|
|
"step": 13
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.9921875,
|
||
|
|
"calib/auroc": 0.744658708295072,
|
||
|
|
"calib/avg_num_step_conf": 4.12890625,
|
||
|
|
"calib/ece": 0.3450781250000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.9921875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.6015625,
|
||
|
|
"calib/gap": 0.14579614325068901,
|
||
|
|
"calib/mean_conf": 0.8724218749999999,
|
||
|
|
"calib/mu_c": 0.9413333333333335,
|
||
|
|
"calib/mu_w": 0.7955371900826445,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
||
|
|
"calib/pce": 0.3450781250000001,
|
||
|
|
"calib/std_conf": 0.17242637058026936,
|
||
|
|
"calib/step_conf_rate": 0.99609375,
|
||
|
|
"calib/step_q_c": 0.8381312127236581,
|
||
|
|
"calib/step_q_c_n": 503.0,
|
||
|
|
"calib/step_q_gap": 0.10291460622546311,
|
||
|
|
"calib/step_q_w": 0.735216606498195,
|
||
|
|
"calib/step_q_w_n": 554.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0224,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 6.5e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 3628071.0,
|
||
|
|
"reward": 1.0234375,
|
||
|
|
"reward_std": 0.08982988446950912,
|
||
|
|
"rewards/accuracy_reward_step": 0.52734375,
|
||
|
|
"rewards/format_reward_step": 0.9921875,
|
||
|
|
"step": 14
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7537163645487048,
|
||
|
|
"calib/avg_num_step_conf": 3.9296875,
|
||
|
|
"calib/ece": 0.4614453125000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.99609375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.62109375,
|
||
|
|
"calib/gap": 0.11260427773944692,
|
||
|
|
"calib/mean_conf": 0.8794140625,
|
||
|
|
"calib/mu_c": 0.9449532710280375,
|
||
|
|
"calib/mu_w": 0.8323489932885906,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
||
|
|
"calib/pce": 0.4614453125000001,
|
||
|
|
"calib/std_conf": 0.1775507421900739,
|
||
|
|
"calib/step_conf_rate": 0.99609375,
|
||
|
|
"calib/step_q_c": 0.851424802110818,
|
||
|
|
"calib/step_q_c_n": 379.0,
|
||
|
|
"calib/step_q_gap": 0.0865603682990157,
|
||
|
|
"calib/step_q_w": 0.7648644338118022,
|
||
|
|
"calib/step_q_w_n": 627.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.024,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 3888863.0,
|
||
|
|
"reward": 0.916015625,
|
||
|
|
"reward_std": 0.14187777042388916,
|
||
|
|
"rewards/accuracy_reward_step": 0.41796875,
|
||
|
|
"rewards/format_reward_step": 0.99609375,
|
||
|
|
"step": 15
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.6735380474675801,
|
||
|
|
"calib/avg_num_step_conf": 4.2734375,
|
||
|
|
"calib/ece": 0.358984375,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.99609375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.64453125,
|
||
|
|
"calib/gap": 0.09122339124051859,
|
||
|
|
"calib/mean_conf": 0.8824218749999999,
|
||
|
|
"calib/mu_c": 0.9258955223880597,
|
||
|
|
"calib/mu_w": 0.8346721311475411,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
||
|
|
"calib/pce": 0.358984375,
|
||
|
|
"calib/std_conf": 0.15622372826009617,
|
||
|
|
"calib/step_conf_rate": 1.0,
|
||
|
|
"calib/step_q_c": 0.8350915750915752,
|
||
|
|
"calib/step_q_c_n": 546.0,
|
||
|
|
"calib/step_q_gap": 0.04899668458062634,
|
||
|
|
"calib/step_q_w": 0.7860948905109488,
|
||
|
|
"calib/step_q_w_n": 548.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0256,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 7.5e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 4149623.0,
|
||
|
|
"reward": 1.021484375,
|
||
|
|
"reward_std": 0.16464470326900482,
|
||
|
|
"rewards/accuracy_reward_step": 0.5234375,
|
||
|
|
"rewards/format_reward_step": 0.99609375,
|
||
|
|
"step": 16
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.6859403729971106,
|
||
|
|
"calib/avg_num_step_conf": 4.03515625,
|
||
|
|
"calib/ece": 0.4659375000000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 1.0,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.4921875,
|
||
|
|
"calib/gap": 0.14501707381140017,
|
||
|
|
"calib/mean_conf": 0.8331249999999999,
|
||
|
|
"calib/mu_c": 0.9248936170212767,
|
||
|
|
"calib/mu_w": 0.7798765432098765,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
||
|
|
"calib/pce": 0.4659375000000001,
|
||
|
|
"calib/std_conf": 0.22050279901851585,
|
||
|
|
"calib/step_conf_rate": 1.0,
|
||
|
|
"calib/step_q_c": 0.8268684210526317,
|
||
|
|
"calib/step_q_c_n": 380.0,
|
||
|
|
"calib/step_q_gap": 0.10181482227774652,
|
||
|
|
"calib/step_q_w": 0.7250535987748852,
|
||
|
|
"calib/step_q_w_n": 653.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0272,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 8e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 4398623.0,
|
||
|
|
"reward": 0.8671875,
|
||
|
|
"reward_std": 0.1536148637533188,
|
||
|
|
"rewards/accuracy_reward_step": 0.3671875,
|
||
|
|
"rewards/format_reward_step": 1.0,
|
||
|
|
"step": 17
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.6764777798153921,
|
||
|
|
"calib/avg_num_step_conf": 4.45703125,
|
||
|
|
"calib/ece": 0.367109375,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 1.0,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.52734375,
|
||
|
|
"calib/gap": 0.12922672534996027,
|
||
|
|
"calib/mean_conf": 0.847578125,
|
||
|
|
"calib/mu_c": 0.9147154471544716,
|
||
|
|
"calib/mu_w": 0.7854887218045113,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
||
|
|
"calib/pce": 0.367109375,
|
||
|
|
"calib/std_conf": 0.18590045258547483,
|
||
|
|
"calib/step_conf_rate": 1.0,
|
||
|
|
"calib/step_q_c": 0.8189463220675944,
|
||
|
|
"calib/step_q_c_n": 503.0,
|
||
|
|
"calib/step_q_gap": 0.07509052269455374,
|
||
|
|
"calib/step_q_w": 0.7438557993730407,
|
||
|
|
"calib/step_q_w_n": 638.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0288,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 8.499999999999999e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 4659959.0,
|
||
|
|
"reward": 0.98046875,
|
||
|
|
"reward_std": 0.22134128212928772,
|
||
|
|
"rewards/accuracy_reward_step": 0.48046875,
|
||
|
|
"rewards/format_reward_step": 1.0,
|
||
|
|
"step": 18
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7079899896233901,
|
||
|
|
"calib/avg_num_step_conf": 3.94921875,
|
||
|
|
"calib/ece": 0.367421875,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.99609375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.58984375,
|
||
|
|
"calib/gap": 0.1344381370933284,
|
||
|
|
"calib/mean_conf": 0.8635156249999999,
|
||
|
|
"calib/mu_c": 0.9312598425196851,
|
||
|
|
"calib/mu_w": 0.7968217054263567,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
||
|
|
"calib/pce": 0.367421875,
|
||
|
|
"calib/std_conf": 0.20059322117374598,
|
||
|
|
"calib/step_conf_rate": 0.99609375,
|
||
|
|
"calib/step_q_c": 0.8341860465116279,
|
||
|
|
"calib/step_q_c_n": 473.0,
|
||
|
|
"calib/step_q_gap": 0.0985169015302153,
|
||
|
|
"calib/step_q_w": 0.7356691449814126,
|
||
|
|
"calib/step_q_w_n": 538.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0304,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 9e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 4920623.0,
|
||
|
|
"reward": 0.994140625,
|
||
|
|
"reward_std": 0.18043741583824158,
|
||
|
|
"rewards/accuracy_reward_step": 0.49609375,
|
||
|
|
"rewards/format_reward_step": 0.99609375,
|
||
|
|
"step": 19
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.98046875,
|
||
|
|
"calib/auroc": 0.6436936936936936,
|
||
|
|
"calib/avg_num_step_conf": 3.98046875,
|
||
|
|
"calib/ece": 0.4508695652173912,
|
||
|
|
"calib/final_conf_rate": 0.98828125,
|
||
|
|
"calib/format_rate": 0.98046875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.5731225296442688,
|
||
|
|
"calib/gap": 0.08956949806949799,
|
||
|
|
"calib/mean_conf": 0.865889328063241,
|
||
|
|
"calib/mu_c": 0.9182857142857143,
|
||
|
|
"calib/mu_w": 0.8287162162162163,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
||
|
|
"calib/pce": 0.4508695652173912,
|
||
|
|
"calib/std_conf": 0.17095552089144098,
|
||
|
|
"calib/step_conf_rate": 0.9921875,
|
||
|
|
"calib/step_q_c": 0.8316372795969772,
|
||
|
|
"calib/step_q_c_n": 397.0,
|
||
|
|
"calib/step_q_gap": 0.06138004487028903,
|
||
|
|
"calib/step_q_w": 0.7702572347266882,
|
||
|
|
"calib/step_q_w_n": 622.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.98828125,
|
||
|
|
"completions/max_length": 280.0,
|
||
|
|
"completions/max_terminated_length": 280.0,
|
||
|
|
"completions/mean_length": 1.80859375,
|
||
|
|
"completions/mean_terminated_length": 154.33334350585938,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 38.0,
|
||
|
|
"epoch": 0.032,
|
||
|
|
"grad_norm": 4.076399326324463,
|
||
|
|
"learning_rate": 9.499999999999999e-07,
|
||
|
|
"loss": 0.0987,
|
||
|
|
"num_tokens": 5181646.0,
|
||
|
|
"reward": 0.900390625,
|
||
|
|
"reward_std": 0.2040051966905594,
|
||
|
|
"rewards/accuracy_reward_step": 0.41015625,
|
||
|
|
"rewards/format_reward_step": 0.98046875,
|
||
|
|
"step": 20
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.6870080064589921,
|
||
|
|
"calib/avg_num_step_conf": 4.359375,
|
||
|
|
"calib/ece": 0.497578125,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 1.0,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.55078125,
|
||
|
|
"calib/gap": 0.12012245172576197,
|
||
|
|
"calib/mean_conf": 0.845234375,
|
||
|
|
"calib/mu_c": 0.9235955056179775,
|
||
|
|
"calib/mu_w": 0.8034730538922156,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
||
|
|
"calib/pce": 0.497578125,
|
||
|
|
"calib/std_conf": 0.21170241807395437,
|
||
|
|
"calib/step_conf_rate": 1.0,
|
||
|
|
"calib/step_q_c": 0.8163478260869564,
|
||
|
|
"calib/step_q_c_n": 345.0,
|
||
|
|
"calib/step_q_gap": 0.06533615293520545,
|
||
|
|
"calib/step_q_w": 0.751011673151751,
|
||
|
|
"calib/step_q_w_n": 771.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0336,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 5439662.0,
|
||
|
|
"reward": 0.84765625,
|
||
|
|
"reward_std": 0.11139655113220215,
|
||
|
|
"rewards/accuracy_reward_step": 0.34765625,
|
||
|
|
"rewards/format_reward_step": 1.0,
|
||
|
|
"step": 21
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.6988704867060093,
|
||
|
|
"calib/avg_num_step_conf": 4.0625,
|
||
|
|
"calib/ece": 0.4556640625,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.99609375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.53515625,
|
||
|
|
"calib/gap": 0.1274427311377626,
|
||
|
|
"calib/mean_conf": 0.8580078124999999,
|
||
|
|
"calib/mu_c": 0.9341747572815535,
|
||
|
|
"calib/mu_w": 0.8067320261437909,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
||
|
|
"calib/pce": 0.4556640625,
|
||
|
|
"calib/std_conf": 0.18236386172694644,
|
||
|
|
"calib/step_conf_rate": 0.99609375,
|
||
|
|
"calib/step_q_c": 0.8324668435013264,
|
||
|
|
"calib/step_q_c_n": 377.0,
|
||
|
|
"calib/step_q_gap": 0.10385447547719373,
|
||
|
|
"calib/step_q_w": 0.7286123680241327,
|
||
|
|
"calib/step_q_w_n": 663.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0352,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 9.944444444444444e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 5701750.0,
|
||
|
|
"reward": 0.900390625,
|
||
|
|
"reward_std": 0.15003961324691772,
|
||
|
|
"rewards/accuracy_reward_step": 0.40234375,
|
||
|
|
"rewards/format_reward_step": 0.99609375,
|
||
|
|
"step": 22
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.693142361111111,
|
||
|
|
"calib/avg_num_step_conf": 3.8671875,
|
||
|
|
"calib/ece": 0.43999999999999995,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 1.0,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.6953125,
|
||
|
|
"calib/gap": 0.12210317460317455,
|
||
|
|
"calib/mean_conf": 0.87515625,
|
||
|
|
"calib/mu_c": 0.9438392857142858,
|
||
|
|
"calib/mu_w": 0.8217361111111112,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
||
|
|
"calib/pce": 0.4388281249999999,
|
||
|
|
"calib/std_conf": 0.2005519118231923,
|
||
|
|
"calib/step_conf_rate": 1.0,
|
||
|
|
"calib/step_q_c": 0.865979381443299,
|
||
|
|
"calib/step_q_c_n": 388.0,
|
||
|
|
"calib/step_q_gap": 0.10102921533034215,
|
||
|
|
"calib/step_q_w": 0.7649501661129569,
|
||
|
|
"calib/step_q_w_n": 602.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0368,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 9.88888888888889e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 5961574.0,
|
||
|
|
"reward": 0.9375,
|
||
|
|
"reward_std": 0.09271685779094696,
|
||
|
|
"rewards/accuracy_reward_step": 0.4375,
|
||
|
|
"rewards/format_reward_step": 1.0,
|
||
|
|
"step": 23
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7096158658658658,
|
||
|
|
"calib/avg_num_step_conf": 3.7578125,
|
||
|
|
"calib/ece": 0.4128125,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.9921875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.59765625,
|
||
|
|
"calib/gap": 0.1812012012012013,
|
||
|
|
"calib/mean_conf": 0.8346874999999999,
|
||
|
|
"calib/mu_c": 0.9394444444444445,
|
||
|
|
"calib/mu_w": 0.7582432432432432,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
||
|
|
"calib/pce": 0.4128125,
|
||
|
|
"calib/std_conf": 0.24072623682048033,
|
||
|
|
"calib/step_conf_rate": 0.9921875,
|
||
|
|
"calib/step_q_c": 0.8311286089238846,
|
||
|
|
"calib/step_q_c_n": 381.0,
|
||
|
|
"calib/step_q_gap": 0.10574134558481396,
|
||
|
|
"calib/step_q_w": 0.7253872633390707,
|
||
|
|
"calib/step_q_w_n": 581.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0384,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 9.833333333333332e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 6219486.0,
|
||
|
|
"reward": 0.91796875,
|
||
|
|
"reward_std": 0.15253356099128723,
|
||
|
|
"rewards/accuracy_reward_step": 0.421875,
|
||
|
|
"rewards/format_reward_step": 0.9921875,
|
||
|
|
"step": 24
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.6846522781774581,
|
||
|
|
"calib/avg_num_step_conf": 4.0703125,
|
||
|
|
"calib/ece": 0.385234375,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 1.0,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.546875,
|
||
|
|
"calib/gap": 0.11813072618828024,
|
||
|
|
"calib/mean_conf": 0.8285937499999999,
|
||
|
|
"calib/mu_c": 0.8927350427350429,
|
||
|
|
"calib/mu_w": 0.7746043165467627,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
||
|
|
"calib/pce": 0.3783984375,
|
||
|
|
"calib/std_conf": 0.2325579039958382,
|
||
|
|
"calib/step_conf_rate": 1.0,
|
||
|
|
"calib/step_q_c": 0.8196636771300448,
|
||
|
|
"calib/step_q_c_n": 446.0,
|
||
|
|
"calib/step_q_gap": 0.09520058988172275,
|
||
|
|
"calib/step_q_w": 0.7244630872483221,
|
||
|
|
"calib/step_q_w_n": 596.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.04,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 9.777777777777778e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 6480774.0,
|
||
|
|
"reward": 0.95703125,
|
||
|
|
"reward_std": 0.14453580975532532,
|
||
|
|
"rewards/accuracy_reward_step": 0.45703125,
|
||
|
|
"rewards/format_reward_step": 1.0,
|
||
|
|
"step": 25
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.6871333964049195,
|
||
|
|
"calib/avg_num_step_conf": 3.8984375,
|
||
|
|
"calib/ece": 0.4467187499999999,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.9921875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.57421875,
|
||
|
|
"calib/gap": 0.13802081362346263,
|
||
|
|
"calib/mean_conf": 0.8568749999999999,
|
||
|
|
"calib/mu_c": 0.9382857142857143,
|
||
|
|
"calib/mu_w": 0.8002649006622516,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
||
|
|
"calib/pce": 0.4467187499999999,
|
||
|
|
"calib/std_conf": 0.21402851007517668,
|
||
|
|
"calib/step_conf_rate": 0.9921875,
|
||
|
|
"calib/step_q_c": 0.8427653631284916,
|
||
|
|
"calib/step_q_c_n": 358.0,
|
||
|
|
"calib/step_q_gap": 0.10604661312849162,
|
||
|
|
"calib/step_q_w": 0.73671875,
|
||
|
|
"calib/step_q_w_n": 640.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0416,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 9.722222222222222e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 6740750.0,
|
||
|
|
"reward": 0.90625,
|
||
|
|
"reward_std": 0.1425735503435135,
|
||
|
|
"rewards/accuracy_reward_step": 0.41015625,
|
||
|
|
"rewards/format_reward_step": 0.9921875,
|
||
|
|
"step": 26
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7142270861833105,
|
||
|
|
"calib/avg_num_step_conf": 3.91796875,
|
||
|
|
"calib/ece": 0.45214843750000006,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.9921875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.3984375,
|
||
|
|
"calib/gap": 0.1802612859097128,
|
||
|
|
"calib/mean_conf": 0.7880859375,
|
||
|
|
"calib/mu_c": 0.9077906976744187,
|
||
|
|
"calib/mu_w": 0.7275294117647059,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
||
|
|
"calib/pce": 0.45214843750000006,
|
||
|
|
"calib/std_conf": 0.26491863513114,
|
||
|
|
"calib/step_conf_rate": 0.9921875,
|
||
|
|
"calib/step_q_c": 0.7893265993265993,
|
||
|
|
"calib/step_q_c_n": 297.0,
|
||
|
|
"calib/step_q_gap": 0.10204614606880891,
|
||
|
|
"calib/step_q_w": 0.6872804532577904,
|
||
|
|
"calib/step_q_w_n": 706.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0432,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 9.666666666666666e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 7002198.0,
|
||
|
|
"reward": 0.83203125,
|
||
|
|
"reward_std": 0.1539076715707779,
|
||
|
|
"rewards/accuracy_reward_step": 0.3359375,
|
||
|
|
"rewards/format_reward_step": 0.9921875,
|
||
|
|
"step": 27
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.6436518124579743,
|
||
|
|
"calib/avg_num_step_conf": 3.91796875,
|
||
|
|
"calib/ece": 0.400703125,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.98828125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.58984375,
|
||
|
|
"calib/gap": 0.09549238951036132,
|
||
|
|
"calib/mean_conf": 0.8784375,
|
||
|
|
"calib/mu_c": 0.9280487804878049,
|
||
|
|
"calib/mu_w": 0.8325563909774436,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
||
|
|
"calib/pce": 0.3993359375,
|
||
|
|
"calib/std_conf": 0.18154474956260783,
|
||
|
|
"calib/step_conf_rate": 0.98828125,
|
||
|
|
"calib/step_q_c": 0.809004329004329,
|
||
|
|
"calib/step_q_c_n": 462.0,
|
||
|
|
"calib/step_q_gap": 0.04501172271967102,
|
||
|
|
"calib/step_q_w": 0.7639926062846579,
|
||
|
|
"calib/step_q_w_n": 541.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0448,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 9.61111111111111e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 7258086.0,
|
||
|
|
"reward": 0.974609375,
|
||
|
|
"reward_std": 0.14191211760044098,
|
||
|
|
"rewards/accuracy_reward_step": 0.48046875,
|
||
|
|
"rewards/format_reward_step": 0.98828125,
|
||
|
|
"step": 28
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.5710377358490567,
|
||
|
|
"calib/avg_num_step_conf": 4.1171875,
|
||
|
|
"calib/ece": 0.47949218749999994,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 1.0,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.60546875,
|
||
|
|
"calib/gap": 0.025973584905660374,
|
||
|
|
"calib/mean_conf": 0.8935546875,
|
||
|
|
"calib/mu_c": 0.9087735849056605,
|
||
|
|
"calib/mu_w": 0.8828000000000001,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
||
|
|
"calib/pce": 0.47949218749999994,
|
||
|
|
"calib/std_conf": 0.1359380780788714,
|
||
|
|
"calib/step_conf_rate": 1.0,
|
||
|
|
"calib/step_q_c": 0.8161576354679804,
|
||
|
|
"calib/step_q_c_n": 406.0,
|
||
|
|
"calib/step_q_gap": 0.013565042875387845,
|
||
|
|
"calib/step_q_w": 0.8025925925925925,
|
||
|
|
"calib/step_q_w_n": 648.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0464,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 9.555555555555556e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 7518990.0,
|
||
|
|
"reward": 0.9140625,
|
||
|
|
"reward_std": 0.20437544584274292,
|
||
|
|
"rewards/accuracy_reward_step": 0.4140625,
|
||
|
|
"rewards/format_reward_step": 1.0,
|
||
|
|
"step": 29
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7290471785383904,
|
||
|
|
"calib/avg_num_step_conf": 3.73828125,
|
||
|
|
"calib/ece": 0.3382812500000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.9921875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.47265625,
|
||
|
|
"calib/gap": 0.23330496453900718,
|
||
|
|
"calib/mean_conf": 0.7875,
|
||
|
|
"calib/mu_c": 0.916,
|
||
|
|
"calib/mu_w": 0.6826950354609929,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
||
|
|
"calib/pce": 0.3382812500000001,
|
||
|
|
"calib/std_conf": 0.28351725035701086,
|
||
|
|
"calib/step_conf_rate": 0.9921875,
|
||
|
|
"calib/step_q_c": 0.7971393643031784,
|
||
|
|
"calib/step_q_c_n": 409.0,
|
||
|
|
"calib/step_q_gap": 0.14967586065354344,
|
||
|
|
"calib/step_q_w": 0.647463503649635,
|
||
|
|
"calib/step_q_w_n": 548.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.048,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 9.499999999999999e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 7779406.0,
|
||
|
|
"reward": 0.9453125,
|
||
|
|
"reward_std": 0.15570057928562164,
|
||
|
|
"rewards/accuracy_reward_step": 0.44921875,
|
||
|
|
"rewards/format_reward_step": 0.9921875,
|
||
|
|
"step": 30
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.6527393469839512,
|
||
|
|
"calib/avg_num_step_conf": 4.13671875,
|
||
|
|
"calib/ece": 0.4433984375000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 1.0,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.609375,
|
||
|
|
"calib/gap": 0.07098874746356765,
|
||
|
|
"calib/mean_conf": 0.9004296875,
|
||
|
|
"calib/mu_c": 0.9389743589743591,
|
||
|
|
"calib/mu_w": 0.8679856115107915,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
||
|
|
"calib/pce": 0.4433984375000001,
|
||
|
|
"calib/std_conf": 0.12497582163623627,
|
||
|
|
"calib/step_conf_rate": 1.0,
|
||
|
|
"calib/step_q_c": 0.8435280898876405,
|
||
|
|
"calib/step_q_c_n": 445.0,
|
||
|
|
"calib/step_q_gap": 0.0760362332101161,
|
||
|
|
"calib/step_q_w": 0.7674918566775244,
|
||
|
|
"calib/step_q_w_n": 614.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0496,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 9.444444444444444e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 8037134.0,
|
||
|
|
"reward": 0.95703125,
|
||
|
|
"reward_std": 0.23197564482688904,
|
||
|
|
"rewards/accuracy_reward_step": 0.45703125,
|
||
|
|
"rewards/format_reward_step": 1.0,
|
||
|
|
"step": 31
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.6696042150159944,
|
||
|
|
"calib/avg_num_step_conf": 3.9765625,
|
||
|
|
"calib/ece": 0.4343750000000002,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.98828125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.6015625,
|
||
|
|
"calib/gap": 0.14466286144389395,
|
||
|
|
"calib/mean_conf": 0.85234375,
|
||
|
|
"calib/mu_c": 0.9365420560747665,
|
||
|
|
"calib/mu_w": 0.7918791946308725,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
||
|
|
"calib/pce": 0.4343750000000002,
|
||
|
|
"calib/std_conf": 0.2301612355196624,
|
||
|
|
"calib/step_conf_rate": 0.98828125,
|
||
|
|
"calib/step_q_c": 0.8250469483568076,
|
||
|
|
"calib/step_q_c_n": 426.0,
|
||
|
|
"calib/step_q_gap": 0.11391519160005081,
|
||
|
|
"calib/step_q_w": 0.7111317567567568,
|
||
|
|
"calib/step_q_w_n": 592.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0512,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 9.388888888888888e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 8298086.0,
|
||
|
|
"reward": 0.912109375,
|
||
|
|
"reward_std": 0.1129549890756607,
|
||
|
|
"rewards/accuracy_reward_step": 0.41796875,
|
||
|
|
"rewards/format_reward_step": 0.98828125,
|
||
|
|
"step": 32
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.7410264792415822,
|
||
|
|
"calib/avg_num_step_conf": 4.1171875,
|
||
|
|
"calib/ece": 0.4749609375000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.99609375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.5,
|
||
|
|
"calib/gap": 0.1331435109512914,
|
||
|
|
"calib/mean_conf": 0.8460546875,
|
||
|
|
"calib/mu_c": 0.9297894736842106,
|
||
|
|
"calib/mu_w": 0.7966459627329192,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
||
|
|
"calib/pce": 0.4749609375000001,
|
||
|
|
"calib/std_conf": 0.18993127081730735,
|
||
|
|
"calib/step_conf_rate": 1.0,
|
||
|
|
"calib/step_q_c": 0.8218933333333335,
|
||
|
|
"calib/step_q_c_n": 375.0,
|
||
|
|
"calib/step_q_gap": 0.09179024054982832,
|
||
|
|
"calib/step_q_w": 0.7301030927835052,
|
||
|
|
"calib/step_q_w_n": 679.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0528,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 9.333333333333333e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 8557862.0,
|
||
|
|
"reward": 0.869140625,
|
||
|
|
"reward_std": 0.240284264087677,
|
||
|
|
"rewards/accuracy_reward_step": 0.37109375,
|
||
|
|
"rewards/format_reward_step": 0.99609375,
|
||
|
|
"step": 33
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.9921875,
|
||
|
|
"calib/auroc": 0.7619836458480191,
|
||
|
|
"calib/avg_num_step_conf": 3.8671875,
|
||
|
|
"calib/ece": 0.48741176470588243,
|
||
|
|
"calib/final_conf_rate": 0.99609375,
|
||
|
|
"calib/format_rate": 0.98828125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.48627450980392156,
|
||
|
|
"calib/gap": 0.19402016072183847,
|
||
|
|
"calib/mean_conf": 0.8089803921568628,
|
||
|
|
"calib/mu_c": 0.9406097560975611,
|
||
|
|
"calib/mu_w": 0.7465895953757227,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
||
|
|
"calib/pce": 0.48741176470588243,
|
||
|
|
"calib/std_conf": 0.23772891879453528,
|
||
|
|
"calib/step_conf_rate": 0.9921875,
|
||
|
|
"calib/step_q_c": 0.8473958333333333,
|
||
|
|
"calib/step_q_c_n": 288.0,
|
||
|
|
"calib/step_q_gap": 0.17088586182336185,
|
||
|
|
"calib/step_q_w": 0.6765099715099715,
|
||
|
|
"calib/step_q_w_n": 702.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 236.0,
|
||
|
|
"completions/max_terminated_length": 236.0,
|
||
|
|
"completions/mean_length": 0.921875,
|
||
|
|
"completions/mean_terminated_length": 236.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 236.0,
|
||
|
|
"epoch": 0.0544,
|
||
|
|
"grad_norm": 2.0495622158050537,
|
||
|
|
"learning_rate": 9.277777777777777e-07,
|
||
|
|
"loss": 0.0386,
|
||
|
|
"num_tokens": 8818570.0,
|
||
|
|
"reward": 0.814453125,
|
||
|
|
"reward_std": 0.1730746477842331,
|
||
|
|
"rewards/accuracy_reward_step": 0.3203125,
|
||
|
|
"rewards/format_reward_step": 0.98828125,
|
||
|
|
"step": 34
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.7191611842105263,
|
||
|
|
"calib/avg_num_step_conf": 3.95703125,
|
||
|
|
"calib/ece": 0.27968750000000003,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.99609375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.59765625,
|
||
|
|
"calib/gap": 0.14004048582995943,
|
||
|
|
"calib/mean_conf": 0.8734375,
|
||
|
|
"calib/mu_c": 0.930328947368421,
|
||
|
|
"calib/mu_w": 0.7902884615384616,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
||
|
|
"calib/pce": 0.27968750000000003,
|
||
|
|
"calib/std_conf": 0.18017325646096868,
|
||
|
|
"calib/step_conf_rate": 1.0,
|
||
|
|
"calib/step_q_c": 0.8290814558058925,
|
||
|
|
"calib/step_q_c_n": 577.0,
|
||
|
|
"calib/step_q_gap": 0.10903558424625948,
|
||
|
|
"calib/step_q_w": 0.7200458715596331,
|
||
|
|
"calib/step_q_w_n": 436.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.056,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 9.222222222222222e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 9080714.0,
|
||
|
|
"reward": 1.091796875,
|
||
|
|
"reward_std": 0.11494496464729309,
|
||
|
|
"rewards/accuracy_reward_step": 0.59375,
|
||
|
|
"rewards/format_reward_step": 0.99609375,
|
||
|
|
"step": 35
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7023721275018532,
|
||
|
|
"calib/avg_num_step_conf": 3.796875,
|
||
|
|
"calib/ece": 0.3784375000000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.9921875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.54296875,
|
||
|
|
"calib/gap": 0.18079565109957985,
|
||
|
|
"calib/mean_conf": 0.82375,
|
||
|
|
"calib/mu_c": 0.9240350877192981,
|
||
|
|
"calib/mu_w": 0.7432394366197183,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
||
|
|
"calib/pce": 0.3784375000000001,
|
||
|
|
"calib/std_conf": 0.24279621084357966,
|
||
|
|
"calib/step_conf_rate": 0.9921875,
|
||
|
|
"calib/step_q_c": 0.8311666666666666,
|
||
|
|
"calib/step_q_c_n": 420.0,
|
||
|
|
"calib/step_q_gap": 0.09098550724637677,
|
||
|
|
"calib/step_q_w": 0.7401811594202898,
|
||
|
|
"calib/step_q_w_n": 552.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0576,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 9.166666666666665e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 9332594.0,
|
||
|
|
"reward": 0.94140625,
|
||
|
|
"reward_std": 0.2540907561779022,
|
||
|
|
"rewards/accuracy_reward_step": 0.4453125,
|
||
|
|
"rewards/format_reward_step": 0.9921875,
|
||
|
|
"step": 36
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7154203323558163,
|
||
|
|
"calib/avg_num_step_conf": 3.90234375,
|
||
|
|
"calib/ece": 0.31472656250000003,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 1.0,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.52734375,
|
||
|
|
"calib/gap": 0.18695747800586504,
|
||
|
|
"calib/mean_conf": 0.8303515625,
|
||
|
|
"calib/mu_c": 0.9209090909090909,
|
||
|
|
"calib/mu_w": 0.7339516129032259,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
||
|
|
"calib/pce": 0.31472656250000003,
|
||
|
|
"calib/std_conf": 0.22341864745094261,
|
||
|
|
"calib/step_conf_rate": 1.0,
|
||
|
|
"calib/step_q_c": 0.8170146137787057,
|
||
|
|
"calib/step_q_c_n": 479.0,
|
||
|
|
"calib/step_q_gap": 0.11362999839409027,
|
||
|
|
"calib/step_q_w": 0.7033846153846154,
|
||
|
|
"calib/step_q_w_n": 520.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0592,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 9.11111111111111e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 9593346.0,
|
||
|
|
"reward": 1.015625,
|
||
|
|
"reward_std": 0.14032596349716187,
|
||
|
|
"rewards/accuracy_reward_step": 0.515625,
|
||
|
|
"rewards/format_reward_step": 1.0,
|
||
|
|
"step": 37
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7065397350993379,
|
||
|
|
"calib/avg_num_step_conf": 3.98046875,
|
||
|
|
"calib/ece": 0.469529411764706,
|
||
|
|
"calib/final_conf_rate": 0.99609375,
|
||
|
|
"calib/format_rate": 0.99609375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.5490196078431373,
|
||
|
|
"calib/gap": 0.09894167091186956,
|
||
|
|
"calib/mean_conf": 0.8773725490196078,
|
||
|
|
"calib/mu_c": 0.9359615384615385,
|
||
|
|
"calib/mu_w": 0.837019867549669,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
||
|
|
"calib/pce": 0.469529411764706,
|
||
|
|
"calib/std_conf": 0.15290827197571927,
|
||
|
|
"calib/step_conf_rate": 1.0,
|
||
|
|
"calib/step_q_c": 0.8486821705426356,
|
||
|
|
"calib/step_q_c_n": 387.0,
|
||
|
|
"calib/step_q_gap": 0.08409356294769899,
|
||
|
|
"calib/step_q_w": 0.7645886075949366,
|
||
|
|
"calib/step_q_w_n": 632.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0608,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 9.055555555555556e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 9854890.0,
|
||
|
|
"reward": 0.904296875,
|
||
|
|
"reward_std": 0.1345357596874237,
|
||
|
|
"rewards/accuracy_reward_step": 0.40625,
|
||
|
|
"rewards/format_reward_step": 0.99609375,
|
||
|
|
"step": 38
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.6879616477272728,
|
||
|
|
"calib/avg_num_step_conf": 4.26953125,
|
||
|
|
"calib/ece": 0.5490625,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 1.0,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.52734375,
|
||
|
|
"calib/gap": 0.10099999999999987,
|
||
|
|
"calib/mean_conf": 0.8615624999999999,
|
||
|
|
"calib/mu_c": 0.9309999999999998,
|
||
|
|
"calib/mu_w": 0.83,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
||
|
|
"calib/pce": 0.5490625,
|
||
|
|
"calib/std_conf": 0.1703532061446159,
|
||
|
|
"calib/step_conf_rate": 1.0,
|
||
|
|
"calib/step_q_c": 0.8321254355400697,
|
||
|
|
"calib/step_q_c_n": 287.0,
|
||
|
|
"calib/step_q_gap": 0.0957110434805164,
|
||
|
|
"calib/step_q_w": 0.7364143920595533,
|
||
|
|
"calib/step_q_w_n": 806.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0624,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 9e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 10115306.0,
|
||
|
|
"reward": 0.8125,
|
||
|
|
"reward_std": 0.11705182492733002,
|
||
|
|
"rewards/accuracy_reward_step": 0.3125,
|
||
|
|
"rewards/format_reward_step": 1.0,
|
||
|
|
"step": 39
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.6907894736842105,
|
||
|
|
"calib/avg_num_step_conf": 3.921875,
|
||
|
|
"calib/ece": 0.3821093750000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.9921875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.48046875,
|
||
|
|
"calib/gap": 0.1520360761057571,
|
||
|
|
"calib/mean_conf": 0.827421875,
|
||
|
|
"calib/mu_c": 0.9117543859649122,
|
||
|
|
"calib/mu_w": 0.7597183098591551,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
||
|
|
"calib/pce": 0.3821093750000001,
|
||
|
|
"calib/std_conf": 0.20778123296266285,
|
||
|
|
"calib/step_conf_rate": 0.9921875,
|
||
|
|
"calib/step_q_c": 0.8189731051344744,
|
||
|
|
"calib/step_q_c_n": 409.0,
|
||
|
|
"calib/step_q_gap": 0.09693949168909621,
|
||
|
|
"calib/step_q_w": 0.7220336134453782,
|
||
|
|
"calib/step_q_w_n": 595.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.064,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 8.944444444444445e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 10377042.0,
|
||
|
|
"reward": 0.94140625,
|
||
|
|
"reward_std": 0.15137451887130737,
|
||
|
|
"rewards/accuracy_reward_step": 0.4453125,
|
||
|
|
"rewards/format_reward_step": 0.9921875,
|
||
|
|
"step": 40
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7688755020080322,
|
||
|
|
"calib/avg_num_step_conf": 3.65234375,
|
||
|
|
"calib/ece": 0.45261718750000013,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.97265625,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.50390625,
|
||
|
|
"calib/gap": 0.20637349397590365,
|
||
|
|
"calib/mean_conf": 0.8041796874999999,
|
||
|
|
"calib/mu_c": 0.9380000000000001,
|
||
|
|
"calib/mu_w": 0.7316265060240964,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.97265625,
|
||
|
|
"calib/pce": 0.45261718750000013,
|
||
|
|
"calib/std_conf": 0.2582439778337577,
|
||
|
|
"calib/step_conf_rate": 0.97265625,
|
||
|
|
"calib/step_q_c": 0.8210031347962383,
|
||
|
|
"calib/step_q_c_n": 319.0,
|
||
|
|
"calib/step_q_gap": 0.1073667711598747,
|
||
|
|
"calib/step_q_w": 0.7136363636363636,
|
||
|
|
"calib/step_q_w_n": 616.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0656,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 8.888888888888888e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 10639186.0,
|
||
|
|
"reward": 0.837890625,
|
||
|
|
"reward_std": 0.09316996484994888,
|
||
|
|
"rewards/accuracy_reward_step": 0.3515625,
|
||
|
|
"rewards/format_reward_step": 0.97265625,
|
||
|
|
"step": 41
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.6606522817460319,
|
||
|
|
"calib/avg_num_step_conf": 3.76171875,
|
||
|
|
"calib/ece": 0.2989843750000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.99609375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.59375,
|
||
|
|
"calib/gap": 0.1451785714285716,
|
||
|
|
"calib/mean_conf": 0.861484375,
|
||
|
|
"calib/mu_c": 0.9250000000000002,
|
||
|
|
"calib/mu_w": 0.7798214285714286,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
||
|
|
"calib/pce": 0.2989843750000001,
|
||
|
|
"calib/std_conf": 0.20859683693876896,
|
||
|
|
"calib/step_conf_rate": 0.99609375,
|
||
|
|
"calib/step_q_c": 0.832390438247012,
|
||
|
|
"calib/step_q_c_n": 502.0,
|
||
|
|
"calib/step_q_gap": 0.109744017422717,
|
||
|
|
"calib/step_q_w": 0.722646420824295,
|
||
|
|
"calib/step_q_w_n": 461.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0672,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 8.833333333333333e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 10896498.0,
|
||
|
|
"reward": 1.060546875,
|
||
|
|
"reward_std": 0.12770098447799683,
|
||
|
|
"rewards/accuracy_reward_step": 0.5625,
|
||
|
|
"rewards/format_reward_step": 0.99609375,
|
||
|
|
"step": 42
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.6731284733573063,
|
||
|
|
"calib/avg_num_step_conf": 3.61328125,
|
||
|
|
"calib/ece": 0.4968359375000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.99609375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.50390625,
|
||
|
|
"calib/gap": 0.0898247793396535,
|
||
|
|
"calib/mean_conf": 0.8679296875,
|
||
|
|
"calib/mu_c": 0.924421052631579,
|
||
|
|
"calib/mu_w": 0.8345962732919255,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
||
|
|
"calib/pce": 0.4968359375000001,
|
||
|
|
"calib/std_conf": 0.1801800611364985,
|
||
|
|
"calib/step_conf_rate": 0.99609375,
|
||
|
|
"calib/step_q_c": 0.8369817073170731,
|
||
|
|
"calib/step_q_c_n": 328.0,
|
||
|
|
"calib/step_q_gap": 0.06959477264370628,
|
||
|
|
"calib/step_q_w": 0.7673869346733668,
|
||
|
|
"calib/step_q_w_n": 597.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0688,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 8.777777777777777e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 11158642.0,
|
||
|
|
"reward": 0.869140625,
|
||
|
|
"reward_std": 0.20872020721435547,
|
||
|
|
"rewards/accuracy_reward_step": 0.37109375,
|
||
|
|
"rewards/format_reward_step": 0.99609375,
|
||
|
|
"step": 43
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.6859880315762669,
|
||
|
|
"calib/avg_num_step_conf": 3.96875,
|
||
|
|
"calib/ece": 0.464140625,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.9921875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.60546875,
|
||
|
|
"calib/gap": 0.129842118665648,
|
||
|
|
"calib/mean_conf": 0.862578125,
|
||
|
|
"calib/mu_c": 0.9406862745098039,
|
||
|
|
"calib/mu_w": 0.8108441558441559,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
||
|
|
"calib/pce": 0.464140625,
|
||
|
|
"calib/std_conf": 0.2055489832655087,
|
||
|
|
"calib/step_conf_rate": 0.9921875,
|
||
|
|
"calib/step_q_c": 0.8461064425770308,
|
||
|
|
"calib/step_q_c_n": 357.0,
|
||
|
|
"calib/step_q_gap": 0.11071949265290337,
|
||
|
|
"calib/step_q_w": 0.7353869499241275,
|
||
|
|
"calib/step_q_w_n": 659.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0704,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 8.722222222222222e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 11417162.0,
|
||
|
|
"reward": 0.89453125,
|
||
|
|
"reward_std": 0.189998596906662,
|
||
|
|
"rewards/accuracy_reward_step": 0.3984375,
|
||
|
|
"rewards/format_reward_step": 0.9921875,
|
||
|
|
"step": 44
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.5858134920634922,
|
||
|
|
"calib/avg_num_step_conf": 4.01171875,
|
||
|
|
"calib/ece": 0.43546874999999996,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.9921875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.53515625,
|
||
|
|
"calib/gap": 0.045575396825396686,
|
||
|
|
"calib/mean_conf": 0.872578125,
|
||
|
|
"calib/mu_c": 0.8982142857142856,
|
||
|
|
"calib/mu_w": 0.852638888888889,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
||
|
|
"calib/pce": 0.43527343749999997,
|
||
|
|
"calib/std_conf": 0.1498345237970354,
|
||
|
|
"calib/step_conf_rate": 0.99609375,
|
||
|
|
"calib/step_q_c": 0.7948291571753987,
|
||
|
|
"calib/step_q_c_n": 439.0,
|
||
|
|
"calib/step_q_gap": 0.004369973501929358,
|
||
|
|
"calib/step_q_w": 0.7904591836734693,
|
||
|
|
"calib/step_q_w_n": 588.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.072,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 8.666666666666667e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 11671066.0,
|
||
|
|
"reward": 0.93359375,
|
||
|
|
"reward_std": 0.18399940431118011,
|
||
|
|
"rewards/accuracy_reward_step": 0.4375,
|
||
|
|
"rewards/format_reward_step": 0.9921875,
|
||
|
|
"step": 45
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.6808314234784822,
|
||
|
|
"calib/avg_num_step_conf": 3.90234375,
|
||
|
|
"calib/ece": 0.4376953125000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.9921875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.515625,
|
||
|
|
"calib/gap": 0.14544563279857414,
|
||
|
|
"calib/mean_conf": 0.8361328124999998,
|
||
|
|
"calib/mu_c": 0.9236274509803923,
|
||
|
|
"calib/mu_w": 0.7781818181818182,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
||
|
|
"calib/pce": 0.4376953125000001,
|
||
|
|
"calib/std_conf": 0.2194014539282724,
|
||
|
|
"calib/step_conf_rate": 0.9921875,
|
||
|
|
"calib/step_q_c": 0.8291452991452992,
|
||
|
|
"calib/step_q_c_n": 351.0,
|
||
|
|
"calib/step_q_gap": 0.09433048433048441,
|
||
|
|
"calib/step_q_w": 0.7348148148148148,
|
||
|
|
"calib/step_q_w_n": 648.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0736,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 8.611111111111111e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 11925954.0,
|
||
|
|
"reward": 0.89453125,
|
||
|
|
"reward_std": 0.13965940475463867,
|
||
|
|
"rewards/accuracy_reward_step": 0.3984375,
|
||
|
|
"rewards/format_reward_step": 0.9921875,
|
||
|
|
"step": 46
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.72648835202761,
|
||
|
|
"calib/avg_num_step_conf": 3.4140625,
|
||
|
|
"calib/ece": 0.3258039215686274,
|
||
|
|
"calib/final_conf_rate": 0.99609375,
|
||
|
|
"calib/format_rate": 0.96875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.5843137254901961,
|
||
|
|
"calib/gap": 0.1812251941328733,
|
||
|
|
"calib/mean_conf": 0.8469803921568628,
|
||
|
|
"calib/mu_c": 0.9336842105263159,
|
||
|
|
"calib/mu_w": 0.7524590163934426,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.96875,
|
||
|
|
"calib/pce": 0.3256078431372549,
|
||
|
|
"calib/std_conf": 0.2385591956131872,
|
||
|
|
"calib/step_conf_rate": 0.96875,
|
||
|
|
"calib/step_q_c": 0.8489910313901344,
|
||
|
|
"calib/step_q_c_n": 446.0,
|
||
|
|
"calib/step_q_gap": 0.12798635849293805,
|
||
|
|
"calib/step_q_w": 0.7210046728971964,
|
||
|
|
"calib/step_q_w_n": 428.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 157.0,
|
||
|
|
"completions/max_terminated_length": 157.0,
|
||
|
|
"completions/mean_length": 0.61328125,
|
||
|
|
"completions/mean_terminated_length": 157.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 157.0,
|
||
|
|
"epoch": 0.0752,
|
||
|
|
"grad_norm": 2.0297513008117676,
|
||
|
|
"learning_rate": 8.555555555555555e-07,
|
||
|
|
"loss": 0.0386,
|
||
|
|
"num_tokens": 12186583.0,
|
||
|
|
"reward": 1.00390625,
|
||
|
|
"reward_std": 0.14966705441474915,
|
||
|
|
"rewards/accuracy_reward_step": 0.51953125,
|
||
|
|
"rewards/format_reward_step": 0.96875,
|
||
|
|
"step": 47
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.6644511015415341,
|
||
|
|
"calib/avg_num_step_conf": 3.7734375,
|
||
|
|
"calib/ece": 0.38878906250000017,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.984375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.45703125,
|
||
|
|
"calib/gap": 0.16714909817137868,
|
||
|
|
"calib/mean_conf": 0.8145703125,
|
||
|
|
"calib/mu_c": 0.9105504587155964,
|
||
|
|
"calib/mu_w": 0.7434013605442177,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
||
|
|
"calib/pce": 0.38878906250000017,
|
||
|
|
"calib/std_conf": 0.2448257729869393,
|
||
|
|
"calib/step_conf_rate": 0.984375,
|
||
|
|
"calib/step_q_c": 0.7859999999999999,
|
||
|
|
"calib/step_q_c_n": 365.0,
|
||
|
|
"calib/step_q_gap": 0.10676539101497495,
|
||
|
|
"calib/step_q_w": 0.679234608985025,
|
||
|
|
"calib/step_q_w_n": 601.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0768,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 8.499999999999999e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 12439255.0,
|
||
|
|
"reward": 0.91796875,
|
||
|
|
"reward_std": 0.12916389107704163,
|
||
|
|
"rewards/accuracy_reward_step": 0.42578125,
|
||
|
|
"rewards/format_reward_step": 0.984375,
|
||
|
|
"step": 48
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.7913225725725725,
|
||
|
|
"calib/avg_num_step_conf": 3.71484375,
|
||
|
|
"calib/ece": 0.3591372549019609,
|
||
|
|
"calib/final_conf_rate": 0.99609375,
|
||
|
|
"calib/format_rate": 0.9765625,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.45098039215686275,
|
||
|
|
"calib/gap": 0.23496433933933936,
|
||
|
|
"calib/mean_conf": 0.7944313725490195,
|
||
|
|
"calib/mu_c": 0.9271171171171172,
|
||
|
|
"calib/mu_w": 0.6921527777777778,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.9765625,
|
||
|
|
"calib/pce": 0.3591372549019609,
|
||
|
|
"calib/std_conf": 0.264440901504121,
|
||
|
|
"calib/step_conf_rate": 0.9765625,
|
||
|
|
"calib/step_q_c": 0.824221635883905,
|
||
|
|
"calib/step_q_c_n": 379.0,
|
||
|
|
"calib/step_q_gap": 0.13247338413565335,
|
||
|
|
"calib/step_q_w": 0.6917482517482516,
|
||
|
|
"calib/step_q_w_n": 572.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 53.0,
|
||
|
|
"completions/max_terminated_length": 53.0,
|
||
|
|
"completions/mean_length": 0.20703125,
|
||
|
|
"completions/mean_terminated_length": 53.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 53.0,
|
||
|
|
"epoch": 0.0784,
|
||
|
|
"grad_norm": 4.472797393798828,
|
||
|
|
"learning_rate": 8.444444444444444e-07,
|
||
|
|
"loss": 0.0189,
|
||
|
|
"num_tokens": 12697988.0,
|
||
|
|
"reward": 0.921875,
|
||
|
|
"reward_std": 0.1474648416042328,
|
||
|
|
"rewards/accuracy_reward_step": 0.43359375,
|
||
|
|
"rewards/format_reward_step": 0.9765625,
|
||
|
|
"step": 49
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.576751708984375,
|
||
|
|
"calib/avg_num_step_conf": 3.75390625,
|
||
|
|
"calib/ece": 0.37703124999999993,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 1.0,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.58203125,
|
||
|
|
"calib/gap": 0.07781249999999995,
|
||
|
|
"calib/mean_conf": 0.8727343749999998,
|
||
|
|
"calib/mu_c": 0.9116406249999999,
|
||
|
|
"calib/mu_w": 0.8338281249999999,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
||
|
|
"calib/pce": 0.3748828125,
|
||
|
|
"calib/std_conf": 0.1664365252682216,
|
||
|
|
"calib/step_conf_rate": 1.0,
|
||
|
|
"calib/step_q_c": 0.8098547717842323,
|
||
|
|
"calib/step_q_c_n": 482.0,
|
||
|
|
"calib/step_q_gap": 0.043529093287363874,
|
||
|
|
"calib/step_q_w": 0.7663256784968684,
|
||
|
|
"calib/step_q_w_n": 479.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.08,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 8.388888888888888e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 12959236.0,
|
||
|
|
"reward": 1.0,
|
||
|
|
"reward_std": 0.13269482553005219,
|
||
|
|
"rewards/accuracy_reward_step": 0.5,
|
||
|
|
"rewards/format_reward_step": 1.0,
|
||
|
|
"step": 50
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.6815934065934066,
|
||
|
|
"calib/avg_num_step_conf": 3.86328125,
|
||
|
|
"calib/ece": 0.36871093750000006,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.9921875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.55078125,
|
||
|
|
"calib/gap": 0.10982051282051297,
|
||
|
|
"calib/mean_conf": 0.8608984375,
|
||
|
|
"calib/mu_c": 0.9166666666666667,
|
||
|
|
"calib/mu_w": 0.8068461538461538,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
||
|
|
"calib/pce": 0.36871093750000006,
|
||
|
|
"calib/std_conf": 0.18411415191412794,
|
||
|
|
"calib/step_conf_rate": 0.99609375,
|
||
|
|
"calib/step_q_c": 0.8226382978723404,
|
||
|
|
"calib/step_q_c_n": 470.0,
|
||
|
|
"calib/step_q_gap": 0.07254195875866032,
|
||
|
|
"calib/step_q_w": 0.7500963391136801,
|
||
|
|
"calib/step_q_w_n": 519.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0816,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 8.333333333333333e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 13216020.0,
|
||
|
|
"reward": 0.98828125,
|
||
|
|
"reward_std": 0.17532755434513092,
|
||
|
|
"rewards/accuracy_reward_step": 0.4921875,
|
||
|
|
"rewards/format_reward_step": 0.9921875,
|
||
|
|
"step": 51
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.6790260099612617,
|
||
|
|
"calib/avg_num_step_conf": 3.4296875,
|
||
|
|
"calib/ece": 0.4078906250000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.99609375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.5703125,
|
||
|
|
"calib/gap": 0.12646744143147004,
|
||
|
|
"calib/mean_conf": 0.8649218750000001,
|
||
|
|
"calib/mu_c": 0.9335897435897434,
|
||
|
|
"calib/mu_w": 0.8071223021582734,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
||
|
|
"calib/pce": 0.4078906250000001,
|
||
|
|
"calib/std_conf": 0.17717618391444254,
|
||
|
|
"calib/step_conf_rate": 0.99609375,
|
||
|
|
"calib/step_q_c": 0.8444444444444443,
|
||
|
|
"calib/step_q_c_n": 342.0,
|
||
|
|
"calib/step_q_gap": 0.10994817578772786,
|
||
|
|
"calib/step_q_w": 0.7344962686567165,
|
||
|
|
"calib/step_q_w_n": 536.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0832,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 8.277777777777777e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 13477620.0,
|
||
|
|
"reward": 0.955078125,
|
||
|
|
"reward_std": 0.08969886600971222,
|
||
|
|
"rewards/accuracy_reward_step": 0.45703125,
|
||
|
|
"rewards/format_reward_step": 0.99609375,
|
||
|
|
"step": 52
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.8072590738423029,
|
||
|
|
"calib/avg_num_step_conf": 3.796875,
|
||
|
|
"calib/ece": 0.48042968750000015,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.99609375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.40234375,
|
||
|
|
"calib/gap": 0.2586889862327909,
|
||
|
|
"calib/mean_conf": 0.7460546875,
|
||
|
|
"calib/mu_c": 0.9360294117647059,
|
||
|
|
"calib/mu_w": 0.677340425531915,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
||
|
|
"calib/pce": 0.48042968750000015,
|
||
|
|
"calib/std_conf": 0.3078321074616443,
|
||
|
|
"calib/step_conf_rate": 0.99609375,
|
||
|
|
"calib/step_q_c": 0.828135593220339,
|
||
|
|
"calib/step_q_c_n": 236.0,
|
||
|
|
"calib/step_q_gap": 0.14579863669859994,
|
||
|
|
"calib/step_q_w": 0.6823369565217391,
|
||
|
|
"calib/step_q_w_n": 736.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0848,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 8.222222222222221e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 13739124.0,
|
||
|
|
"reward": 0.763671875,
|
||
|
|
"reward_std": 0.1348041594028473,
|
||
|
|
"rewards/accuracy_reward_step": 0.265625,
|
||
|
|
"rewards/format_reward_step": 0.99609375,
|
||
|
|
"step": 53
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.6702647531856983,
|
||
|
|
"calib/avg_num_step_conf": 3.453125,
|
||
|
|
"calib/ece": 0.4116862745098041,
|
||
|
|
"calib/final_conf_rate": 0.99609375,
|
||
|
|
"calib/format_rate": 0.98828125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.5529411764705883,
|
||
|
|
"calib/gap": 0.10485030310528254,
|
||
|
|
"calib/mean_conf": 0.8744313725490196,
|
||
|
|
"calib/mu_c": 0.9307627118644066,
|
||
|
|
"calib/mu_w": 0.8259124087591241,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
||
|
|
"calib/pce": 0.4116862745098041,
|
||
|
|
"calib/std_conf": 0.17598008520373032,
|
||
|
|
"calib/step_conf_rate": 0.98828125,
|
||
|
|
"calib/step_q_c": 0.8468266666666668,
|
||
|
|
"calib/step_q_c_n": 375.0,
|
||
|
|
"calib/step_q_gap": 0.09000937786509522,
|
||
|
|
"calib/step_q_w": 0.7568172888015716,
|
||
|
|
"calib/step_q_w_n": 509.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 18.0,
|
||
|
|
"completions/max_terminated_length": 18.0,
|
||
|
|
"completions/mean_length": 0.0703125,
|
||
|
|
"completions/mean_terminated_length": 18.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 18.0,
|
||
|
|
"epoch": 0.0864,
|
||
|
|
"grad_norm": 10.00183391571045,
|
||
|
|
"learning_rate": 8.166666666666666e-07,
|
||
|
|
"loss": 0.0253,
|
||
|
|
"num_tokens": 13999046.0,
|
||
|
|
"reward": 0.955078125,
|
||
|
|
"reward_std": 0.09943175315856934,
|
||
|
|
"rewards/accuracy_reward_step": 0.4609375,
|
||
|
|
"rewards/format_reward_step": 0.98828125,
|
||
|
|
"step": 54
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.701171875,
|
||
|
|
"calib/avg_num_step_conf": 3.3984375,
|
||
|
|
"calib/ece": 0.32453125000000016,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.9921875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.5078125,
|
||
|
|
"calib/gap": 0.1929687499999999,
|
||
|
|
"calib/mean_conf": 0.8245312499999999,
|
||
|
|
"calib/mu_c": 0.9210156249999999,
|
||
|
|
"calib/mu_w": 0.728046875,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
||
|
|
"calib/pce": 0.32453125000000016,
|
||
|
|
"calib/std_conf": 0.24327633264137616,
|
||
|
|
"calib/step_conf_rate": 0.9921875,
|
||
|
|
"calib/step_q_c": 0.835586854460094,
|
||
|
|
"calib/step_q_c_n": 426.0,
|
||
|
|
"calib/step_q_gap": 0.11894271031594983,
|
||
|
|
"calib/step_q_w": 0.7166441441441441,
|
||
|
|
"calib/step_q_w_n": 444.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.088,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 8.11111111111111e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 14260494.0,
|
||
|
|
"reward": 0.99609375,
|
||
|
|
"reward_std": 0.16728200018405914,
|
||
|
|
"rewards/accuracy_reward_step": 0.5,
|
||
|
|
"rewards/format_reward_step": 0.9921875,
|
||
|
|
"step": 55
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7270757020757022,
|
||
|
|
"calib/avg_num_step_conf": 3.45703125,
|
||
|
|
"calib/ece": 0.3310937500000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 1.0,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.515625,
|
||
|
|
"calib/gap": 0.1783589743589744,
|
||
|
|
"calib/mean_conf": 0.83890625,
|
||
|
|
"calib/mu_c": 0.9266923076923078,
|
||
|
|
"calib/mu_w": 0.7483333333333334,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
||
|
|
"calib/pce": 0.3310937500000001,
|
||
|
|
"calib/std_conf": 0.2232387118331798,
|
||
|
|
"calib/step_conf_rate": 1.0,
|
||
|
|
"calib/step_q_c": 0.8484615384615385,
|
||
|
|
"calib/step_q_c_n": 403.0,
|
||
|
|
"calib/step_q_gap": 0.12806734759016913,
|
||
|
|
"calib/step_q_w": 0.7203941908713694,
|
||
|
|
"calib/step_q_w_n": 482.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0896,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 8.055555555555556e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 14522638.0,
|
||
|
|
"reward": 1.0078125,
|
||
|
|
"reward_std": 0.15558436512947083,
|
||
|
|
"rewards/accuracy_reward_step": 0.5078125,
|
||
|
|
"rewards/format_reward_step": 1.0,
|
||
|
|
"step": 56
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7003732287449392,
|
||
|
|
"calib/avg_num_step_conf": 3.80078125,
|
||
|
|
"calib/ece": 0.41835937500000003,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 1.0,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.4453125,
|
||
|
|
"calib/gap": 0.12768218623481775,
|
||
|
|
"calib/mean_conf": 0.822265625,
|
||
|
|
"calib/mu_c": 0.8980769230769231,
|
||
|
|
"calib/mu_w": 0.7703947368421054,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
||
|
|
"calib/pce": 0.41718750000000004,
|
||
|
|
"calib/std_conf": 0.2046766307944299,
|
||
|
|
"calib/step_conf_rate": 1.0,
|
||
|
|
"calib/step_q_c": 0.8142191780821919,
|
||
|
|
"calib/step_q_c_n": 365.0,
|
||
|
|
"calib/step_q_gap": 0.08412049387166565,
|
||
|
|
"calib/step_q_w": 0.7300986842105263,
|
||
|
|
"calib/step_q_w_n": 608.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0912,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 8e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 14777966.0,
|
||
|
|
"reward": 0.90625,
|
||
|
|
"reward_std": 0.18083682656288147,
|
||
|
|
"rewards/accuracy_reward_step": 0.40625,
|
||
|
|
"rewards/format_reward_step": 1.0,
|
||
|
|
"step": 57
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.5885396825396825,
|
||
|
|
"calib/avg_num_step_conf": 3.75,
|
||
|
|
"calib/ece": 0.43050980392156885,
|
||
|
|
"calib/final_conf_rate": 0.99609375,
|
||
|
|
"calib/format_rate": 0.9921875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.5019607843137255,
|
||
|
|
"calib/gap": 0.1133523809523811,
|
||
|
|
"calib/mean_conf": 0.8422745098039216,
|
||
|
|
"calib/mu_c": 0.9089523809523811,
|
||
|
|
"calib/mu_w": 0.7956,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
||
|
|
"calib/pce": 0.43050980392156885,
|
||
|
|
"calib/std_conf": 0.20675343641133745,
|
||
|
|
"calib/step_conf_rate": 0.99609375,
|
||
|
|
"calib/step_q_c": 0.819568733153639,
|
||
|
|
"calib/step_q_c_n": 371.0,
|
||
|
|
"calib/step_q_gap": 0.08191168731323151,
|
||
|
|
"calib/step_q_w": 0.7376570458404075,
|
||
|
|
"calib/step_q_w_n": 589.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0928,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 7.944444444444444e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 15037142.0,
|
||
|
|
"reward": 0.90625,
|
||
|
|
"reward_std": 0.17768144607543945,
|
||
|
|
"rewards/accuracy_reward_step": 0.41015625,
|
||
|
|
"rewards/format_reward_step": 0.9921875,
|
||
|
|
"step": 58
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.6769931227712686,
|
||
|
|
"calib/avg_num_step_conf": 3.5390625,
|
||
|
|
"calib/ece": 0.3987450980392158,
|
||
|
|
"calib/final_conf_rate": 0.99609375,
|
||
|
|
"calib/format_rate": 0.984375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.36470588235294116,
|
||
|
|
"calib/gap": 0.16114875191034128,
|
||
|
|
"calib/mean_conf": 0.8018823529411765,
|
||
|
|
"calib/mu_c": 0.8973076923076923,
|
||
|
|
"calib/mu_w": 0.736158940397351,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
||
|
|
"calib/pce": 0.39639215686274526,
|
||
|
|
"calib/std_conf": 0.2409108245281972,
|
||
|
|
"calib/step_conf_rate": 0.984375,
|
||
|
|
"calib/step_q_c": 0.8240173410404624,
|
||
|
|
"calib/step_q_c_n": 346.0,
|
||
|
|
"calib/step_q_gap": 0.10133876961189114,
|
||
|
|
"calib/step_q_w": 0.7226785714285713,
|
||
|
|
"calib/step_q_w_n": 560.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 118.0,
|
||
|
|
"completions/max_terminated_length": 118.0,
|
||
|
|
"completions/mean_length": 0.4609375,
|
||
|
|
"completions/mean_terminated_length": 118.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 118.0,
|
||
|
|
"epoch": 0.0944,
|
||
|
|
"grad_norm": 3.116136074066162,
|
||
|
|
"learning_rate": 7.888888888888889e-07,
|
||
|
|
"loss": 0.0189,
|
||
|
|
"num_tokens": 15299268.0,
|
||
|
|
"reward": 0.8984375,
|
||
|
|
"reward_std": 0.16622620820999146,
|
||
|
|
"rewards/accuracy_reward_step": 0.40625,
|
||
|
|
"rewards/format_reward_step": 0.984375,
|
||
|
|
"step": 59
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7112884160756502,
|
||
|
|
"calib/avg_num_step_conf": 3.8828125,
|
||
|
|
"calib/ece": 0.4412890625000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.99609375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.421875,
|
||
|
|
"calib/gap": 0.16732466509062272,
|
||
|
|
"calib/mean_conf": 0.8084765624999999,
|
||
|
|
"calib/mu_c": 0.9143617021276598,
|
||
|
|
"calib/mu_w": 0.747037037037037,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
||
|
|
"calib/pce": 0.4412890625000001,
|
||
|
|
"calib/std_conf": 0.24047565056193024,
|
||
|
|
"calib/step_conf_rate": 0.99609375,
|
||
|
|
"calib/step_q_c": 0.8276685393258426,
|
||
|
|
"calib/step_q_c_n": 356.0,
|
||
|
|
"calib/step_q_gap": 0.13594440139480812,
|
||
|
|
"calib/step_q_w": 0.6917241379310345,
|
||
|
|
"calib/step_q_w_n": 638.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.096,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 7.833333333333333e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 15561412.0,
|
||
|
|
"reward": 0.865234375,
|
||
|
|
"reward_std": 0.15387766063213348,
|
||
|
|
"rewards/accuracy_reward_step": 0.3671875,
|
||
|
|
"rewards/format_reward_step": 0.99609375,
|
||
|
|
"step": 60
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.749526717557252,
|
||
|
|
"calib/avg_num_step_conf": 3.421875,
|
||
|
|
"calib/ece": 0.303359375,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.96875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.47265625,
|
||
|
|
"calib/gap": 0.21300396946564892,
|
||
|
|
"calib/mean_conf": 0.8150781249999999,
|
||
|
|
"calib/mu_c": 0.9190839694656489,
|
||
|
|
"calib/mu_w": 0.7060799999999999,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.96875,
|
||
|
|
"calib/pce": 0.303359375,
|
||
|
|
"calib/std_conf": 0.2498999677800787,
|
||
|
|
"calib/step_conf_rate": 0.96875,
|
||
|
|
"calib/step_q_c": 0.8293488372093023,
|
||
|
|
"calib/step_q_c_n": 430.0,
|
||
|
|
"calib/step_q_gap": 0.12147888205235169,
|
||
|
|
"calib/step_q_w": 0.7078699551569506,
|
||
|
|
"calib/step_q_w_n": 446.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0976,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 7.777777777777778e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 15823036.0,
|
||
|
|
"reward": 0.99609375,
|
||
|
|
"reward_std": 0.16821058094501495,
|
||
|
|
"rewards/accuracy_reward_step": 0.51171875,
|
||
|
|
"rewards/format_reward_step": 0.96875,
|
||
|
|
"step": 61
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.687896728515625,
|
||
|
|
"calib/avg_num_step_conf": 3.578125,
|
||
|
|
"calib/ece": 0.3723437499999999,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.99609375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.5703125,
|
||
|
|
"calib/gap": 0.11531249999999993,
|
||
|
|
"calib/mean_conf": 0.864921875,
|
||
|
|
"calib/mu_c": 0.9225781249999998,
|
||
|
|
"calib/mu_w": 0.8072656249999999,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
||
|
|
"calib/pce": 0.3686328124999999,
|
||
|
|
"calib/std_conf": 0.20250962976235073,
|
||
|
|
"calib/step_conf_rate": 0.99609375,
|
||
|
|
"calib/step_q_c": 0.8241387559808613,
|
||
|
|
"calib/step_q_c_n": 418.0,
|
||
|
|
"calib/step_q_gap": 0.03986164754712651,
|
||
|
|
"calib/step_q_w": 0.7842771084337348,
|
||
|
|
"calib/step_q_w_n": 498.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.0992,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 7.722222222222222e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 16084228.0,
|
||
|
|
"reward": 0.998046875,
|
||
|
|
"reward_std": 0.1526850312948227,
|
||
|
|
"rewards/accuracy_reward_step": 0.5,
|
||
|
|
"rewards/format_reward_step": 0.99609375,
|
||
|
|
"step": 62
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.6866692601958115,
|
||
|
|
"calib/avg_num_step_conf": 3.515625,
|
||
|
|
"calib/ece": 0.4001953125000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.98828125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.3359375,
|
||
|
|
"calib/gap": 0.18286909161641685,
|
||
|
|
"calib/mean_conf": 0.7791015625,
|
||
|
|
"calib/mu_c": 0.8926804123711339,
|
||
|
|
"calib/mu_w": 0.7098113207547171,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
||
|
|
"calib/pce": 0.4001953125000001,
|
||
|
|
"calib/std_conf": 0.2565858573364062,
|
||
|
|
"calib/step_conf_rate": 0.98828125,
|
||
|
|
"calib/step_q_c": 0.7998165137614678,
|
||
|
|
"calib/step_q_c_n": 327.0,
|
||
|
|
"calib/step_q_gap": 0.0947903357509966,
|
||
|
|
"calib/step_q_w": 0.7050261780104712,
|
||
|
|
"calib/step_q_w_n": 573.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1008,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 7.666666666666667e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 16338804.0,
|
||
|
|
"reward": 0.873046875,
|
||
|
|
"reward_std": 0.25302910804748535,
|
||
|
|
"rewards/accuracy_reward_step": 0.37890625,
|
||
|
|
"rewards/format_reward_step": 0.98828125,
|
||
|
|
"step": 63
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7974129546780783,
|
||
|
|
"calib/avg_num_step_conf": 3.62890625,
|
||
|
|
"calib/ece": 0.3880468750000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.9921875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.37109375,
|
||
|
|
"calib/gap": 0.23496206963625754,
|
||
|
|
"calib/mean_conf": 0.7669531249999999,
|
||
|
|
"calib/mu_c": 0.9128865979381443,
|
||
|
|
"calib/mu_w": 0.6779245283018868,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
||
|
|
"calib/pce": 0.3880468750000001,
|
||
|
|
"calib/std_conf": 0.279721006634708,
|
||
|
|
"calib/step_conf_rate": 0.9921875,
|
||
|
|
"calib/step_q_c": 0.8299152542372882,
|
||
|
|
"calib/step_q_c_n": 354.0,
|
||
|
|
"calib/step_q_gap": 0.16796742815033172,
|
||
|
|
"calib/step_q_w": 0.6619478260869565,
|
||
|
|
"calib/step_q_w_n": 575.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1024,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 7.61111111111111e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 16600900.0,
|
||
|
|
"reward": 0.875,
|
||
|
|
"reward_std": 0.13151776790618896,
|
||
|
|
"rewards/accuracy_reward_step": 0.37890625,
|
||
|
|
"rewards/format_reward_step": 0.9921875,
|
||
|
|
"step": 64
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7814169800118742,
|
||
|
|
"calib/avg_num_step_conf": 3.47265625,
|
||
|
|
"calib/ece": 0.4176953125,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.9921875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.375,
|
||
|
|
"calib/gap": 0.2235787321063395,
|
||
|
|
"calib/mean_conf": 0.7809765624999999,
|
||
|
|
"calib/mu_c": 0.9233333333333333,
|
||
|
|
"calib/mu_w": 0.6997546012269938,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
||
|
|
"calib/pce": 0.4176953125,
|
||
|
|
"calib/std_conf": 0.25815644375200786,
|
||
|
|
"calib/step_conf_rate": 0.9921875,
|
||
|
|
"calib/step_q_c": 0.8218688524590163,
|
||
|
|
"calib/step_q_c_n": 305.0,
|
||
|
|
"calib/step_q_gap": 0.1455674825960025,
|
||
|
|
"calib/step_q_w": 0.6763013698630138,
|
||
|
|
"calib/step_q_w_n": 584.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.104,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 7.555555555555555e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 16857884.0,
|
||
|
|
"reward": 0.859375,
|
||
|
|
"reward_std": 0.1349327117204666,
|
||
|
|
"rewards/accuracy_reward_step": 0.36328125,
|
||
|
|
"rewards/format_reward_step": 0.9921875,
|
||
|
|
"step": 65
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.6733002740441081,
|
||
|
|
"calib/avg_num_step_conf": 3.58203125,
|
||
|
|
"calib/ece": 0.39203921568627464,
|
||
|
|
"calib/final_conf_rate": 0.99609375,
|
||
|
|
"calib/format_rate": 0.984375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.403921568627451,
|
||
|
|
"calib/gap": 0.2015600939579798,
|
||
|
|
"calib/mean_conf": 0.7724313725490196,
|
||
|
|
"calib/mu_c": 0.8973195876288659,
|
||
|
|
"calib/mu_w": 0.6957594936708861,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
||
|
|
"calib/pce": 0.39203921568627464,
|
||
|
|
"calib/std_conf": 0.2766925989430885,
|
||
|
|
"calib/step_conf_rate": 0.98828125,
|
||
|
|
"calib/step_q_c": 0.8080466472303206,
|
||
|
|
"calib/step_q_c_n": 343.0,
|
||
|
|
"calib/step_q_gap": 0.11146128137666211,
|
||
|
|
"calib/step_q_w": 0.6965853658536585,
|
||
|
|
"calib/step_q_w_n": 574.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 367.0,
|
||
|
|
"completions/max_terminated_length": 367.0,
|
||
|
|
"completions/mean_length": 1.43359375,
|
||
|
|
"completions/mean_terminated_length": 367.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 367.0,
|
||
|
|
"epoch": 0.1056,
|
||
|
|
"grad_norm": 2.5103132724761963,
|
||
|
|
"learning_rate": 7.5e-07,
|
||
|
|
"loss": 0.0386,
|
||
|
|
"num_tokens": 17118907.0,
|
||
|
|
"reward": 0.87109375,
|
||
|
|
"reward_std": 0.1428884118795395,
|
||
|
|
"rewards/accuracy_reward_step": 0.37890625,
|
||
|
|
"rewards/format_reward_step": 0.984375,
|
||
|
|
"step": 66
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.6894213381555154,
|
||
|
|
"calib/avg_num_step_conf": 3.375,
|
||
|
|
"calib/ece": 0.42974609375000006,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.97265625,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.421875,
|
||
|
|
"calib/gap": 0.16449302505812435,
|
||
|
|
"calib/mean_conf": 0.81255859375,
|
||
|
|
"calib/mu_c": 0.9140816326530613,
|
||
|
|
"calib/mu_w": 0.749588607594937,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.9765625,
|
||
|
|
"calib/pce": 0.42974609375000006,
|
||
|
|
"calib/std_conf": 0.2582380890365952,
|
||
|
|
"calib/step_conf_rate": 0.9765625,
|
||
|
|
"calib/step_q_c": 0.837439446366782,
|
||
|
|
"calib/step_q_c_n": 289.0,
|
||
|
|
"calib/step_q_gap": 0.14036118549721677,
|
||
|
|
"calib/step_q_w": 0.6970782608695653,
|
||
|
|
"calib/step_q_w_n": 575.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1072,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 7.444444444444444e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 17379875.0,
|
||
|
|
"reward": 0.869140625,
|
||
|
|
"reward_std": 0.1348200887441635,
|
||
|
|
"rewards/accuracy_reward_step": 0.3828125,
|
||
|
|
"rewards/format_reward_step": 0.97265625,
|
||
|
|
"step": 67
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.9921875,
|
||
|
|
"calib/auroc": 0.7861512111573281,
|
||
|
|
"calib/avg_num_step_conf": 3.640625,
|
||
|
|
"calib/ece": 0.3138671875,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.9921875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.2890625,
|
||
|
|
"calib/gap": 0.18239417665769486,
|
||
|
|
"calib/mean_conf": 0.7904296875000001,
|
||
|
|
"calib/mu_c": 0.8859016393442621,
|
||
|
|
"calib/mu_w": 0.7035074626865673,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
||
|
|
"calib/pce": 0.3138671875,
|
||
|
|
"calib/std_conf": 0.2149768586235559,
|
||
|
|
"calib/step_conf_rate": 1.0,
|
||
|
|
"calib/step_q_c": 0.7975514874141876,
|
||
|
|
"calib/step_q_c_n": 437.0,
|
||
|
|
"calib/step_q_gap": 0.11181411367681382,
|
||
|
|
"calib/step_q_w": 0.6857373737373738,
|
||
|
|
"calib/step_q_w_n": 495.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1088,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 7.388888888888889e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 17640571.0,
|
||
|
|
"reward": 0.97265625,
|
||
|
|
"reward_std": 0.24158510565757751,
|
||
|
|
"rewards/accuracy_reward_step": 0.4765625,
|
||
|
|
"rewards/format_reward_step": 0.9921875,
|
||
|
|
"step": 68
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.9921875,
|
||
|
|
"calib/auroc": 0.7416682357371492,
|
||
|
|
"calib/avg_num_step_conf": 3.40625,
|
||
|
|
"calib/ece": 0.3035433070866143,
|
||
|
|
"calib/final_conf_rate": 0.9921875,
|
||
|
|
"calib/format_rate": 0.98046875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.4448818897637795,
|
||
|
|
"calib/gap": 0.13022406326492197,
|
||
|
|
"calib/mean_conf": 0.8586614173228345,
|
||
|
|
"calib/mu_c": 0.9165957446808511,
|
||
|
|
"calib/mu_w": 0.7863716814159292,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
||
|
|
"calib/pce": 0.3035433070866143,
|
||
|
|
"calib/std_conf": 0.16534079434657542,
|
||
|
|
"calib/step_conf_rate": 0.984375,
|
||
|
|
"calib/step_q_c": 0.830427927927928,
|
||
|
|
"calib/step_q_c_n": 444.0,
|
||
|
|
"calib/step_q_gap": 0.076105498021386,
|
||
|
|
"calib/step_q_w": 0.754322429906542,
|
||
|
|
"calib/step_q_w_n": 428.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.9921875,
|
||
|
|
"completions/max_length": 211.0,
|
||
|
|
"completions/max_terminated_length": 211.0,
|
||
|
|
"completions/mean_length": 1.640625,
|
||
|
|
"completions/mean_terminated_length": 210.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 209.0,
|
||
|
|
"epoch": 0.1104,
|
||
|
|
"grad_norm": 2.5319273471832275,
|
||
|
|
"learning_rate": 7.333333333333332e-07,
|
||
|
|
"loss": 0.0773,
|
||
|
|
"num_tokens": 17902023.0,
|
||
|
|
"reward": 1.041015625,
|
||
|
|
"reward_std": 0.14334642887115479,
|
||
|
|
"rewards/accuracy_reward_step": 0.55078125,
|
||
|
|
"rewards/format_reward_step": 0.98046875,
|
||
|
|
"step": 69
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.6981770833333334,
|
||
|
|
"calib/avg_num_step_conf": 3.31640625,
|
||
|
|
"calib/ece": 0.4160937500000004,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.9921875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.375,
|
||
|
|
"calib/gap": 0.16408333333333325,
|
||
|
|
"calib/mean_conf": 0.79109375,
|
||
|
|
"calib/mu_c": 0.8936458333333333,
|
||
|
|
"calib/mu_w": 0.7295625,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
||
|
|
"calib/pce": 0.4160937500000004,
|
||
|
|
"calib/std_conf": 0.23735928191443767,
|
||
|
|
"calib/step_conf_rate": 0.9921875,
|
||
|
|
"calib/step_q_c": 0.8111678832116788,
|
||
|
|
"calib/step_q_c_n": 274.0,
|
||
|
|
"calib/step_q_gap": 0.11728962234211349,
|
||
|
|
"calib/step_q_w": 0.6938782608695653,
|
||
|
|
"calib/step_q_w_n": 575.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.112,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 7.277777777777777e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 18163127.0,
|
||
|
|
"reward": 0.87109375,
|
||
|
|
"reward_std": 0.14940109848976135,
|
||
|
|
"rewards/accuracy_reward_step": 0.375,
|
||
|
|
"rewards/format_reward_step": 0.9921875,
|
||
|
|
"step": 70
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7026192241166296,
|
||
|
|
"calib/avg_num_step_conf": 3.328125,
|
||
|
|
"calib/ece": 0.2819921875000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.99609375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.41015625,
|
||
|
|
"calib/gap": 0.16275142080553506,
|
||
|
|
"calib/mean_conf": 0.8366796875,
|
||
|
|
"calib/mu_c": 0.9091549295774649,
|
||
|
|
"calib/mu_w": 0.7464035087719298,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
||
|
|
"calib/pce": 0.2819921875000001,
|
||
|
|
"calib/std_conf": 0.20704088642078003,
|
||
|
|
"calib/step_conf_rate": 0.99609375,
|
||
|
|
"calib/step_q_c": 0.814378947368421,
|
||
|
|
"calib/step_q_c_n": 475.0,
|
||
|
|
"calib/step_q_gap": 0.08575825771324841,
|
||
|
|
"calib/step_q_w": 0.7286206896551726,
|
||
|
|
"calib/step_q_w_n": 377.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1136,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 7.222222222222221e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 18419703.0,
|
||
|
|
"reward": 1.052734375,
|
||
|
|
"reward_std": 0.20845818519592285,
|
||
|
|
"rewards/accuracy_reward_step": 0.5546875,
|
||
|
|
"rewards/format_reward_step": 0.99609375,
|
||
|
|
"step": 71
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7274323966065748,
|
||
|
|
"calib/avg_num_step_conf": 3.3359375,
|
||
|
|
"calib/ece": 0.4427734375000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.99609375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.33984375,
|
||
|
|
"calib/gap": 0.18158801696712623,
|
||
|
|
"calib/mean_conf": 0.8021484375000001,
|
||
|
|
"calib/mu_c": 0.9184782608695654,
|
||
|
|
"calib/mu_w": 0.7368902439024392,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
||
|
|
"calib/pce": 0.4427734375000001,
|
||
|
|
"calib/std_conf": 0.23878859602440944,
|
||
|
|
"calib/step_conf_rate": 0.99609375,
|
||
|
|
"calib/step_q_c": 0.8430693069306932,
|
||
|
|
"calib/step_q_c_n": 303.0,
|
||
|
|
"calib/step_q_gap": 0.12232520529729929,
|
||
|
|
"calib/step_q_w": 0.7207441016333939,
|
||
|
|
"calib/step_q_w_n": 551.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1152,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 7.166666666666667e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 18676799.0,
|
||
|
|
"reward": 0.857421875,
|
||
|
|
"reward_std": 0.09679568558931351,
|
||
|
|
"rewards/accuracy_reward_step": 0.359375,
|
||
|
|
"rewards/format_reward_step": 0.99609375,
|
||
|
|
"step": 72
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.6822100313479624,
|
||
|
|
"calib/avg_num_step_conf": 3.515625,
|
||
|
|
"calib/ece": 0.53125,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.984375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.30859375,
|
||
|
|
"calib/gap": 0.18049460118425653,
|
||
|
|
"calib/mean_conf": 0.7578125,
|
||
|
|
"calib/mu_c": 0.8974137931034484,
|
||
|
|
"calib/mu_w": 0.7169191919191918,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
||
|
|
"calib/pce": 0.53125,
|
||
|
|
"calib/std_conf": 0.26937264355117796,
|
||
|
|
"calib/step_conf_rate": 0.984375,
|
||
|
|
"calib/step_q_c": 0.8082198952879581,
|
||
|
|
"calib/step_q_c_n": 191.0,
|
||
|
|
"calib/step_q_gap": 0.11064584733309213,
|
||
|
|
"calib/step_q_w": 0.697574047954866,
|
||
|
|
"calib/step_q_w_n": 709.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1168,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 7.111111111111111e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 18935623.0,
|
||
|
|
"reward": 0.71875,
|
||
|
|
"reward_std": 0.10149794071912766,
|
||
|
|
"rewards/accuracy_reward_step": 0.2265625,
|
||
|
|
"rewards/format_reward_step": 0.984375,
|
||
|
|
"step": 73
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.76161891206139,
|
||
|
|
"calib/avg_num_step_conf": 3.40234375,
|
||
|
|
"calib/ece": 0.32378906250000006,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.98046875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.30078125,
|
||
|
|
"calib/gap": 0.21693112197536957,
|
||
|
|
"calib/mean_conf": 0.7651953125,
|
||
|
|
"calib/mu_c": 0.8863716814159291,
|
||
|
|
"calib/mu_w": 0.6694405594405596,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.98046875,
|
||
|
|
"calib/pce": 0.32378906250000006,
|
||
|
|
"calib/std_conf": 0.25465502273276946,
|
||
|
|
"calib/step_conf_rate": 0.98046875,
|
||
|
|
"calib/step_q_c": 0.8073655913978496,
|
||
|
|
"calib/step_q_c_n": 372.0,
|
||
|
|
"calib/step_q_gap": 0.1391491585321183,
|
||
|
|
"calib/step_q_w": 0.6682164328657313,
|
||
|
|
"calib/step_q_w_n": 499.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1184,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 7.055555555555556e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 19191303.0,
|
||
|
|
"reward": 0.931640625,
|
||
|
|
"reward_std": 0.17043370008468628,
|
||
|
|
"rewards/accuracy_reward_step": 0.44140625,
|
||
|
|
"rewards/format_reward_step": 0.98046875,
|
||
|
|
"step": 74
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7952186805040772,
|
||
|
|
"calib/avg_num_step_conf": 3.37109375,
|
||
|
|
"calib/ece": 0.29574218750000003,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.99609375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.30078125,
|
||
|
|
"calib/gap": 0.2813306152705708,
|
||
|
|
"calib/mean_conf": 0.7410546874999999,
|
||
|
|
"calib/mu_c": 0.8971052631578947,
|
||
|
|
"calib/mu_w": 0.615774647887324,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
||
|
|
"calib/pce": 0.29574218750000003,
|
||
|
|
"calib/std_conf": 0.28964697824986424,
|
||
|
|
"calib/step_conf_rate": 0.99609375,
|
||
|
|
"calib/step_q_c": 0.8019251336898396,
|
||
|
|
"calib/step_q_c_n": 374.0,
|
||
|
|
"calib/step_q_gap": 0.15053454064280491,
|
||
|
|
"calib/step_q_w": 0.6513905930470347,
|
||
|
|
"calib/step_q_w_n": 489.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.12,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 19450407.0,
|
||
|
|
"reward": 0.943359375,
|
||
|
|
"reward_std": 0.1340028941631317,
|
||
|
|
"rewards/accuracy_reward_step": 0.4453125,
|
||
|
|
"rewards/format_reward_step": 0.99609375,
|
||
|
|
"step": 75
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7223510971786834,
|
||
|
|
"calib/avg_num_step_conf": 3.32421875,
|
||
|
|
"calib/ece": 0.37588235294117656,
|
||
|
|
"calib/final_conf_rate": 0.99609375,
|
||
|
|
"calib/format_rate": 0.9921875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.38823529411764707,
|
||
|
|
"calib/gap": 0.17221630094043883,
|
||
|
|
"calib/mean_conf": 0.8072549019607844,
|
||
|
|
"calib/mu_c": 0.9051818181818182,
|
||
|
|
"calib/mu_w": 0.7329655172413794,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
||
|
|
"calib/pce": 0.37588235294117656,
|
||
|
|
"calib/std_conf": 0.23130387814431091,
|
||
|
|
"calib/step_conf_rate": 0.99609375,
|
||
|
|
"calib/step_q_c": 0.8194827586206896,
|
||
|
|
"calib/step_q_c_n": 348.0,
|
||
|
|
"calib/step_q_gap": 0.10007918009186267,
|
||
|
|
"calib/step_q_w": 0.719403578528827,
|
||
|
|
"calib/step_q_w_n": 503.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 142.0,
|
||
|
|
"completions/max_terminated_length": 142.0,
|
||
|
|
"completions/mean_length": 0.5546875,
|
||
|
|
"completions/mean_terminated_length": 142.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 142.0,
|
||
|
|
"epoch": 0.1216,
|
||
|
|
"grad_norm": 2.447387456893921,
|
||
|
|
"learning_rate": 6.944444444444444e-07,
|
||
|
|
"loss": 0.0386,
|
||
|
|
"num_tokens": 19710389.0,
|
||
|
|
"reward": 0.9296875,
|
||
|
|
"reward_std": 0.15110857784748077,
|
||
|
|
"rewards/accuracy_reward_step": 0.43359375,
|
||
|
|
"rewards/format_reward_step": 0.9921875,
|
||
|
|
"step": 76
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.6762873882207833,
|
||
|
|
"calib/avg_num_step_conf": 3.45703125,
|
||
|
|
"calib/ece": 0.3690234375000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 1.0,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.33984375,
|
||
|
|
"calib/gap": 0.14401911810052415,
|
||
|
|
"calib/mean_conf": 0.8182421874999999,
|
||
|
|
"calib/mu_c": 0.8975652173913042,
|
||
|
|
"calib/mu_w": 0.7535460992907801,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
||
|
|
"calib/pce": 0.3690234375000001,
|
||
|
|
"calib/std_conf": 0.20544833771587162,
|
||
|
|
"calib/step_conf_rate": 1.0,
|
||
|
|
"calib/step_q_c": 0.8123118279569894,
|
||
|
|
"calib/step_q_c_n": 372.0,
|
||
|
|
"calib/step_q_gap": 0.07283814374646314,
|
||
|
|
"calib/step_q_w": 0.7394736842105263,
|
||
|
|
"calib/step_q_w_n": 513.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1232,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 6.888888888888889e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 19969821.0,
|
||
|
|
"reward": 0.94921875,
|
||
|
|
"reward_std": 0.06009919196367264,
|
||
|
|
"rewards/accuracy_reward_step": 0.44921875,
|
||
|
|
"rewards/format_reward_step": 1.0,
|
||
|
|
"step": 77
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.9921875,
|
||
|
|
"calib/auroc": 0.6754556419190565,
|
||
|
|
"calib/avg_num_step_conf": 3.30078125,
|
||
|
|
"calib/ece": 0.4238823529411765,
|
||
|
|
"calib/final_conf_rate": 0.99609375,
|
||
|
|
"calib/format_rate": 0.98828125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.33725490196078434,
|
||
|
|
"calib/gap": 0.16594813722862511,
|
||
|
|
"calib/mean_conf": 0.7807450980392155,
|
||
|
|
"calib/mu_c": 0.8874725274725276,
|
||
|
|
"calib/mu_w": 0.7215243902439025,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
||
|
|
"calib/pce": 0.4238823529411765,
|
||
|
|
"calib/std_conf": 0.2421145283309367,
|
||
|
|
"calib/step_conf_rate": 0.9921875,
|
||
|
|
"calib/step_q_c": 0.8049163879598663,
|
||
|
|
"calib/step_q_c_n": 299.0,
|
||
|
|
"calib/step_q_gap": 0.10936693741041581,
|
||
|
|
"calib/step_q_w": 0.6955494505494505,
|
||
|
|
"calib/step_q_w_n": 546.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 193.0,
|
||
|
|
"completions/max_terminated_length": 193.0,
|
||
|
|
"completions/mean_length": 0.75390625,
|
||
|
|
"completions/mean_terminated_length": 193.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 193.0,
|
||
|
|
"epoch": 0.1248,
|
||
|
|
"grad_norm": 4.708121299743652,
|
||
|
|
"learning_rate": 6.833333333333333e-07,
|
||
|
|
"loss": 0.0386,
|
||
|
|
"num_tokens": 20226030.0,
|
||
|
|
"reward": 0.849609375,
|
||
|
|
"reward_std": 0.14808812737464905,
|
||
|
|
"rewards/accuracy_reward_step": 0.35546875,
|
||
|
|
"rewards/format_reward_step": 0.98828125,
|
||
|
|
"step": 78
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7350085665334095,
|
||
|
|
"calib/avg_num_step_conf": 2.75390625,
|
||
|
|
"calib/ece": 0.401640625,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.98046875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.33984375,
|
||
|
|
"calib/gap": 0.1898940288089349,
|
||
|
|
"calib/mean_conf": 0.803984375,
|
||
|
|
"calib/mu_c": 0.9174757281553401,
|
||
|
|
"calib/mu_w": 0.7275816993464052,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.98046875,
|
||
|
|
"calib/pce": 0.401640625,
|
||
|
|
"calib/std_conf": 0.24545028723930915,
|
||
|
|
"calib/step_conf_rate": 0.98046875,
|
||
|
|
"calib/step_q_c": 0.8561290322580645,
|
||
|
|
"calib/step_q_c_n": 248.0,
|
||
|
|
"calib/step_q_gap": 0.11291240206112796,
|
||
|
|
"calib/step_q_w": 0.7432166301969365,
|
||
|
|
"calib/step_q_w_n": 457.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1264,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 6.777777777777778e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 20480094.0,
|
||
|
|
"reward": 0.892578125,
|
||
|
|
"reward_std": 0.0921671986579895,
|
||
|
|
"rewards/accuracy_reward_step": 0.40234375,
|
||
|
|
"rewards/format_reward_step": 0.98046875,
|
||
|
|
"step": 79
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7242817735315884,
|
||
|
|
"calib/avg_num_step_conf": 3.1953125,
|
||
|
|
"calib/ece": 0.45742187500000014,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.984375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.296875,
|
||
|
|
"calib/gap": 0.15274305321940385,
|
||
|
|
"calib/mean_conf": 0.805078125,
|
||
|
|
"calib/mu_c": 0.9047191011235954,
|
||
|
|
"calib/mu_w": 0.7519760479041916,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
||
|
|
"calib/pce": 0.45742187500000014,
|
||
|
|
"calib/std_conf": 0.2142045462787482,
|
||
|
|
"calib/step_conf_rate": 0.984375,
|
||
|
|
"calib/step_q_c": 0.8306181818181819,
|
||
|
|
"calib/step_q_c_n": 275.0,
|
||
|
|
"calib/step_q_gap": 0.08233088900050234,
|
||
|
|
"calib/step_q_w": 0.7482872928176796,
|
||
|
|
"calib/step_q_w_n": 543.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.128,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 6.722222222222222e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 20741550.0,
|
||
|
|
"reward": 0.83984375,
|
||
|
|
"reward_std": 0.1633341908454895,
|
||
|
|
"rewards/accuracy_reward_step": 0.34765625,
|
||
|
|
"rewards/format_reward_step": 0.984375,
|
||
|
|
"step": 80
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.7733983983983984,
|
||
|
|
"calib/avg_num_step_conf": 2.89453125,
|
||
|
|
"calib/ece": 0.3342578125000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.98828125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.21875,
|
||
|
|
"calib/gap": 0.213455955955956,
|
||
|
|
"calib/mean_conf": 0.7561328125,
|
||
|
|
"calib/mu_c": 0.8795370370370372,
|
||
|
|
"calib/mu_w": 0.6660810810810812,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
||
|
|
"calib/pce": 0.3342578125000001,
|
||
|
|
"calib/std_conf": 0.25879129716016314,
|
||
|
|
"calib/step_conf_rate": 0.9921875,
|
||
|
|
"calib/step_q_c": 0.803051948051948,
|
||
|
|
"calib/step_q_c_n": 308.0,
|
||
|
|
"calib/step_q_gap": 0.12961083950691332,
|
||
|
|
"calib/step_q_w": 0.6734411085450347,
|
||
|
|
"calib/step_q_w_n": 433.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1296,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 6.666666666666666e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 20996598.0,
|
||
|
|
"reward": 0.916015625,
|
||
|
|
"reward_std": 0.18609514832496643,
|
||
|
|
"rewards/accuracy_reward_step": 0.421875,
|
||
|
|
"rewards/format_reward_step": 0.98828125,
|
||
|
|
"step": 81
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.98828125,
|
||
|
|
"calib/auroc": 0.8024912168636218,
|
||
|
|
"calib/avg_num_step_conf": 2.734375,
|
||
|
|
"calib/ece": 0.29730468750000005,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.98046875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.21484375,
|
||
|
|
"calib/gap": 0.34462408176301507,
|
||
|
|
"calib/mean_conf": 0.6918359375,
|
||
|
|
"calib/mu_c": 0.9004950495049505,
|
||
|
|
"calib/mu_w": 0.5558709677419355,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
||
|
|
"calib/pce": 0.29730468750000005,
|
||
|
|
"calib/std_conf": 0.3326998759520299,
|
||
|
|
"calib/step_conf_rate": 0.9921875,
|
||
|
|
"calib/step_q_c": 0.8439405204460966,
|
||
|
|
"calib/step_q_c_n": 269.0,
|
||
|
|
"calib/step_q_gap": 0.21570386151338206,
|
||
|
|
"calib/step_q_w": 0.6282366589327145,
|
||
|
|
"calib/step_q_w_n": 431.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1312,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 6.611111111111111e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 21256974.0,
|
||
|
|
"reward": 0.884765625,
|
||
|
|
"reward_std": 0.15012259781360626,
|
||
|
|
"rewards/accuracy_reward_step": 0.39453125,
|
||
|
|
"rewards/format_reward_step": 0.98046875,
|
||
|
|
"step": 82
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.7429947188567878,
|
||
|
|
"calib/avg_num_step_conf": 2.609375,
|
||
|
|
"calib/ece": 0.3607421875,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.97265625,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.2421875,
|
||
|
|
"calib/gap": 0.18623361292326834,
|
||
|
|
"calib/mean_conf": 0.7943359375000001,
|
||
|
|
"calib/mu_c": 0.8998198198198201,
|
||
|
|
"calib/mu_w": 0.7135862068965517,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.9765625,
|
||
|
|
"calib/pce": 0.3607421875,
|
||
|
|
"calib/std_conf": 0.24654239548401424,
|
||
|
|
"calib/step_conf_rate": 0.9765625,
|
||
|
|
"calib/step_q_c": 0.8404744525547445,
|
||
|
|
"calib/step_q_c_n": 274.0,
|
||
|
|
"calib/step_q_gap": 0.1236978028085517,
|
||
|
|
"calib/step_q_w": 0.7167766497461928,
|
||
|
|
"calib/step_q_w_n": 394.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1328,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 6.555555555555555e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 21514150.0,
|
||
|
|
"reward": 0.919921875,
|
||
|
|
"reward_std": 0.17466981709003448,
|
||
|
|
"rewards/accuracy_reward_step": 0.43359375,
|
||
|
|
"rewards/format_reward_step": 0.97265625,
|
||
|
|
"step": 83
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.98828125,
|
||
|
|
"calib/auroc": 0.8261391625615763,
|
||
|
|
"calib/avg_num_step_conf": 2.6640625,
|
||
|
|
"calib/ece": 0.3087890625000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.9765625,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.203125,
|
||
|
|
"calib/gap": 0.2556527093596058,
|
||
|
|
"calib/mean_conf": 0.7619140625,
|
||
|
|
"calib/mu_c": 0.9017241379310346,
|
||
|
|
"calib/mu_w": 0.6460714285714287,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
||
|
|
"calib/pce": 0.3087890625000001,
|
||
|
|
"calib/std_conf": 0.25246257542405387,
|
||
|
|
"calib/step_conf_rate": 0.98828125,
|
||
|
|
"calib/step_q_c": 0.8455813953488374,
|
||
|
|
"calib/step_q_c_n": 258.0,
|
||
|
|
"calib/step_q_gap": 0.17553422553751674,
|
||
|
|
"calib/step_q_w": 0.6700471698113206,
|
||
|
|
"calib/step_q_w_n": 424.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1344,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 6.5e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 21774518.0,
|
||
|
|
"reward": 0.94140625,
|
||
|
|
"reward_std": 0.18030638992786407,
|
||
|
|
"rewards/accuracy_reward_step": 0.453125,
|
||
|
|
"rewards/format_reward_step": 0.9765625,
|
||
|
|
"step": 84
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7715619849844351,
|
||
|
|
"calib/avg_num_step_conf": 2.48828125,
|
||
|
|
"calib/ece": 0.27863281250000005,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.99609375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.23046875,
|
||
|
|
"calib/gap": 0.23036501251297092,
|
||
|
|
"calib/mean_conf": 0.7825390624999999,
|
||
|
|
"calib/mu_c": 0.8968217054263566,
|
||
|
|
"calib/mu_w": 0.6664566929133857,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
||
|
|
"calib/pce": 0.27863281250000005,
|
||
|
|
"calib/std_conf": 0.2438529398769289,
|
||
|
|
"calib/step_conf_rate": 0.99609375,
|
||
|
|
"calib/step_q_c": 0.8435869565217391,
|
||
|
|
"calib/step_q_c_n": 276.0,
|
||
|
|
"calib/step_q_gap": 0.1602905576297724,
|
||
|
|
"calib/step_q_w": 0.6832963988919667,
|
||
|
|
"calib/step_q_w_n": 361.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.136,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 6.444444444444444e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 22032766.0,
|
||
|
|
"reward": 1.001953125,
|
||
|
|
"reward_std": 0.13992658257484436,
|
||
|
|
"rewards/accuracy_reward_step": 0.50390625,
|
||
|
|
"rewards/format_reward_step": 0.99609375,
|
||
|
|
"step": 85
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.9921875,
|
||
|
|
"calib/auroc": 0.7565901083126713,
|
||
|
|
"calib/avg_num_step_conf": 2.83203125,
|
||
|
|
"calib/ece": 0.41156862745098033,
|
||
|
|
"calib/final_conf_rate": 0.99609375,
|
||
|
|
"calib/format_rate": 0.98046875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.23921568627450981,
|
||
|
|
"calib/gap": 0.1795249902127104,
|
||
|
|
"calib/mean_conf": 0.7919607843137254,
|
||
|
|
"calib/mu_c": 0.9031958762886598,
|
||
|
|
"calib/mu_w": 0.7236708860759494,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.984375,
|
||
|
|
"calib/pce": 0.41156862745098033,
|
||
|
|
"calib/std_conf": 0.22489055820112214,
|
||
|
|
"calib/step_conf_rate": 0.984375,
|
||
|
|
"calib/step_q_c": 0.8238671875,
|
||
|
|
"calib/step_q_c_n": 256.0,
|
||
|
|
"calib/step_q_gap": 0.10211878664712148,
|
||
|
|
"calib/step_q_w": 0.7217484008528785,
|
||
|
|
"calib/step_q_w_n": 469.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 131.0,
|
||
|
|
"completions/max_terminated_length": 131.0,
|
||
|
|
"completions/mean_length": 0.51171875,
|
||
|
|
"completions/mean_terminated_length": 131.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 131.0,
|
||
|
|
"epoch": 0.1376,
|
||
|
|
"grad_norm": 4.982574939727783,
|
||
|
|
"learning_rate": 6.388888888888888e-07,
|
||
|
|
"loss": 0.0211,
|
||
|
|
"num_tokens": 22292889.0,
|
||
|
|
"reward": 0.869140625,
|
||
|
|
"reward_std": 0.16770878434181213,
|
||
|
|
"rewards/accuracy_reward_step": 0.37890625,
|
||
|
|
"rewards/format_reward_step": 0.98046875,
|
||
|
|
"step": 86
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.75,
|
||
|
|
"calib/avg_num_step_conf": 2.6015625,
|
||
|
|
"calib/ece": 0.22847656250000006,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.94921875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.22265625,
|
||
|
|
"calib/gap": 0.19762512512512487,
|
||
|
|
"calib/mean_conf": 0.7987890625,
|
||
|
|
"calib/mu_c": 0.8821621621621621,
|
||
|
|
"calib/mu_w": 0.6845370370370373,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.953125,
|
||
|
|
"calib/pce": 0.22457031250000006,
|
||
|
|
"calib/std_conf": 0.22614055199227556,
|
||
|
|
"calib/step_conf_rate": 0.953125,
|
||
|
|
"calib/step_q_c": 0.8204427083333333,
|
||
|
|
"calib/step_q_c_n": 384.0,
|
||
|
|
"calib/step_q_gap": 0.06884696365248233,
|
||
|
|
"calib/step_q_w": 0.751595744680851,
|
||
|
|
"calib/step_q_w_n": 282.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1392,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 6.333333333333332e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 22550345.0,
|
||
|
|
"reward": 1.052734375,
|
||
|
|
"reward_std": 0.21600019931793213,
|
||
|
|
"rewards/accuracy_reward_step": 0.578125,
|
||
|
|
"rewards/format_reward_step": 0.94921875,
|
||
|
|
"step": 87
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.6679403541472506,
|
||
|
|
"calib/avg_num_step_conf": 2.40625,
|
||
|
|
"calib/ece": 0.35699218750000006,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.9609375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.234375,
|
||
|
|
"calib/gap": 0.15627151289220276,
|
||
|
|
"calib/mean_conf": 0.7905859375,
|
||
|
|
"calib/mu_c": 0.8790990990990993,
|
||
|
|
"calib/mu_w": 0.7228275862068966,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.9609375,
|
||
|
|
"calib/pce": 0.35699218750000006,
|
||
|
|
"calib/std_conf": 0.2487815001205799,
|
||
|
|
"calib/step_conf_rate": 0.9609375,
|
||
|
|
"calib/step_q_c": 0.8083908045977012,
|
||
|
|
"calib/step_q_c_n": 261.0,
|
||
|
|
"calib/step_q_gap": 0.06374291727375747,
|
||
|
|
"calib/step_q_w": 0.7446478873239437,
|
||
|
|
"calib/step_q_w_n": 355.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1408,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 6.277777777777777e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 22809337.0,
|
||
|
|
"reward": 0.9140625,
|
||
|
|
"reward_std": 0.15110857784748077,
|
||
|
|
"rewards/accuracy_reward_step": 0.43359375,
|
||
|
|
"rewards/format_reward_step": 0.9609375,
|
||
|
|
"step": 88
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.7653526220614829,
|
||
|
|
"calib/avg_num_step_conf": 2.34375,
|
||
|
|
"calib/ece": 0.38968503937007887,
|
||
|
|
"calib/final_conf_rate": 0.9921875,
|
||
|
|
"calib/format_rate": 0.92578125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.20078740157480315,
|
||
|
|
"calib/gap": 0.27345678119349026,
|
||
|
|
"calib/mean_conf": 0.7007086614173228,
|
||
|
|
"calib/mu_c": 0.889113924050633,
|
||
|
|
"calib/mu_w": 0.6156571428571428,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.93359375,
|
||
|
|
"calib/pce": 0.38968503937007887,
|
||
|
|
"calib/std_conf": 0.3153904744000723,
|
||
|
|
"calib/step_conf_rate": 0.93359375,
|
||
|
|
"calib/step_q_c": 0.8161739130434781,
|
||
|
|
"calib/step_q_c_n": 230.0,
|
||
|
|
"calib/step_q_gap": 0.1399576968272619,
|
||
|
|
"calib/step_q_w": 0.6762162162162162,
|
||
|
|
"calib/step_q_w_n": 370.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.9921875,
|
||
|
|
"completions/max_length": 173.0,
|
||
|
|
"completions/max_terminated_length": 173.0,
|
||
|
|
"completions/mean_length": 0.921875,
|
||
|
|
"completions/mean_terminated_length": 118.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 63.0,
|
||
|
|
"epoch": 0.1424,
|
||
|
|
"grad_norm": 18.566625595092773,
|
||
|
|
"learning_rate": 6.222222222222223e-07,
|
||
|
|
"loss": 0.0773,
|
||
|
|
"num_tokens": 23070365.0,
|
||
|
|
"reward": 0.771484375,
|
||
|
|
"reward_std": 0.18201754987239838,
|
||
|
|
"rewards/accuracy_reward_step": 0.30859375,
|
||
|
|
"rewards/format_reward_step": 0.92578125,
|
||
|
|
"step": 89
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.7787698412698413,
|
||
|
|
"calib/avg_num_step_conf": 2.2265625,
|
||
|
|
"calib/ece": 0.3140625,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.921875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.1875,
|
||
|
|
"calib/gap": 0.25182539682539684,
|
||
|
|
"calib/mean_conf": 0.7515625,
|
||
|
|
"calib/mu_c": 0.8932142857142856,
|
||
|
|
"calib/mu_w": 0.6413888888888888,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.92578125,
|
||
|
|
"calib/pce": 0.3140625,
|
||
|
|
"calib/std_conf": 0.2732628013355458,
|
||
|
|
"calib/step_conf_rate": 0.92578125,
|
||
|
|
"calib/step_q_c": 0.8295035460992908,
|
||
|
|
"calib/step_q_c_n": 282.0,
|
||
|
|
"calib/step_q_gap": 0.12762854609929086,
|
||
|
|
"calib/step_q_w": 0.7018749999999999,
|
||
|
|
"calib/step_q_w_n": 288.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.144,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 6.166666666666667e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 23332245.0,
|
||
|
|
"reward": 0.8984375,
|
||
|
|
"reward_std": 0.11948448419570923,
|
||
|
|
"rewards/accuracy_reward_step": 0.4375,
|
||
|
|
"rewards/format_reward_step": 0.921875,
|
||
|
|
"step": 90
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.9921875,
|
||
|
|
"calib/auroc": 0.728298611111111,
|
||
|
|
"calib/avg_num_step_conf": 2.15234375,
|
||
|
|
"calib/ece": 0.32609375,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.9140625,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.1875,
|
||
|
|
"calib/gap": 0.22345238095238096,
|
||
|
|
"calib/mean_conf": 0.7635937500000001,
|
||
|
|
"calib/mu_c": 0.8892857142857142,
|
||
|
|
"calib/mu_w": 0.6658333333333333,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.921875,
|
||
|
|
"calib/pce": 0.32609375,
|
||
|
|
"calib/std_conf": 0.26812204070336604,
|
||
|
|
"calib/step_conf_rate": 0.921875,
|
||
|
|
"calib/step_q_c": 0.8298837209302327,
|
||
|
|
"calib/step_q_c_n": 258.0,
|
||
|
|
"calib/step_q_gap": 0.12933764584490848,
|
||
|
|
"calib/step_q_w": 0.7005460750853242,
|
||
|
|
"calib/step_q_w_n": 293.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1456,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 6.111111111111112e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 23586941.0,
|
||
|
|
"reward": 0.89453125,
|
||
|
|
"reward_std": 0.2004837840795517,
|
||
|
|
"rewards/accuracy_reward_step": 0.4375,
|
||
|
|
"rewards/format_reward_step": 0.9140625,
|
||
|
|
"step": 91
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.9921875,
|
||
|
|
"calib/auroc": 0.7191771642991155,
|
||
|
|
"calib/avg_num_step_conf": 2.171875,
|
||
|
|
"calib/ece": 0.3930196078431373,
|
||
|
|
"calib/final_conf_rate": 0.99609375,
|
||
|
|
"calib/format_rate": 0.93359375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.12941176470588237,
|
||
|
|
"calib/gap": 0.20351313320825537,
|
||
|
|
"calib/mean_conf": 0.7498823529411764,
|
||
|
|
"calib/mu_c": 0.880769230769231,
|
||
|
|
"calib/mu_w": 0.6772560975609756,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.9375,
|
||
|
|
"calib/pce": 0.3930196078431373,
|
||
|
|
"calib/std_conf": 0.27616772731467004,
|
||
|
|
"calib/step_conf_rate": 0.9375,
|
||
|
|
"calib/step_q_c": 0.8341255605381168,
|
||
|
|
"calib/step_q_c_n": 223.0,
|
||
|
|
"calib/step_q_gap": 0.11947090588346232,
|
||
|
|
"calib/step_q_w": 0.7146546546546545,
|
||
|
|
"calib/step_q_w_n": 333.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 58.0,
|
||
|
|
"completions/max_terminated_length": 58.0,
|
||
|
|
"completions/mean_length": 0.2265625,
|
||
|
|
"completions/mean_terminated_length": 58.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 58.0,
|
||
|
|
"epoch": 0.1472,
|
||
|
|
"grad_norm": 2.000593662261963,
|
||
|
|
"learning_rate": 6.055555555555555e-07,
|
||
|
|
"loss": 0.0084,
|
||
|
|
"num_tokens": 23846735.0,
|
||
|
|
"reward": 0.822265625,
|
||
|
|
"reward_std": 0.23607444763183594,
|
||
|
|
"rewards/accuracy_reward_step": 0.35546875,
|
||
|
|
"rewards/format_reward_step": 0.93359375,
|
||
|
|
"step": 92
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.6951848400124262,
|
||
|
|
"calib/avg_num_step_conf": 2.13671875,
|
||
|
|
"calib/ece": 0.3127734375000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.9609375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.1484375,
|
||
|
|
"calib/gap": 0.2170034172103139,
|
||
|
|
"calib/mean_conf": 0.7463671875,
|
||
|
|
"calib/mu_c": 0.8692792792792794,
|
||
|
|
"calib/mu_w": 0.6522758620689655,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.96484375,
|
||
|
|
"calib/pce": 0.3127734375000001,
|
||
|
|
"calib/std_conf": 0.2700659748993565,
|
||
|
|
"calib/step_conf_rate": 0.96484375,
|
||
|
|
"calib/step_q_c": 0.8295121951219512,
|
||
|
|
"calib/step_q_c_n": 246.0,
|
||
|
|
"calib/step_q_gap": 0.15549226156713392,
|
||
|
|
"calib/step_q_w": 0.6740199335548173,
|
||
|
|
"calib/step_q_w_n": 301.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1488,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 6e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 24108375.0,
|
||
|
|
"reward": 0.9140625,
|
||
|
|
"reward_std": 0.17483949661254883,
|
||
|
|
"rewards/accuracy_reward_step": 0.43359375,
|
||
|
|
"rewards/format_reward_step": 0.9609375,
|
||
|
|
"step": 93
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.8387096774193549,
|
||
|
|
"calib/avg_num_step_conf": 1.83203125,
|
||
|
|
"calib/ece": 0.29234375000000007,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.9140625,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.1484375,
|
||
|
|
"calib/gap": 0.34038965186841263,
|
||
|
|
"calib/mean_conf": 0.6868749999999999,
|
||
|
|
"calib/mu_c": 0.892970297029703,
|
||
|
|
"calib/mu_w": 0.5525806451612904,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.9140625,
|
||
|
|
"calib/pce": 0.29234375000000007,
|
||
|
|
"calib/std_conf": 0.32713302435706487,
|
||
|
|
"calib/step_conf_rate": 0.9140625,
|
||
|
|
"calib/step_q_c": 0.8342574257425742,
|
||
|
|
"calib/step_q_c_n": 202.0,
|
||
|
|
"calib/step_q_gap": 0.21414506619201235,
|
||
|
|
"calib/step_q_w": 0.6201123595505619,
|
||
|
|
"calib/step_q_w_n": 267.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1504,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 5.944444444444444e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 24363951.0,
|
||
|
|
"reward": 0.8515625,
|
||
|
|
"reward_std": 0.18971920013427734,
|
||
|
|
"rewards/accuracy_reward_step": 0.39453125,
|
||
|
|
"rewards/format_reward_step": 0.9140625,
|
||
|
|
"step": 94
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.9921875,
|
||
|
|
"calib/auroc": 0.7844712182061581,
|
||
|
|
"calib/avg_num_step_conf": 2.06640625,
|
||
|
|
"calib/ece": 0.40230468750000015,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.9296875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.1484375,
|
||
|
|
"calib/gap": 0.2171365461847391,
|
||
|
|
"calib/mean_conf": 0.7538671875,
|
||
|
|
"calib/mu_c": 0.8946666666666668,
|
||
|
|
"calib/mu_w": 0.6775301204819277,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.93359375,
|
||
|
|
"calib/pce": 0.40230468750000015,
|
||
|
|
"calib/std_conf": 0.2606982172663247,
|
||
|
|
"calib/step_conf_rate": 0.93359375,
|
||
|
|
"calib/step_q_c": 0.8169999999999998,
|
||
|
|
"calib/step_q_c_n": 200.0,
|
||
|
|
"calib/step_q_gap": 0.1401306990881458,
|
||
|
|
"calib/step_q_w": 0.676869300911854,
|
||
|
|
"calib/step_q_w_n": 329.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.152,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 5.888888888888889e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 24624599.0,
|
||
|
|
"reward": 0.81640625,
|
||
|
|
"reward_std": 0.16954180598258972,
|
||
|
|
"rewards/accuracy_reward_step": 0.3515625,
|
||
|
|
"rewards/format_reward_step": 0.9296875,
|
||
|
|
"step": 95
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7233352056418898,
|
||
|
|
"calib/avg_num_step_conf": 1.9375,
|
||
|
|
"calib/ece": 0.3401171875,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.9609375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.140625,
|
||
|
|
"calib/gap": 0.19886725332334776,
|
||
|
|
"calib/mean_conf": 0.7658984375,
|
||
|
|
"calib/mu_c": 0.8800917431192661,
|
||
|
|
"calib/mu_w": 0.6812244897959183,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.9609375,
|
||
|
|
"calib/pce": 0.3401171875,
|
||
|
|
"calib/std_conf": 0.25488809860418865,
|
||
|
|
"calib/step_conf_rate": 0.9609375,
|
||
|
|
"calib/step_q_c": 0.8263636363636364,
|
||
|
|
"calib/step_q_c_n": 242.0,
|
||
|
|
"calib/step_q_gap": 0.128253400143164,
|
||
|
|
"calib/step_q_w": 0.6981102362204724,
|
||
|
|
"calib/step_q_w_n": 254.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1536,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 5.833333333333334e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 24885647.0,
|
||
|
|
"reward": 0.90625,
|
||
|
|
"reward_std": 0.15150675177574158,
|
||
|
|
"rewards/accuracy_reward_step": 0.42578125,
|
||
|
|
"rewards/format_reward_step": 0.9609375,
|
||
|
|
"step": 96
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.8210475886298307,
|
||
|
|
"calib/avg_num_step_conf": 2.21484375,
|
||
|
|
"calib/ece": 0.3226171874999999,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.953125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.08203125,
|
||
|
|
"calib/gap": 0.2630808048546791,
|
||
|
|
"calib/mean_conf": 0.7171484375,
|
||
|
|
"calib/mu_c": 0.8764356435643564,
|
||
|
|
"calib/mu_w": 0.6133548387096773,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.953125,
|
||
|
|
"calib/pce": 0.3226171874999999,
|
||
|
|
"calib/std_conf": 0.26849274285594493,
|
||
|
|
"calib/step_conf_rate": 0.953125,
|
||
|
|
"calib/step_q_c": 0.8418433179723502,
|
||
|
|
"calib/step_q_c_n": 217.0,
|
||
|
|
"calib/step_q_gap": 0.18964331797235023,
|
||
|
|
"calib/step_q_w": 0.6522,
|
||
|
|
"calib/step_q_w_n": 350.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1552,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 5.777777777777777e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 25146071.0,
|
||
|
|
"reward": 0.87109375,
|
||
|
|
"reward_std": 0.09396559000015259,
|
||
|
|
"rewards/accuracy_reward_step": 0.39453125,
|
||
|
|
"rewards/format_reward_step": 0.953125,
|
||
|
|
"step": 97
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.8168831168831169,
|
||
|
|
"calib/avg_num_step_conf": 1.81640625,
|
||
|
|
"calib/ece": 0.3071093750000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.88671875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.109375,
|
||
|
|
"calib/gap": 0.3438121878121879,
|
||
|
|
"calib/mean_conf": 0.662578125,
|
||
|
|
"calib/mu_c": 0.8841758241758243,
|
||
|
|
"calib/mu_w": 0.5403636363636364,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.88671875,
|
||
|
|
"calib/pce": 0.3071093750000001,
|
||
|
|
"calib/std_conf": 0.3359600192306882,
|
||
|
|
"calib/step_conf_rate": 0.88671875,
|
||
|
|
"calib/step_q_c": 0.832010582010582,
|
||
|
|
"calib/step_q_c_n": 189.0,
|
||
|
|
"calib/step_q_gap": 0.18371348056130665,
|
||
|
|
"calib/step_q_w": 0.6482971014492753,
|
||
|
|
"calib/step_q_w_n": 276.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1568,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 5.722222222222222e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 25405423.0,
|
||
|
|
"reward": 0.798828125,
|
||
|
|
"reward_std": 0.13947027921676636,
|
||
|
|
"rewards/accuracy_reward_step": 0.35546875,
|
||
|
|
"rewards/format_reward_step": 0.88671875,
|
||
|
|
"step": 98
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.8558369005296379,
|
||
|
|
"calib/avg_num_step_conf": 2.21875,
|
||
|
|
"calib/ece": 0.39683593750000007,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.95703125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.1484375,
|
||
|
|
"calib/gap": 0.2849952840455633,
|
||
|
|
"calib/mean_conf": 0.6944921875,
|
||
|
|
"calib/mu_c": 0.8937662337662337,
|
||
|
|
"calib/mu_w": 0.6087709497206704,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.95703125,
|
||
|
|
"calib/pce": 0.39527343750000005,
|
||
|
|
"calib/std_conf": 0.29874564578662033,
|
||
|
|
"calib/step_conf_rate": 0.95703125,
|
||
|
|
"calib/step_q_c": 0.8462359550561798,
|
||
|
|
"calib/step_q_c_n": 178.0,
|
||
|
|
"calib/step_q_gap": 0.20731287813310284,
|
||
|
|
"calib/step_q_w": 0.6389230769230769,
|
||
|
|
"calib/step_q_w_n": 390.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1584,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 5.666666666666666e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 25667567.0,
|
||
|
|
"reward": 0.779296875,
|
||
|
|
"reward_std": 0.1340028941631317,
|
||
|
|
"rewards/accuracy_reward_step": 0.30078125,
|
||
|
|
"rewards/format_reward_step": 0.95703125,
|
||
|
|
"step": 99
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7632051282051282,
|
||
|
|
"calib/avg_num_step_conf": 2.046875,
|
||
|
|
"calib/ece": 0.3394140625000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.921875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.140625,
|
||
|
|
"calib/gap": 0.2556076923076924,
|
||
|
|
"calib/mean_conf": 0.7300390625,
|
||
|
|
"calib/mu_c": 0.8858000000000001,
|
||
|
|
"calib/mu_w": 0.6301923076923077,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.921875,
|
||
|
|
"calib/pce": 0.3394140625000001,
|
||
|
|
"calib/std_conf": 0.29843667937959817,
|
||
|
|
"calib/step_conf_rate": 0.921875,
|
||
|
|
"calib/step_q_c": 0.8273076923076924,
|
||
|
|
"calib/step_q_c_n": 208.0,
|
||
|
|
"calib/step_q_gap": 0.15468111002921148,
|
||
|
|
"calib/step_q_w": 0.6726265822784809,
|
||
|
|
"calib/step_q_w_n": 316.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.16,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 5.611111111111111e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 25929711.0,
|
||
|
|
"reward": 0.8515625,
|
||
|
|
"reward_std": 0.1345345377922058,
|
||
|
|
"rewards/accuracy_reward_step": 0.390625,
|
||
|
|
"rewards/format_reward_step": 0.921875,
|
||
|
|
"step": 100
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.7967128630127447,
|
||
|
|
"calib/avg_num_step_conf": 1.9453125,
|
||
|
|
"calib/ece": 0.33968750000000003,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.90234375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.1171875,
|
||
|
|
"calib/gap": 0.3065756668291665,
|
||
|
|
"calib/mean_conf": 0.6639062499999999,
|
||
|
|
"calib/mu_c": 0.8710843373493977,
|
||
|
|
"calib/mu_w": 0.5645086705202312,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.90625,
|
||
|
|
"calib/pce": 0.33968750000000003,
|
||
|
|
"calib/std_conf": 0.32286978406307626,
|
||
|
|
"calib/step_conf_rate": 0.90625,
|
||
|
|
"calib/step_q_c": 0.8103314917127072,
|
||
|
|
"calib/step_q_c_n": 181.0,
|
||
|
|
"calib/step_q_gap": 0.19449552956759697,
|
||
|
|
"calib/step_q_w": 0.6158359621451103,
|
||
|
|
"calib/step_q_w_n": 317.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1616,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 5.555555555555555e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 26191759.0,
|
||
|
|
"reward": 0.775390625,
|
||
|
|
"reward_std": 0.1538757085800171,
|
||
|
|
"rewards/accuracy_reward_step": 0.32421875,
|
||
|
|
"rewards/format_reward_step": 0.90234375,
|
||
|
|
"step": 101
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.8637359198998747,
|
||
|
|
"calib/avg_num_step_conf": 1.7109375,
|
||
|
|
"calib/ece": 0.4073437500000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.8984375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.0859375,
|
||
|
|
"calib/gap": 0.29232790988735924,
|
||
|
|
"calib/mean_conf": 0.6729687499999999,
|
||
|
|
"calib/mu_c": 0.8876470588235295,
|
||
|
|
"calib/mu_w": 0.5953191489361702,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.8984375,
|
||
|
|
"calib/pce": 0.4073437500000001,
|
||
|
|
"calib/std_conf": 0.29595676250668357,
|
||
|
|
"calib/step_conf_rate": 0.8984375,
|
||
|
|
"calib/step_q_c": 0.8413432835820894,
|
||
|
|
"calib/step_q_c_n": 134.0,
|
||
|
|
"calib/step_q_gap": 0.19173802042419463,
|
||
|
|
"calib/step_q_w": 0.6496052631578948,
|
||
|
|
"calib/step_q_w_n": 304.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1632,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 5.5e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 26453903.0,
|
||
|
|
"reward": 0.71484375,
|
||
|
|
"reward_std": 0.1617899388074875,
|
||
|
|
"rewards/accuracy_reward_step": 0.265625,
|
||
|
|
"rewards/format_reward_step": 0.8984375,
|
||
|
|
"step": 102
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.8071111111111111,
|
||
|
|
"calib/avg_num_step_conf": 2.03515625,
|
||
|
|
"calib/ece": 0.30921568627450985,
|
||
|
|
"calib/final_conf_rate": 0.99609375,
|
||
|
|
"calib/format_rate": 0.96484375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.09803921568627451,
|
||
|
|
"calib/gap": 0.2580285714285715,
|
||
|
|
"calib/mean_conf": 0.7209803921568628,
|
||
|
|
"calib/mu_c": 0.8727619047619048,
|
||
|
|
"calib/mu_w": 0.6147333333333334,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.96484375,
|
||
|
|
"calib/pce": 0.30921568627450985,
|
||
|
|
"calib/std_conf": 0.2757311485527146,
|
||
|
|
"calib/step_conf_rate": 0.96484375,
|
||
|
|
"calib/step_q_c": 0.8194270833333334,
|
||
|
|
"calib/step_q_c_n": 192.0,
|
||
|
|
"calib/step_q_gap": 0.16100763044579547,
|
||
|
|
"calib/step_q_w": 0.6584194528875379,
|
||
|
|
"calib/step_q_w_n": 329.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 90.0,
|
||
|
|
"completions/max_terminated_length": 90.0,
|
||
|
|
"completions/mean_length": 0.3515625,
|
||
|
|
"completions/mean_terminated_length": 90.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 90.0,
|
||
|
|
"epoch": 0.1648,
|
||
|
|
"grad_norm": 13.912463188171387,
|
||
|
|
"learning_rate": 5.444444444444443e-07,
|
||
|
|
"loss": 0.0387,
|
||
|
|
"num_tokens": 26715417.0,
|
||
|
|
"reward": 0.892578125,
|
||
|
|
"reward_std": 0.1751222312450409,
|
||
|
|
"rewards/accuracy_reward_step": 0.41015625,
|
||
|
|
"rewards/format_reward_step": 0.96484375,
|
||
|
|
"step": 103
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.9921875,
|
||
|
|
"calib/auroc": 0.7132736306201313,
|
||
|
|
"calib/avg_num_step_conf": 1.88671875,
|
||
|
|
"calib/ece": 0.30765624999999996,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.94921875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.08203125,
|
||
|
|
"calib/gap": 0.16475986014843913,
|
||
|
|
"calib/mean_conf": 0.7725,
|
||
|
|
"calib/mu_c": 0.8606722689075632,
|
||
|
|
"calib/mu_w": 0.6959124087591241,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.95703125,
|
||
|
|
"calib/pce": 0.30765624999999996,
|
||
|
|
"calib/std_conf": 0.2190640602654849,
|
||
|
|
"calib/step_conf_rate": 0.95703125,
|
||
|
|
"calib/step_q_c": 0.8040092165898618,
|
||
|
|
"calib/step_q_c_n": 217.0,
|
||
|
|
"calib/step_q_gap": 0.08070094591317001,
|
||
|
|
"calib/step_q_w": 0.7233082706766918,
|
||
|
|
"calib/step_q_w_n": 266.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1664,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 5.388888888888888e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 26970097.0,
|
||
|
|
"reward": 0.939453125,
|
||
|
|
"reward_std": 0.2298126220703125,
|
||
|
|
"rewards/accuracy_reward_step": 0.46484375,
|
||
|
|
"rewards/format_reward_step": 0.94921875,
|
||
|
|
"step": 104
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7389437526835552,
|
||
|
|
"calib/avg_num_step_conf": 2.0859375,
|
||
|
|
"calib/ece": 0.3115625000000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.9765625,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.12890625,
|
||
|
|
"calib/gap": 0.19483285284916885,
|
||
|
|
"calib/mean_conf": 0.77640625,
|
||
|
|
"calib/mu_c": 0.880672268907563,
|
||
|
|
"calib/mu_w": 0.6858394160583942,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.9765625,
|
||
|
|
"calib/pce": 0.3115625000000001,
|
||
|
|
"calib/std_conf": 0.23548564969215746,
|
||
|
|
"calib/step_conf_rate": 0.9765625,
|
||
|
|
"calib/step_q_c": 0.8402409638554217,
|
||
|
|
"calib/step_q_c_n": 249.0,
|
||
|
|
"calib/step_q_gap": 0.12150412175015857,
|
||
|
|
"calib/step_q_w": 0.7187368421052631,
|
||
|
|
"calib/step_q_w_n": 285.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.168,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 5.333333333333333e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 27231649.0,
|
||
|
|
"reward": 0.953125,
|
||
|
|
"reward_std": 0.19588688015937805,
|
||
|
|
"rewards/accuracy_reward_step": 0.46484375,
|
||
|
|
"rewards/format_reward_step": 0.9765625,
|
||
|
|
"step": 105
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.7503485454038479,
|
||
|
|
"calib/avg_num_step_conf": 1.8125,
|
||
|
|
"calib/ece": 0.4481640625,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.91015625,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.06640625,
|
||
|
|
"calib/gap": 0.26279672832047585,
|
||
|
|
"calib/mean_conf": 0.6551953125,
|
||
|
|
"calib/mu_c": 0.8635849056603773,
|
||
|
|
"calib/mu_w": 0.6007881773399014,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.9140625,
|
||
|
|
"calib/pce": 0.4481640625,
|
||
|
|
"calib/std_conf": 0.3329671896043623,
|
||
|
|
"calib/step_conf_rate": 0.9140625,
|
||
|
|
"calib/step_q_c": 0.8060465116279069,
|
||
|
|
"calib/step_q_c_n": 129.0,
|
||
|
|
"calib/step_q_gap": 0.12658382506074273,
|
||
|
|
"calib/step_q_w": 0.6794626865671641,
|
||
|
|
"calib/step_q_w_n": 335.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1696,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 5.277777777777777e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 27493401.0,
|
||
|
|
"reward": 0.662109375,
|
||
|
|
"reward_std": 0.21436436474323273,
|
||
|
|
"rewards/accuracy_reward_step": 0.20703125,
|
||
|
|
"rewards/format_reward_step": 0.91015625,
|
||
|
|
"step": 106
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.6810475625041229,
|
||
|
|
"calib/avg_num_step_conf": 1.58984375,
|
||
|
|
"calib/ece": 0.41011718750000004,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.90234375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.140625,
|
||
|
|
"calib/gap": 0.17468500560723021,
|
||
|
|
"calib/mean_conf": 0.7733984375,
|
||
|
|
"calib/mu_c": 0.8846236559139786,
|
||
|
|
"calib/mu_w": 0.7099386503067484,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.90234375,
|
||
|
|
"calib/pce": 0.41011718750000004,
|
||
|
|
"calib/std_conf": 0.2439225619895761,
|
||
|
|
"calib/step_conf_rate": 0.90234375,
|
||
|
|
"calib/step_q_c": 0.859607843137255,
|
||
|
|
"calib/step_q_c_n": 153.0,
|
||
|
|
"calib/step_q_gap": 0.13110390612938116,
|
||
|
|
"calib/step_q_w": 0.7285039370078739,
|
||
|
|
"calib/step_q_w_n": 254.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1712,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 5.222222222222223e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 27754953.0,
|
||
|
|
"reward": 0.814453125,
|
||
|
|
"reward_std": 0.1844952404499054,
|
||
|
|
"rewards/accuracy_reward_step": 0.36328125,
|
||
|
|
"rewards/format_reward_step": 0.90234375,
|
||
|
|
"step": 107
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.8188978829389788,
|
||
|
|
"calib/avg_num_step_conf": 1.51171875,
|
||
|
|
"calib/ece": 0.2666406250000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.8203125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.1015625,
|
||
|
|
"calib/gap": 0.2998978829389789,
|
||
|
|
"calib/mean_conf": 0.6963281250000001,
|
||
|
|
"calib/mu_c": 0.8673636363636364,
|
||
|
|
"calib/mu_w": 0.5674657534246575,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.8203125,
|
||
|
|
"calib/pce": 0.2666406250000001,
|
||
|
|
"calib/std_conf": 0.2968121541210608,
|
||
|
|
"calib/step_conf_rate": 0.8203125,
|
||
|
|
"calib/step_q_c": 0.8295327102803738,
|
||
|
|
"calib/step_q_c_n": 214.0,
|
||
|
|
"calib/step_q_gap": 0.14311652530927543,
|
||
|
|
"calib/step_q_w": 0.6864161849710984,
|
||
|
|
"calib/step_q_w_n": 173.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1728,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 5.166666666666667e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 28015593.0,
|
||
|
|
"reward": 0.83984375,
|
||
|
|
"reward_std": 0.15671934187412262,
|
||
|
|
"rewards/accuracy_reward_step": 0.4296875,
|
||
|
|
"rewards/format_reward_step": 0.8203125,
|
||
|
|
"step": 108
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.727376707259638,
|
||
|
|
"calib/avg_num_step_conf": 1.8828125,
|
||
|
|
"calib/ece": 0.41750000000000004,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.921875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.1171875,
|
||
|
|
"calib/gap": 0.17363654713045829,
|
||
|
|
"calib/mean_conf": 0.76515625,
|
||
|
|
"calib/mu_c": 0.8784269662921349,
|
||
|
|
"calib/mu_w": 0.7047904191616766,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.921875,
|
||
|
|
"calib/pce": 0.41750000000000004,
|
||
|
|
"calib/std_conf": 0.2495370224754986,
|
||
|
|
"calib/step_conf_rate": 0.921875,
|
||
|
|
"calib/step_q_c": 0.8147619047619048,
|
||
|
|
"calib/step_q_c_n": 189.0,
|
||
|
|
"calib/step_q_gap": 0.08438647814074451,
|
||
|
|
"calib/step_q_w": 0.7303754266211603,
|
||
|
|
"calib/step_q_w_n": 293.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1744,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 5.111111111111111e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 28271961.0,
|
||
|
|
"reward": 0.80859375,
|
||
|
|
"reward_std": 0.18457874655723572,
|
||
|
|
"rewards/accuracy_reward_step": 0.34765625,
|
||
|
|
"rewards/format_reward_step": 0.921875,
|
||
|
|
"step": 109
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.9921875,
|
||
|
|
"calib/auroc": 0.787047371031746,
|
||
|
|
"calib/avg_num_step_conf": 2.2421875,
|
||
|
|
"calib/ece": 0.30824218749999993,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.92578125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.1328125,
|
||
|
|
"calib/gap": 0.22788690476190476,
|
||
|
|
"calib/mean_conf": 0.7457421875,
|
||
|
|
"calib/mu_c": 0.8739285714285714,
|
||
|
|
"calib/mu_w": 0.6460416666666666,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.93359375,
|
||
|
|
"calib/pce": 0.30824218749999993,
|
||
|
|
"calib/std_conf": 0.24994733026722818,
|
||
|
|
"calib/step_conf_rate": 0.93359375,
|
||
|
|
"calib/step_q_c": 0.8352301255230126,
|
||
|
|
"calib/step_q_c_n": 239.0,
|
||
|
|
"calib/step_q_gap": 0.13048385686629627,
|
||
|
|
"calib/step_q_w": 0.7047462686567163,
|
||
|
|
"calib/step_q_w_n": 335.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.176,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 5.055555555555555e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 28534105.0,
|
||
|
|
"reward": 0.900390625,
|
||
|
|
"reward_std": 0.16185137629508972,
|
||
|
|
"rewards/accuracy_reward_step": 0.4375,
|
||
|
|
"rewards/format_reward_step": 0.92578125,
|
||
|
|
"step": 110
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.8895753622279371,
|
||
|
|
"calib/avg_num_step_conf": 1.4921875,
|
||
|
|
"calib/ece": 0.2669531250000002,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.8671875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.078125,
|
||
|
|
"calib/gap": 0.3484952643793515,
|
||
|
|
"calib/mean_conf": 0.6849218749999999,
|
||
|
|
"calib/mu_c": 0.8877570093457945,
|
||
|
|
"calib/mu_w": 0.539261744966443,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.8671875,
|
||
|
|
"calib/pce": 0.2669531250000002,
|
||
|
|
"calib/std_conf": 0.3133712756403885,
|
||
|
|
"calib/step_conf_rate": 0.8671875,
|
||
|
|
"calib/step_q_c": 0.8516292134831461,
|
||
|
|
"calib/step_q_c_n": 178.0,
|
||
|
|
"calib/step_q_gap": 0.20246254681647957,
|
||
|
|
"calib/step_q_w": 0.6491666666666666,
|
||
|
|
"calib/step_q_w_n": 204.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1776,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 5e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 28793977.0,
|
||
|
|
"reward": 0.8515625,
|
||
|
|
"reward_std": 0.18710467219352722,
|
||
|
|
"rewards/accuracy_reward_step": 0.41796875,
|
||
|
|
"rewards/format_reward_step": 0.8671875,
|
||
|
|
"step": 111
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7499269005847954,
|
||
|
|
"calib/avg_num_step_conf": 1.6796875,
|
||
|
|
"calib/ece": 0.4177343750000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.9140625,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.08984375,
|
||
|
|
"calib/gap": 0.22549122807017552,
|
||
|
|
"calib/mean_conf": 0.714609375,
|
||
|
|
"calib/mu_c": 0.8731578947368421,
|
||
|
|
"calib/mu_w": 0.6476666666666666,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.9140625,
|
||
|
|
"calib/pce": 0.4177343750000001,
|
||
|
|
"calib/std_conf": 0.2815176569277838,
|
||
|
|
"calib/step_conf_rate": 0.9140625,
|
||
|
|
"calib/step_q_c": 0.8282608695652175,
|
||
|
|
"calib/step_q_c_n": 138.0,
|
||
|
|
"calib/step_q_gap": 0.12326086956521742,
|
||
|
|
"calib/step_q_w": 0.7050000000000001,
|
||
|
|
"calib/step_q_w_n": 292.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1792,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 4.944444444444445e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 29056121.0,
|
||
|
|
"reward": 0.75390625,
|
||
|
|
"reward_std": 0.17721685767173767,
|
||
|
|
"rewards/accuracy_reward_step": 0.296875,
|
||
|
|
"rewards/format_reward_step": 0.9140625,
|
||
|
|
"step": 112
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.6963464486458866,
|
||
|
|
"calib/avg_num_step_conf": 1.80078125,
|
||
|
|
"calib/ece": 0.39121568627450987,
|
||
|
|
"calib/final_conf_rate": 0.99609375,
|
||
|
|
"calib/format_rate": 0.9375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.13333333333333333,
|
||
|
|
"calib/gap": 0.15047713336739899,
|
||
|
|
"calib/mean_conf": 0.7872941176470588,
|
||
|
|
"calib/mu_c": 0.8769902912621359,
|
||
|
|
"calib/mu_w": 0.7265131578947369,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.94140625,
|
||
|
|
"calib/pce": 0.3872941176470589,
|
||
|
|
"calib/std_conf": 0.20832885900616288,
|
||
|
|
"calib/step_conf_rate": 0.94140625,
|
||
|
|
"calib/step_q_c": 0.8564848484848484,
|
||
|
|
"calib/step_q_c_n": 165.0,
|
||
|
|
"calib/step_q_gap": 0.11553890253890253,
|
||
|
|
"calib/step_q_w": 0.7409459459459459,
|
||
|
|
"calib/step_q_w_n": 296.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 109.0,
|
||
|
|
"completions/max_terminated_length": 109.0,
|
||
|
|
"completions/mean_length": 0.42578125,
|
||
|
|
"completions/mean_terminated_length": 109.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 109.0,
|
||
|
|
"epoch": 0.1808,
|
||
|
|
"grad_norm": 13.413500785827637,
|
||
|
|
"learning_rate": 4.888888888888889e-07,
|
||
|
|
"loss": 0.0386,
|
||
|
|
"num_tokens": 29312030.0,
|
||
|
|
"reward": 0.87109375,
|
||
|
|
"reward_std": 0.26899805665016174,
|
||
|
|
"rewards/accuracy_reward_step": 0.40234375,
|
||
|
|
"rewards/format_reward_step": 0.9375,
|
||
|
|
"step": 113
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.8162942392146733,
|
||
|
|
"calib/avg_num_step_conf": 1.37109375,
|
||
|
|
"calib/ece": 0.2621875,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.77734375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.0859375,
|
||
|
|
"calib/gap": 0.3598553345388787,
|
||
|
|
"calib/mean_conf": 0.6371875,
|
||
|
|
"calib/mu_c": 0.8592857142857141,
|
||
|
|
"calib/mu_w": 0.4994303797468354,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.78125,
|
||
|
|
"calib/pce": 0.25828125,
|
||
|
|
"calib/std_conf": 0.35472466765612737,
|
||
|
|
"calib/step_conf_rate": 0.78125,
|
||
|
|
"calib/step_q_c": 0.834406779661017,
|
||
|
|
"calib/step_q_c_n": 177.0,
|
||
|
|
"calib/step_q_gap": 0.2173378141437755,
|
||
|
|
"calib/step_q_w": 0.6170689655172414,
|
||
|
|
"calib/step_q_w_n": 174.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1824,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 4.833333333333333e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 29572862.0,
|
||
|
|
"reward": 0.771484375,
|
||
|
|
"reward_std": 0.11413875222206116,
|
||
|
|
"rewards/accuracy_reward_step": 0.3828125,
|
||
|
|
"rewards/format_reward_step": 0.77734375,
|
||
|
|
"step": 114
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.81134033203125,
|
||
|
|
"calib/avg_num_step_conf": 1.8671875,
|
||
|
|
"calib/ece": 0.24710937500000002,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.87109375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.15234375,
|
||
|
|
"calib/gap": 0.270625,
|
||
|
|
"calib/mean_conf": 0.747109375,
|
||
|
|
"calib/mu_c": 0.882421875,
|
||
|
|
"calib/mu_w": 0.611796875,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.87109375,
|
||
|
|
"calib/pce": 0.24710937500000002,
|
||
|
|
"calib/std_conf": 0.26624164407002404,
|
||
|
|
"calib/step_conf_rate": 0.87109375,
|
||
|
|
"calib/step_q_c": 0.8349027237354085,
|
||
|
|
"calib/step_q_c_n": 257.0,
|
||
|
|
"calib/step_q_gap": 0.13060408120147193,
|
||
|
|
"calib/step_q_w": 0.7042986425339366,
|
||
|
|
"calib/step_q_w_n": 221.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.184,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 4.777777777777778e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 29833878.0,
|
||
|
|
"reward": 0.935546875,
|
||
|
|
"reward_std": 0.16707748174667358,
|
||
|
|
"rewards/accuracy_reward_step": 0.5,
|
||
|
|
"rewards/format_reward_step": 0.87109375,
|
||
|
|
"step": 115
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7463149971774447,
|
||
|
|
"calib/avg_num_step_conf": 1.82421875,
|
||
|
|
"calib/ece": 0.3782421874999998,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.9296875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.13671875,
|
||
|
|
"calib/gap": 0.14845574860440292,
|
||
|
|
"calib/mean_conf": 0.7962109374999999,
|
||
|
|
"calib/mu_c": 0.8826168224299064,
|
||
|
|
"calib/mu_w": 0.7341610738255034,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.9296875,
|
||
|
|
"calib/pce": 0.3782421874999998,
|
||
|
|
"calib/std_conf": 0.18975955807381903,
|
||
|
|
"calib/step_conf_rate": 0.9296875,
|
||
|
|
"calib/step_q_c": 0.8487557603686635,
|
||
|
|
"calib/step_q_c_n": 217.0,
|
||
|
|
"calib/step_q_gap": 0.1251557603686636,
|
||
|
|
"calib/step_q_w": 0.7235999999999999,
|
||
|
|
"calib/step_q_w_n": 250.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1856,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 4.722222222222222e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 30093774.0,
|
||
|
|
"reward": 0.8828125,
|
||
|
|
"reward_std": 0.1823364645242691,
|
||
|
|
"rewards/accuracy_reward_step": 0.41796875,
|
||
|
|
"rewards/format_reward_step": 0.9296875,
|
||
|
|
"step": 116
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7509335782063055,
|
||
|
|
"calib/avg_num_step_conf": 1.71875,
|
||
|
|
"calib/ece": 0.2668359375,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.90234375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.140625,
|
||
|
|
"calib/gap": 0.2454441383532292,
|
||
|
|
"calib/mean_conf": 0.7394921875,
|
||
|
|
"calib/mu_c": 0.8689256198347107,
|
||
|
|
"calib/mu_w": 0.6234814814814815,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.90234375,
|
||
|
|
"calib/pce": 0.2668359375,
|
||
|
|
"calib/std_conf": 0.2719032599132729,
|
||
|
|
"calib/step_conf_rate": 0.90234375,
|
||
|
|
"calib/step_q_c": 0.8196265560165974,
|
||
|
|
"calib/step_q_c_n": 241.0,
|
||
|
|
"calib/step_q_gap": 0.11384766154423565,
|
||
|
|
"calib/step_q_w": 0.7057788944723618,
|
||
|
|
"calib/step_q_w_n": 199.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1872,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 4.6666666666666666e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 30350870.0,
|
||
|
|
"reward": 0.923828125,
|
||
|
|
"reward_std": 0.241466224193573,
|
||
|
|
"rewards/accuracy_reward_step": 0.47265625,
|
||
|
|
"rewards/format_reward_step": 0.90234375,
|
||
|
|
"step": 117
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.7673202614379085,
|
||
|
|
"calib/avg_num_step_conf": 2.0234375,
|
||
|
|
"calib/ece": 0.408671875,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.9140625,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.16015625,
|
||
|
|
"calib/gap": 0.2034303405572756,
|
||
|
|
"calib/mean_conf": 0.7407031249999999,
|
||
|
|
"calib/mu_c": 0.8765882352941176,
|
||
|
|
"calib/mu_w": 0.673157894736842,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.91796875,
|
||
|
|
"calib/pce": 0.408671875,
|
||
|
|
"calib/std_conf": 0.260792612750504,
|
||
|
|
"calib/step_conf_rate": 0.91796875,
|
||
|
|
"calib/step_q_c": 0.843072625698324,
|
||
|
|
"calib/step_q_c_n": 179.0,
|
||
|
|
"calib/step_q_gap": 0.1142230681762001,
|
||
|
|
"calib/step_q_w": 0.7288495575221239,
|
||
|
|
"calib/step_q_w_n": 339.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1888,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 4.611111111111111e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 30609454.0,
|
||
|
|
"reward": 0.7890625,
|
||
|
|
"reward_std": 0.22457295656204224,
|
||
|
|
"rewards/accuracy_reward_step": 0.33203125,
|
||
|
|
"rewards/format_reward_step": 0.9140625,
|
||
|
|
"step": 118
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.7685700261780105,
|
||
|
|
"calib/avg_num_step_conf": 1.8203125,
|
||
|
|
"calib/ece": 0.4750588235294119,
|
||
|
|
"calib/final_conf_rate": 0.99609375,
|
||
|
|
"calib/format_rate": 0.9296875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.1568627450980392,
|
||
|
|
"calib/gap": 0.22265543193717285,
|
||
|
|
"calib/mean_conf": 0.7260392156862745,
|
||
|
|
"calib/mu_c": 0.8928125,
|
||
|
|
"calib/mu_w": 0.6701570680628272,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.9296875,
|
||
|
|
"calib/pce": 0.4750588235294119,
|
||
|
|
"calib/std_conf": 0.2798185659220613,
|
||
|
|
"calib/step_conf_rate": 0.9296875,
|
||
|
|
"calib/step_q_c": 0.8353543307086614,
|
||
|
|
"calib/step_q_c_n": 127.0,
|
||
|
|
"calib/step_q_gap": 0.12830418321603598,
|
||
|
|
"calib/step_q_w": 0.7070501474926254,
|
||
|
|
"calib/step_q_w_n": 339.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 115.0,
|
||
|
|
"completions/max_terminated_length": 115.0,
|
||
|
|
"completions/mean_length": 0.44921875,
|
||
|
|
"completions/mean_terminated_length": 115.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 115.0,
|
||
|
|
"epoch": 0.1904,
|
||
|
|
"grad_norm": 12.073020935058594,
|
||
|
|
"learning_rate": 4.555555555555555e-07,
|
||
|
|
"loss": 0.0386,
|
||
|
|
"num_tokens": 30864129.0,
|
||
|
|
"reward": 0.71484375,
|
||
|
|
"reward_std": 0.1937408149242401,
|
||
|
|
"rewards/accuracy_reward_step": 0.25,
|
||
|
|
"rewards/format_reward_step": 0.9296875,
|
||
|
|
"step": 119
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7531453946040503,
|
||
|
|
"calib/avg_num_step_conf": 1.7421875,
|
||
|
|
"calib/ece": 0.4002734375,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.94140625,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.125,
|
||
|
|
"calib/gap": 0.20211061024019383,
|
||
|
|
"calib/mean_conf": 0.7479296875,
|
||
|
|
"calib/mu_c": 0.8797752808988766,
|
||
|
|
"calib/mu_w": 0.6776646706586827,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.94140625,
|
||
|
|
"calib/pce": 0.4002734375,
|
||
|
|
"calib/std_conf": 0.26558604666125124,
|
||
|
|
"calib/step_conf_rate": 0.94140625,
|
||
|
|
"calib/step_q_c": 0.8450292397660819,
|
||
|
|
"calib/step_q_c_n": 171.0,
|
||
|
|
"calib/step_q_gap": 0.159829239766082,
|
||
|
|
"calib/step_q_w": 0.6851999999999999,
|
||
|
|
"calib/step_q_w_n": 275.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.192,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 4.5e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 31120377.0,
|
||
|
|
"reward": 0.818359375,
|
||
|
|
"reward_std": 0.15873654186725616,
|
||
|
|
"rewards/accuracy_reward_step": 0.34765625,
|
||
|
|
"rewards/format_reward_step": 0.94140625,
|
||
|
|
"step": 120
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.7926492262343405,
|
||
|
|
"calib/avg_num_step_conf": 1.859375,
|
||
|
|
"calib/ece": 0.20500000000000004,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.921875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.1640625,
|
||
|
|
"calib/gap": 0.27243429132891184,
|
||
|
|
"calib/mean_conf": 0.7440625,
|
||
|
|
"calib/mu_c": 0.8696376811594202,
|
||
|
|
"calib/mu_w": 0.5972033898305084,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.92578125,
|
||
|
|
"calib/pce": 0.20500000000000004,
|
||
|
|
"calib/std_conf": 0.26311813381777777,
|
||
|
|
"calib/step_conf_rate": 0.92578125,
|
||
|
|
"calib/step_q_c": 0.8423367697594502,
|
||
|
|
"calib/step_q_c_n": 291.0,
|
||
|
|
"calib/step_q_gap": 0.1619043373270177,
|
||
|
|
"calib/step_q_w": 0.6804324324324325,
|
||
|
|
"calib/step_q_w_n": 185.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1936,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 4.444444444444444e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 31376505.0,
|
||
|
|
"reward": 1.0,
|
||
|
|
"reward_std": 0.18774083256721497,
|
||
|
|
"rewards/accuracy_reward_step": 0.5390625,
|
||
|
|
"rewards/format_reward_step": 0.921875,
|
||
|
|
"step": 121
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.98828125,
|
||
|
|
"calib/auroc": 0.7796604437229437,
|
||
|
|
"calib/avg_num_step_conf": 1.65234375,
|
||
|
|
"calib/ece": 0.33843750000000006,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.8515625,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.0703125,
|
||
|
|
"calib/gap": 0.26956709956709957,
|
||
|
|
"calib/mean_conf": 0.6821875,
|
||
|
|
"calib/mu_c": 0.859090909090909,
|
||
|
|
"calib/mu_w": 0.5895238095238095,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.86328125,
|
||
|
|
"calib/pce": 0.33843750000000006,
|
||
|
|
"calib/std_conf": 0.3078794343468722,
|
||
|
|
"calib/step_conf_rate": 0.86328125,
|
||
|
|
"calib/step_q_c": 0.8230000000000001,
|
||
|
|
"calib/step_q_c_n": 150.0,
|
||
|
|
"calib/step_q_gap": 0.19248717948717953,
|
||
|
|
"calib/step_q_w": 0.6305128205128205,
|
||
|
|
"calib/step_q_w_n": 273.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1952,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 4.3888888888888884e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 31637169.0,
|
||
|
|
"reward": 0.76953125,
|
||
|
|
"reward_std": 0.1618141233921051,
|
||
|
|
"rewards/accuracy_reward_step": 0.34375,
|
||
|
|
"rewards/format_reward_step": 0.8515625,
|
||
|
|
"step": 122
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.7666422152943334,
|
||
|
|
"calib/avg_num_step_conf": 1.61328125,
|
||
|
|
"calib/ece": 0.2636718750000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.890625,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.10546875,
|
||
|
|
"calib/gap": 0.2334384742343666,
|
||
|
|
"calib/mean_conf": 0.739453125,
|
||
|
|
"calib/mu_c": 0.8607317073170732,
|
||
|
|
"calib/mu_w": 0.6272932330827066,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.89453125,
|
||
|
|
"calib/pce": 0.26132812500000013,
|
||
|
|
"calib/std_conf": 0.2541693228494233,
|
||
|
|
"calib/step_conf_rate": 0.89453125,
|
||
|
|
"calib/step_q_c": 0.8172018348623852,
|
||
|
|
"calib/step_q_c_n": 218.0,
|
||
|
|
"calib/step_q_gap": 0.1495095271700776,
|
||
|
|
"calib/step_q_w": 0.6676923076923076,
|
||
|
|
"calib/step_q_w_n": 195.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.1968,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 4.3333333333333335e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 31893617.0,
|
||
|
|
"reward": 0.92578125,
|
||
|
|
"reward_std": 0.20838135480880737,
|
||
|
|
"rewards/accuracy_reward_step": 0.48046875,
|
||
|
|
"rewards/format_reward_step": 0.890625,
|
||
|
|
"step": 123
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7086222992798079,
|
||
|
|
"calib/avg_num_step_conf": 1.98828125,
|
||
|
|
"calib/ece": 0.409372549019608,
|
||
|
|
"calib/final_conf_rate": 0.99609375,
|
||
|
|
"calib/format_rate": 0.8984375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.17254901960784313,
|
||
|
|
"calib/gap": 0.17711189650573456,
|
||
|
|
"calib/mean_conf": 0.770156862745098,
|
||
|
|
"calib/mu_c": 0.8833695652173911,
|
||
|
|
"calib/mu_w": 0.7062576687116565,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.90234375,
|
||
|
|
"calib/pce": 0.409372549019608,
|
||
|
|
"calib/std_conf": 0.24438312676815713,
|
||
|
|
"calib/step_conf_rate": 0.90234375,
|
||
|
|
"calib/step_q_c": 0.841400966183575,
|
||
|
|
"calib/step_q_c_n": 207.0,
|
||
|
|
"calib/step_q_gap": 0.12030825095178699,
|
||
|
|
"calib/step_q_w": 0.721092715231788,
|
||
|
|
"calib/step_q_w_n": 302.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 174.0,
|
||
|
|
"completions/max_terminated_length": 174.0,
|
||
|
|
"completions/mean_length": 0.6796875,
|
||
|
|
"completions/mean_terminated_length": 174.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 174.0,
|
||
|
|
"epoch": 0.1984,
|
||
|
|
"grad_norm": 6.5422492027282715,
|
||
|
|
"learning_rate": 4.2777777777777775e-07,
|
||
|
|
"loss": 0.0211,
|
||
|
|
"num_tokens": 32152799.0,
|
||
|
|
"reward": 0.80859375,
|
||
|
|
"reward_std": 0.24629858136177063,
|
||
|
|
"rewards/accuracy_reward_step": 0.359375,
|
||
|
|
"rewards/format_reward_step": 0.8984375,
|
||
|
|
"step": 124
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.9921875,
|
||
|
|
"calib/auroc": 0.8502252252252251,
|
||
|
|
"calib/avg_num_step_conf": 1.93359375,
|
||
|
|
"calib/ece": 0.30007812500000003,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.94140625,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.078125,
|
||
|
|
"calib/gap": 0.25560060060060086,
|
||
|
|
"calib/mean_conf": 0.721953125,
|
||
|
|
"calib/mu_c": 0.8697222222222225,
|
||
|
|
"calib/mu_w": 0.6141216216216216,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.94921875,
|
||
|
|
"calib/pce": 0.30007812500000003,
|
||
|
|
"calib/std_conf": 0.24888205851915962,
|
||
|
|
"calib/step_conf_rate": 0.94921875,
|
||
|
|
"calib/step_q_c": 0.8381142857142858,
|
||
|
|
"calib/step_q_c_n": 175.0,
|
||
|
|
"calib/step_q_gap": 0.1968330357142859,
|
||
|
|
"calib/step_q_w": 0.6412812499999999,
|
||
|
|
"calib/step_q_w_n": 320.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.2,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 4.222222222222222e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 32413503.0,
|
||
|
|
"reward": 0.892578125,
|
||
|
|
"reward_std": 0.148696631193161,
|
||
|
|
"rewards/accuracy_reward_step": 0.421875,
|
||
|
|
"rewards/format_reward_step": 0.94140625,
|
||
|
|
"step": 125
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7811994525320394,
|
||
|
|
"calib/avg_num_step_conf": 1.6484375,
|
||
|
|
"calib/ece": 0.30580392156862746,
|
||
|
|
"calib/final_conf_rate": 0.99609375,
|
||
|
|
"calib/format_rate": 0.90625,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.11764705882352941,
|
||
|
|
"calib/gap": 0.24642777155655093,
|
||
|
|
"calib/mean_conf": 0.7528627450980392,
|
||
|
|
"calib/mu_c": 0.8891228070175439,
|
||
|
|
"calib/mu_w": 0.6426950354609929,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.91015625,
|
||
|
|
"calib/pce": 0.30580392156862746,
|
||
|
|
"calib/std_conf": 0.2725235028258378,
|
||
|
|
"calib/step_conf_rate": 0.91015625,
|
||
|
|
"calib/step_q_c": 0.8609844559585493,
|
||
|
|
"calib/step_q_c_n": 193.0,
|
||
|
|
"calib/step_q_gap": 0.13076611534719573,
|
||
|
|
"calib/step_q_w": 0.7302183406113536,
|
||
|
|
"calib/step_q_w_n": 229.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 120.0,
|
||
|
|
"completions/max_terminated_length": 120.0,
|
||
|
|
"completions/mean_length": 0.46875,
|
||
|
|
"completions/mean_terminated_length": 120.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 120.0,
|
||
|
|
"epoch": 0.2016,
|
||
|
|
"grad_norm": 11.591796875,
|
||
|
|
"learning_rate": 4.1666666666666667e-07,
|
||
|
|
"loss": 0.0386,
|
||
|
|
"num_tokens": 32674999.0,
|
||
|
|
"reward": 0.8984375,
|
||
|
|
"reward_std": 0.1374414563179016,
|
||
|
|
"rewards/accuracy_reward_step": 0.4453125,
|
||
|
|
"rewards/format_reward_step": 0.90625,
|
||
|
|
"step": 126
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.76854035639413,
|
||
|
|
"calib/avg_num_step_conf": 1.76171875,
|
||
|
|
"calib/ece": 0.32592156862745103,
|
||
|
|
"calib/final_conf_rate": 0.99609375,
|
||
|
|
"calib/format_rate": 0.85546875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.15294117647058825,
|
||
|
|
"calib/gap": 0.28083333333333327,
|
||
|
|
"calib/mean_conf": 0.702392156862745,
|
||
|
|
"calib/mu_c": 0.8775,
|
||
|
|
"calib/mu_w": 0.5966666666666667,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.86328125,
|
||
|
|
"calib/pce": 0.32592156862745103,
|
||
|
|
"calib/std_conf": 0.3155029646814732,
|
||
|
|
"calib/step_conf_rate": 0.86328125,
|
||
|
|
"calib/step_q_c": 0.8281818181818181,
|
||
|
|
"calib/step_q_c_n": 198.0,
|
||
|
|
"calib/step_q_gap": 0.1588537549407113,
|
||
|
|
"calib/step_q_w": 0.6693280632411068,
|
||
|
|
"calib/step_q_w_n": 253.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 206.0,
|
||
|
|
"completions/max_terminated_length": 206.0,
|
||
|
|
"completions/mean_length": 0.8046875,
|
||
|
|
"completions/mean_terminated_length": 206.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 206.0,
|
||
|
|
"epoch": 0.2032,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 4.1111111111111107e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 32937005.0,
|
||
|
|
"reward": 0.806640625,
|
||
|
|
"reward_std": 0.15518707036972046,
|
||
|
|
"rewards/accuracy_reward_step": 0.37890625,
|
||
|
|
"rewards/format_reward_step": 0.85546875,
|
||
|
|
"step": 127
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.641909120627656,
|
||
|
|
"calib/avg_num_step_conf": 1.5390625,
|
||
|
|
"calib/ece": 0.4167187500000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.92578125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.11328125,
|
||
|
|
"calib/gap": 0.12800523046747292,
|
||
|
|
"calib/mean_conf": 0.7878124999999999,
|
||
|
|
"calib/mu_c": 0.868315789473684,
|
||
|
|
"calib/mu_w": 0.7403105590062111,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.92578125,
|
||
|
|
"calib/pce": 0.4167187500000001,
|
||
|
|
"calib/std_conf": 0.22554426360195906,
|
||
|
|
"calib/step_conf_rate": 0.92578125,
|
||
|
|
"calib/step_q_c": 0.8166473988439304,
|
||
|
|
"calib/step_q_c_n": 173.0,
|
||
|
|
"calib/step_q_gap": 0.045108937305468944,
|
||
|
|
"calib/step_q_w": 0.7715384615384615,
|
||
|
|
"calib/step_q_w_n": 221.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.2048,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 4.055555555555555e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 33197445.0,
|
||
|
|
"reward": 0.833984375,
|
||
|
|
"reward_std": 0.15270306169986725,
|
||
|
|
"rewards/accuracy_reward_step": 0.37109375,
|
||
|
|
"rewards/format_reward_step": 0.92578125,
|
||
|
|
"step": 128
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.8298757763975155,
|
||
|
|
"calib/avg_num_step_conf": 1.61328125,
|
||
|
|
"calib/ece": 0.27121568627450976,
|
||
|
|
"calib/final_conf_rate": 0.99609375,
|
||
|
|
"calib/format_rate": 0.86328125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.11764705882352941,
|
||
|
|
"calib/gap": 0.27840062111801234,
|
||
|
|
"calib/mean_conf": 0.7221960784313726,
|
||
|
|
"calib/mu_c": 0.8750434782608695,
|
||
|
|
"calib/mu_w": 0.5966428571428571,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.8671875,
|
||
|
|
"calib/pce": 0.27121568627450976,
|
||
|
|
"calib/std_conf": 0.2796494317721809,
|
||
|
|
"calib/step_conf_rate": 0.8671875,
|
||
|
|
"calib/step_q_c": 0.8413861386138615,
|
||
|
|
"calib/step_q_c_n": 202.0,
|
||
|
|
"calib/step_q_gap": 0.17053305804514118,
|
||
|
|
"calib/step_q_w": 0.6708530805687203,
|
||
|
|
"calib/step_q_w_n": 211.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.2064,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 4e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 33454925.0,
|
||
|
|
"reward": 0.880859375,
|
||
|
|
"reward_std": 0.2340913712978363,
|
||
|
|
"rewards/accuracy_reward_step": 0.44921875,
|
||
|
|
"rewards/format_reward_step": 0.86328125,
|
||
|
|
"step": 129
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.8435849056603772,
|
||
|
|
"calib/avg_num_step_conf": 1.80859375,
|
||
|
|
"calib/ece": 0.3078906250000001,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.89453125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.0859375,
|
||
|
|
"calib/gap": 0.2810037735849057,
|
||
|
|
"calib/mean_conf": 0.721953125,
|
||
|
|
"calib/mu_c": 0.8866037735849057,
|
||
|
|
"calib/mu_w": 0.6056,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.89453125,
|
||
|
|
"calib/pce": 0.3078906250000001,
|
||
|
|
"calib/std_conf": 0.26936719418432226,
|
||
|
|
"calib/step_conf_rate": 0.89453125,
|
||
|
|
"calib/step_q_c": 0.8460000000000001,
|
||
|
|
"calib/step_q_c_n": 190.0,
|
||
|
|
"calib/step_q_gap": 0.18424175824175837,
|
||
|
|
"calib/step_q_w": 0.6617582417582417,
|
||
|
|
"calib/step_q_w_n": 273.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.208,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 3.9444444444444444e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 33710109.0,
|
||
|
|
"reward": 0.861328125,
|
||
|
|
"reward_std": 0.12092234194278717,
|
||
|
|
"rewards/accuracy_reward_step": 0.4140625,
|
||
|
|
"rewards/format_reward_step": 0.89453125,
|
||
|
|
"step": 130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7044871794871794,
|
||
|
|
"calib/avg_num_step_conf": 1.734375,
|
||
|
|
"calib/ece": 0.33089843750000003,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.921875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.13671875,
|
||
|
|
"calib/gap": 0.21905384615384627,
|
||
|
|
"calib/mean_conf": 0.7094140625,
|
||
|
|
"calib/mu_c": 0.8429000000000002,
|
||
|
|
"calib/mu_w": 0.6238461538461539,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.92578125,
|
||
|
|
"calib/pce": 0.32484375,
|
||
|
|
"calib/std_conf": 0.29053568593590373,
|
||
|
|
"calib/step_conf_rate": 0.92578125,
|
||
|
|
"calib/step_q_c": 0.8193820224719103,
|
||
|
|
"calib/step_q_c_n": 178.0,
|
||
|
|
"calib/step_q_gap": 0.12550984202078252,
|
||
|
|
"calib/step_q_w": 0.6938721804511278,
|
||
|
|
"calib/step_q_w_n": 266.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.2096,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 3.888888888888889e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 33972253.0,
|
||
|
|
"reward": 0.8515625,
|
||
|
|
"reward_std": 0.23300394415855408,
|
||
|
|
"rewards/accuracy_reward_step": 0.390625,
|
||
|
|
"rewards/format_reward_step": 0.921875,
|
||
|
|
"step": 131
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7979328723710746,
|
||
|
|
"calib/avg_num_step_conf": 1.46875,
|
||
|
|
"calib/ece": 0.40511718750000014,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.8359375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.125,
|
||
|
|
"calib/gap": 0.2582699510227602,
|
||
|
|
"calib/mean_conf": 0.7082421875,
|
||
|
|
"calib/mu_c": 0.887820512820513,
|
||
|
|
"calib/mu_w": 0.6295505617977528,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.8359375,
|
||
|
|
"calib/pce": 0.4043359375000001,
|
||
|
|
"calib/std_conf": 0.2905242364592236,
|
||
|
|
"calib/step_conf_rate": 0.8359375,
|
||
|
|
"calib/step_q_c": 0.8483333333333334,
|
||
|
|
"calib/step_q_c_n": 150.0,
|
||
|
|
"calib/step_q_gap": 0.14718289085545733,
|
||
|
|
"calib/step_q_w": 0.7011504424778761,
|
||
|
|
"calib/step_q_w_n": 226.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.2112,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 3.8333333333333335e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 34232701.0,
|
||
|
|
"reward": 0.72265625,
|
||
|
|
"reward_std": 0.17576810717582703,
|
||
|
|
"rewards/accuracy_reward_step": 0.3046875,
|
||
|
|
"rewards/format_reward_step": 0.8359375,
|
||
|
|
"step": 132
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.9921875,
|
||
|
|
"calib/auroc": 0.7485119047619048,
|
||
|
|
"calib/avg_num_step_conf": 1.61328125,
|
||
|
|
"calib/ece": 0.40699218750000005,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.8671875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.109375,
|
||
|
|
"calib/gap": 0.1980980066445185,
|
||
|
|
"calib/mean_conf": 0.7351171875,
|
||
|
|
"calib/mu_c": 0.8682142857142858,
|
||
|
|
"calib/mu_w": 0.6701162790697673,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.87109375,
|
||
|
|
"calib/pce": 0.40699218750000005,
|
||
|
|
"calib/std_conf": 0.2587771459520524,
|
||
|
|
"calib/step_conf_rate": 0.87109375,
|
||
|
|
"calib/step_q_c": 0.8102068965517241,
|
||
|
|
"calib/step_q_c_n": 145.0,
|
||
|
|
"calib/step_q_gap": 0.07371435923829128,
|
||
|
|
"calib/step_q_w": 0.7364925373134328,
|
||
|
|
"calib/step_q_w_n": 268.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.2128,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 3.7777777777777775e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 34489189.0,
|
||
|
|
"reward": 0.76171875,
|
||
|
|
"reward_std": 0.1624026745557785,
|
||
|
|
"rewards/accuracy_reward_step": 0.328125,
|
||
|
|
"rewards/format_reward_step": 0.8671875,
|
||
|
|
"step": 133
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.9921875,
|
||
|
|
"calib/auroc": 0.6898434898434899,
|
||
|
|
"calib/avg_num_step_conf": 1.76953125,
|
||
|
|
"calib/ece": 0.41687500000000005,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.95703125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.140625,
|
||
|
|
"calib/gap": 0.15460006660006664,
|
||
|
|
"calib/mean_conf": 0.7645312499999999,
|
||
|
|
"calib/mu_c": 0.8641758241758242,
|
||
|
|
"calib/mu_w": 0.7095757575757575,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.9609375,
|
||
|
|
"calib/pce": 0.41296875000000005,
|
||
|
|
"calib/std_conf": 0.24761632170242232,
|
||
|
|
"calib/step_conf_rate": 0.9609375,
|
||
|
|
"calib/step_q_c": 0.8281756756756757,
|
||
|
|
"calib/step_q_c_n": 148.0,
|
||
|
|
"calib/step_q_gap": 0.0925035445281348,
|
||
|
|
"calib/step_q_w": 0.7356721311475409,
|
||
|
|
"calib/step_q_w_n": 305.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.2144,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 3.722222222222222e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 34750813.0,
|
||
|
|
"reward": 0.833984375,
|
||
|
|
"reward_std": 0.16388335824012756,
|
||
|
|
"rewards/accuracy_reward_step": 0.35546875,
|
||
|
|
"rewards/format_reward_step": 0.95703125,
|
||
|
|
"step": 134
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.9921875,
|
||
|
|
"calib/auroc": 0.8336483336483337,
|
||
|
|
"calib/avg_num_step_conf": 1.77734375,
|
||
|
|
"calib/ece": 0.26476377952755914,
|
||
|
|
"calib/final_conf_rate": 0.9921875,
|
||
|
|
"calib/format_rate": 0.87890625,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.09448818897637795,
|
||
|
|
"calib/gap": 0.2992912492912495,
|
||
|
|
"calib/mean_conf": 0.7017716535433071,
|
||
|
|
"calib/mu_c": 0.8702702702702705,
|
||
|
|
"calib/mu_w": 0.570979020979021,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.88671875,
|
||
|
|
"calib/pce": 0.26476377952755914,
|
||
|
|
"calib/std_conf": 0.2823460681302369,
|
||
|
|
"calib/step_conf_rate": 0.88671875,
|
||
|
|
"calib/step_q_c": 0.8374324324324326,
|
||
|
|
"calib/step_q_c_n": 222.0,
|
||
|
|
"calib/step_q_gap": 0.19880582299037253,
|
||
|
|
"calib/step_q_w": 0.6386266094420601,
|
||
|
|
"calib/step_q_w_n": 233.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.9921875,
|
||
|
|
"completions/max_length": 391.0,
|
||
|
|
"completions/max_terminated_length": 391.0,
|
||
|
|
"completions/mean_length": 1.80859375,
|
||
|
|
"completions/mean_terminated_length": 231.5,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 72.0,
|
||
|
|
"epoch": 0.216,
|
||
|
|
"grad_norm": 6.3882737159729,
|
||
|
|
"learning_rate": 3.666666666666666e-07,
|
||
|
|
"loss": 0.0316,
|
||
|
|
"num_tokens": 35013356.0,
|
||
|
|
"reward": 0.873046875,
|
||
|
|
"reward_std": 0.16735684871673584,
|
||
|
|
"rewards/accuracy_reward_step": 0.43359375,
|
||
|
|
"rewards/format_reward_step": 0.87890625,
|
||
|
|
"step": 135
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.8624868972746331,
|
||
|
|
"calib/avg_num_step_conf": 1.5625,
|
||
|
|
"calib/ece": 0.3145490196078432,
|
||
|
|
"calib/final_conf_rate": 0.99609375,
|
||
|
|
"calib/format_rate": 0.90625,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.08235294117647059,
|
||
|
|
"calib/gap": 0.31093356918239023,
|
||
|
|
"calib/mean_conf": 0.6910196078431373,
|
||
|
|
"calib/mu_c": 0.8848958333333335,
|
||
|
|
"calib/mu_w": 0.5739622641509433,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.91015625,
|
||
|
|
"calib/pce": 0.3145490196078432,
|
||
|
|
"calib/std_conf": 0.29591556096862437,
|
||
|
|
"calib/step_conf_rate": 0.91015625,
|
||
|
|
"calib/step_q_c": 0.8440853658536586,
|
||
|
|
"calib/step_q_c_n": 164.0,
|
||
|
|
"calib/step_q_gap": 0.20489045059942146,
|
||
|
|
"calib/step_q_w": 0.6391949152542371,
|
||
|
|
"calib/step_q_w_n": 236.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 150.0,
|
||
|
|
"completions/max_terminated_length": 150.0,
|
||
|
|
"completions/mean_length": 0.5859375,
|
||
|
|
"completions/mean_terminated_length": 150.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 150.0,
|
||
|
|
"epoch": 0.2176,
|
||
|
|
"grad_norm": 12.954962730407715,
|
||
|
|
"learning_rate": 3.6111111111111107e-07,
|
||
|
|
"loss": 0.0386,
|
||
|
|
"num_tokens": 35275538.0,
|
||
|
|
"reward": 0.83203125,
|
||
|
|
"reward_std": 0.16031301021575928,
|
||
|
|
"rewards/accuracy_reward_step": 0.37890625,
|
||
|
|
"rewards/format_reward_step": 0.90625,
|
||
|
|
"step": 136
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.8614053216223199,
|
||
|
|
"calib/avg_num_step_conf": 1.04296875,
|
||
|
|
"calib/ece": 0.23367187500000003,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.7890625,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.046875,
|
||
|
|
"calib/gap": 0.4158847842934643,
|
||
|
|
"calib/mean_conf": 0.616484375,
|
||
|
|
"calib/mu_c": 0.8731632653061225,
|
||
|
|
"calib/mu_w": 0.4572784810126582,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.79296875,
|
||
|
|
"calib/pce": 0.23367187500000003,
|
||
|
|
"calib/std_conf": 0.3621260783634056,
|
||
|
|
"calib/step_conf_rate": 0.79296875,
|
||
|
|
"calib/step_q_c": 0.8496350364963504,
|
||
|
|
"calib/step_q_c_n": 137.0,
|
||
|
|
"calib/step_q_gap": 0.3046350364963505,
|
||
|
|
"calib/step_q_w": 0.5449999999999999,
|
||
|
|
"calib/step_q_w_n": 130.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.2192,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 3.5555555555555553e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 35537098.0,
|
||
|
|
"reward": 0.77734375,
|
||
|
|
"reward_std": 0.15790295600891113,
|
||
|
|
"rewards/accuracy_reward_step": 0.3828125,
|
||
|
|
"rewards/format_reward_step": 0.7890625,
|
||
|
|
"step": 137
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.98046875,
|
||
|
|
"calib/auroc": 0.8096899224806201,
|
||
|
|
"calib/avg_num_step_conf": 1.25,
|
||
|
|
"calib/ece": 0.31936254980079687,
|
||
|
|
"calib/final_conf_rate": 0.98046875,
|
||
|
|
"calib/format_rate": 0.77734375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.11553784860557768,
|
||
|
|
"calib/gap": 0.321377026074701,
|
||
|
|
"calib/mean_conf": 0.66199203187251,
|
||
|
|
"calib/mu_c": 0.8732558139534888,
|
||
|
|
"calib/mu_w": 0.5518787878787879,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.98046875,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.77734375,
|
||
|
|
"calib/pce": 0.31936254980079687,
|
||
|
|
"calib/std_conf": 0.3190698882094751,
|
||
|
|
"calib/step_conf_rate": 0.77734375,
|
||
|
|
"calib/step_q_c": 0.8474468085106381,
|
||
|
|
"calib/step_q_c_n": 141.0,
|
||
|
|
"calib/step_q_gap": 0.1861060263877331,
|
||
|
|
"calib/step_q_w": 0.661340782122905,
|
||
|
|
"calib/step_q_w_n": 179.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.98828125,
|
||
|
|
"completions/max_length": 98.0,
|
||
|
|
"completions/max_terminated_length": 98.0,
|
||
|
|
"completions/mean_length": 1.03125,
|
||
|
|
"completions/mean_terminated_length": 88.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 74.0,
|
||
|
|
"epoch": 0.2208,
|
||
|
|
"grad_norm": 14.619424819946289,
|
||
|
|
"learning_rate": 3.5e-07,
|
||
|
|
"loss": 0.0438,
|
||
|
|
"num_tokens": 35798546.0,
|
||
|
|
"reward": 0.724609375,
|
||
|
|
"reward_std": 0.19636039435863495,
|
||
|
|
"rewards/accuracy_reward_step": 0.3359375,
|
||
|
|
"rewards/format_reward_step": 0.77734375,
|
||
|
|
"step": 138
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7978445830969937,
|
||
|
|
"calib/avg_num_step_conf": 1.6484375,
|
||
|
|
"calib/ece": 0.48445312499999993,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.84765625,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.0859375,
|
||
|
|
"calib/gap": 0.27447078842881467,
|
||
|
|
"calib/mean_conf": 0.6446093749999999,
|
||
|
|
"calib/mu_c": 0.8751219512195123,
|
||
|
|
"calib/mu_w": 0.6006511627906976,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.8515625,
|
||
|
|
"calib/pce": 0.48445312499999993,
|
||
|
|
"calib/std_conf": 0.32561096981230436,
|
||
|
|
"calib/step_conf_rate": 0.8515625,
|
||
|
|
"calib/step_q_c": 0.8340624999999999,
|
||
|
|
"calib/step_q_c_n": 64.0,
|
||
|
|
"calib/step_q_gap": 0.17202339385474852,
|
||
|
|
"calib/step_q_w": 0.6620391061452514,
|
||
|
|
"calib/step_q_w_n": 358.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.2224,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 3.4444444444444444e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 36059962.0,
|
||
|
|
"reward": 0.583984375,
|
||
|
|
"reward_std": 0.08932922780513763,
|
||
|
|
"rewards/accuracy_reward_step": 0.16015625,
|
||
|
|
"rewards/format_reward_step": 0.84765625,
|
||
|
|
"step": 139
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.9921875,
|
||
|
|
"calib/auroc": 0.8151722837022133,
|
||
|
|
"calib/avg_num_step_conf": 1.5703125,
|
||
|
|
"calib/ece": 0.2853149606299213,
|
||
|
|
"calib/final_conf_rate": 0.9921875,
|
||
|
|
"calib/format_rate": 0.94140625,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.0984251968503937,
|
||
|
|
"calib/gap": 0.26493838028169014,
|
||
|
|
"calib/mean_conf": 0.7262598425196851,
|
||
|
|
"calib/mu_c": 0.874375,
|
||
|
|
"calib/mu_w": 0.6094366197183099,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.94140625,
|
||
|
|
"calib/pce": 0.2853149606299213,
|
||
|
|
"calib/std_conf": 0.28053763660644737,
|
||
|
|
"calib/step_conf_rate": 0.94140625,
|
||
|
|
"calib/step_q_c": 0.8384831460674157,
|
||
|
|
"calib/step_q_c_n": 178.0,
|
||
|
|
"calib/step_q_gap": 0.16058136035312998,
|
||
|
|
"calib/step_q_w": 0.6779017857142857,
|
||
|
|
"calib/step_q_w_n": 224.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.9921875,
|
||
|
|
"completions/max_length": 216.0,
|
||
|
|
"completions/max_terminated_length": 216.0,
|
||
|
|
"completions/mean_length": 1.07421875,
|
||
|
|
"completions/mean_terminated_length": 137.5,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 59.0,
|
||
|
|
"epoch": 0.224,
|
||
|
|
"grad_norm": 19.582271575927734,
|
||
|
|
"learning_rate": 3.388888888888889e-07,
|
||
|
|
"loss": 0.064,
|
||
|
|
"num_tokens": 36321421.0,
|
||
|
|
"reward": 0.908203125,
|
||
|
|
"reward_std": 0.1559145301580429,
|
||
|
|
"rewards/accuracy_reward_step": 0.4375,
|
||
|
|
"rewards/format_reward_step": 0.94140625,
|
||
|
|
"step": 140
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.837111644354635,
|
||
|
|
"calib/avg_num_step_conf": 1.171875,
|
||
|
|
"calib/ece": 0.3227843137254903,
|
||
|
|
"calib/final_conf_rate": 0.99609375,
|
||
|
|
"calib/format_rate": 0.84375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.09803921568627451,
|
||
|
|
"calib/gap": 0.24756125284162656,
|
||
|
|
"calib/mean_conf": 0.7423921568627451,
|
||
|
|
"calib/mu_c": 0.88607476635514,
|
||
|
|
"calib/mu_w": 0.6385135135135135,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.84375,
|
||
|
|
"calib/pce": 0.3227843137254903,
|
||
|
|
"calib/std_conf": 0.26380355721868176,
|
||
|
|
"calib/step_conf_rate": 0.84375,
|
||
|
|
"calib/step_q_c": 0.8762585034013606,
|
||
|
|
"calib/step_q_c_n": 147.0,
|
||
|
|
"calib/step_q_gap": 0.20011471255168733,
|
||
|
|
"calib/step_q_w": 0.6761437908496732,
|
||
|
|
"calib/step_q_w_n": 153.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 329.0,
|
||
|
|
"completions/max_terminated_length": 329.0,
|
||
|
|
"completions/mean_length": 1.28515625,
|
||
|
|
"completions/mean_terminated_length": 329.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 329.0,
|
||
|
|
"epoch": 0.2256,
|
||
|
|
"grad_norm": 3.8931891918182373,
|
||
|
|
"learning_rate": 3.333333333333333e-07,
|
||
|
|
"loss": 0.0387,
|
||
|
|
"num_tokens": 36583502.0,
|
||
|
|
"reward": 0.83984375,
|
||
|
|
"reward_std": 0.1878194510936737,
|
||
|
|
"rewards/accuracy_reward_step": 0.41796875,
|
||
|
|
"rewards/format_reward_step": 0.84375,
|
||
|
|
"step": 141
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.8214285714285714,
|
||
|
|
"calib/avg_num_step_conf": 1.37890625,
|
||
|
|
"calib/ece": 0.33768627450980393,
|
||
|
|
"calib/final_conf_rate": 0.99609375,
|
||
|
|
"calib/format_rate": 0.828125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.09411764705882353,
|
||
|
|
"calib/gap": 0.29801108374384244,
|
||
|
|
"calib/mean_conf": 0.6710196078431373,
|
||
|
|
"calib/mu_c": 0.8673563218390805,
|
||
|
|
"calib/mu_w": 0.5693452380952381,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.828125,
|
||
|
|
"calib/pce": 0.33376470588235296,
|
||
|
|
"calib/std_conf": 0.31803646912573386,
|
||
|
|
"calib/step_conf_rate": 0.828125,
|
||
|
|
"calib/step_q_c": 0.8450993377483443,
|
||
|
|
"calib/step_q_c_n": 151.0,
|
||
|
|
"calib/step_q_gap": 0.17544587240180975,
|
||
|
|
"calib/step_q_w": 0.6696534653465346,
|
||
|
|
"calib/step_q_w_n": 202.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 71.0,
|
||
|
|
"completions/max_terminated_length": 71.0,
|
||
|
|
"completions/mean_length": 0.27734375,
|
||
|
|
"completions/mean_terminated_length": 71.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 71.0,
|
||
|
|
"epoch": 0.2272,
|
||
|
|
"grad_norm": 4.603307247161865,
|
||
|
|
"learning_rate": 3.2777777777777776e-07,
|
||
|
|
"loss": 0.0211,
|
||
|
|
"num_tokens": 36844285.0,
|
||
|
|
"reward": 0.75390625,
|
||
|
|
"reward_std": 0.2743779122829437,
|
||
|
|
"rewards/accuracy_reward_step": 0.33984375,
|
||
|
|
"rewards/format_reward_step": 0.828125,
|
||
|
|
"step": 142
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.9921875,
|
||
|
|
"calib/auroc": 0.5820154352226721,
|
||
|
|
"calib/avg_num_step_conf": 1.3046875,
|
||
|
|
"calib/ece": 0.3389453124999998,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.83203125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.03515625,
|
||
|
|
"calib/gap": 0.14803137651821863,
|
||
|
|
"calib/mean_conf": 0.7444140625,
|
||
|
|
"calib/mu_c": 0.8323076923076923,
|
||
|
|
"calib/mu_w": 0.6842763157894737,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.84765625,
|
||
|
|
"calib/pce": 0.33855468749999984,
|
||
|
|
"calib/std_conf": 0.25949102908818655,
|
||
|
|
"calib/step_conf_rate": 0.84765625,
|
||
|
|
"calib/step_q_c": 0.8037419354838711,
|
||
|
|
"calib/step_q_c_n": 155.0,
|
||
|
|
"calib/step_q_gap": 0.0545799243106867,
|
||
|
|
"calib/step_q_w": 0.7491620111731844,
|
||
|
|
"calib/step_q_w_n": 179.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.2288,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 3.222222222222222e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 37104013.0,
|
||
|
|
"reward": 0.822265625,
|
||
|
|
"reward_std": 0.19560091197490692,
|
||
|
|
"rewards/accuracy_reward_step": 0.40625,
|
||
|
|
"rewards/format_reward_step": 0.83203125,
|
||
|
|
"step": 143
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.762091978326946,
|
||
|
|
"calib/avg_num_step_conf": 1.29296875,
|
||
|
|
"calib/ece": 0.383313725490196,
|
||
|
|
"calib/final_conf_rate": 0.99609375,
|
||
|
|
"calib/format_rate": 0.8359375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.07058823529411765,
|
||
|
|
"calib/gap": 0.19431710056825668,
|
||
|
|
"calib/mean_conf": 0.7519411764705882,
|
||
|
|
"calib/mu_c": 0.8746276595744679,
|
||
|
|
"calib/mu_w": 0.6803105590062112,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.83984375,
|
||
|
|
"calib/pce": 0.383313725490196,
|
||
|
|
"calib/std_conf": 0.2457981326820618,
|
||
|
|
"calib/step_conf_rate": 0.83984375,
|
||
|
|
"calib/step_q_c": 0.8461481481481482,
|
||
|
|
"calib/step_q_c_n": 135.0,
|
||
|
|
"calib/step_q_gap": 0.09543386243386254,
|
||
|
|
"calib/step_q_w": 0.7507142857142857,
|
||
|
|
"calib/step_q_w_n": 196.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.2304,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 3.166666666666666e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 37363837.0,
|
||
|
|
"reward": 0.7890625,
|
||
|
|
"reward_std": 0.23823988437652588,
|
||
|
|
"rewards/accuracy_reward_step": 0.37109375,
|
||
|
|
"rewards/format_reward_step": 0.8359375,
|
||
|
|
"step": 144
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7471633274978928,
|
||
|
|
"calib/avg_num_step_conf": 0.9765625,
|
||
|
|
"calib/ece": 0.315078125,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.796875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.05078125,
|
||
|
|
"calib/gap": 0.24671335019127283,
|
||
|
|
"calib/mean_conf": 0.6939843750000001,
|
||
|
|
"calib/mu_c": 0.8472164948453608,
|
||
|
|
"calib/mu_w": 0.6005031446540879,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.796875,
|
||
|
|
"calib/pce": 0.315078125,
|
||
|
|
"calib/std_conf": 0.2903199739181915,
|
||
|
|
"calib/step_conf_rate": 0.796875,
|
||
|
|
"calib/step_q_c": 0.8206422018348626,
|
||
|
|
"calib/step_q_c_n": 109.0,
|
||
|
|
"calib/step_q_gap": 0.141635109636281,
|
||
|
|
"calib/step_q_w": 0.6790070921985816,
|
||
|
|
"calib/step_q_w_n": 141.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.232,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 3.111111111111111e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 37625069.0,
|
||
|
|
"reward": 0.77734375,
|
||
|
|
"reward_std": 0.22878046333789825,
|
||
|
|
"rewards/accuracy_reward_step": 0.37890625,
|
||
|
|
"rewards/format_reward_step": 0.796875,
|
||
|
|
"step": 145
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.96875,
|
||
|
|
"calib/auroc": 0.739271331487541,
|
||
|
|
"calib/avg_num_step_conf": 1.25,
|
||
|
|
"calib/ece": 0.33418972332015806,
|
||
|
|
"calib/final_conf_rate": 0.98828125,
|
||
|
|
"calib/format_rate": 0.83984375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.10276679841897234,
|
||
|
|
"calib/gap": 0.16757676818525058,
|
||
|
|
"calib/mean_conf": 0.7610671936758894,
|
||
|
|
"calib/mu_c": 0.8518103448275864,
|
||
|
|
"calib/mu_w": 0.6842335766423359,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.9765625,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.84765625,
|
||
|
|
"calib/pce": 0.31837944664031614,
|
||
|
|
"calib/std_conf": 0.2577004715448092,
|
||
|
|
"calib/step_conf_rate": 0.84765625,
|
||
|
|
"calib/step_q_c": 0.8490196078431373,
|
||
|
|
"calib/step_q_c_n": 153.0,
|
||
|
|
"calib/step_q_gap": 0.10327110484912538,
|
||
|
|
"calib/step_q_w": 0.745748502994012,
|
||
|
|
"calib/step_q_w_n": 167.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.98828125,
|
||
|
|
"completions/max_length": 168.0,
|
||
|
|
"completions/max_terminated_length": 168.0,
|
||
|
|
"completions/mean_length": 1.40234375,
|
||
|
|
"completions/mean_terminated_length": 119.66667175292969,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 76.0,
|
||
|
|
"epoch": 0.2336,
|
||
|
|
"grad_norm": 20.80350685119629,
|
||
|
|
"learning_rate": 3.055555555555556e-07,
|
||
|
|
"loss": 0.0497,
|
||
|
|
"num_tokens": 37887572.0,
|
||
|
|
"reward": 0.873046875,
|
||
|
|
"reward_std": 0.19967760145664215,
|
||
|
|
"rewards/accuracy_reward_step": 0.453125,
|
||
|
|
"rewards/format_reward_step": 0.83984375,
|
||
|
|
"step": 146
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.98828125,
|
||
|
|
"calib/auroc": 0.7865740740740742,
|
||
|
|
"calib/avg_num_step_conf": 1.3671875,
|
||
|
|
"calib/ece": 0.22133333333333344,
|
||
|
|
"calib/final_conf_rate": 0.99609375,
|
||
|
|
"calib/format_rate": 0.91015625,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.08627450980392157,
|
||
|
|
"calib/gap": 0.21573148148148147,
|
||
|
|
"calib/mean_conf": 0.7499607843137254,
|
||
|
|
"calib/mu_c": 0.8514814814814814,
|
||
|
|
"calib/mu_w": 0.6357499999999999,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.921875,
|
||
|
|
"calib/pce": 0.22094117647058836,
|
||
|
|
"calib/std_conf": 0.23815384389946498,
|
||
|
|
"calib/step_conf_rate": 0.921875,
|
||
|
|
"calib/step_q_c": 0.8319897959183673,
|
||
|
|
"calib/step_q_c_n": 196.0,
|
||
|
|
"calib/step_q_gap": 0.1154313543599258,
|
||
|
|
"calib/step_q_w": 0.7165584415584415,
|
||
|
|
"calib/step_q_w_n": 154.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.2352,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 3e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 38146868.0,
|
||
|
|
"reward": 0.982421875,
|
||
|
|
"reward_std": 0.26381731033325195,
|
||
|
|
"rewards/accuracy_reward_step": 0.52734375,
|
||
|
|
"rewards/format_reward_step": 0.91015625,
|
||
|
|
"step": 147
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.9921875,
|
||
|
|
"calib/auroc": 0.771933062630737,
|
||
|
|
"calib/avg_num_step_conf": 1.19921875,
|
||
|
|
"calib/ece": 0.24882352941176455,
|
||
|
|
"calib/final_conf_rate": 0.99609375,
|
||
|
|
"calib/format_rate": 0.83203125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.10588235294117647,
|
||
|
|
"calib/gap": 0.2499077150239939,
|
||
|
|
"calib/mean_conf": 0.7429411764705882,
|
||
|
|
"calib/mu_c": 0.8693650793650791,
|
||
|
|
"calib/mu_w": 0.6194573643410852,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.83984375,
|
||
|
|
"calib/pce": 0.24882352941176455,
|
||
|
|
"calib/std_conf": 0.2541644837011783,
|
||
|
|
"calib/step_conf_rate": 0.83984375,
|
||
|
|
"calib/step_q_c": 0.8450802139037432,
|
||
|
|
"calib/step_q_c_n": 187.0,
|
||
|
|
"calib/step_q_gap": 0.11258021390374329,
|
||
|
|
"calib/step_q_w": 0.7324999999999999,
|
||
|
|
"calib/step_q_w_n": 120.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 125.0,
|
||
|
|
"completions/max_terminated_length": 125.0,
|
||
|
|
"completions/mean_length": 0.48828125,
|
||
|
|
"completions/mean_terminated_length": 125.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 125.0,
|
||
|
|
"epoch": 0.2368,
|
||
|
|
"grad_norm": 9.787782669067383,
|
||
|
|
"learning_rate": 2.9444444444444444e-07,
|
||
|
|
"loss": 0.0386,
|
||
|
|
"num_tokens": 38404913.0,
|
||
|
|
"reward": 0.908203125,
|
||
|
|
"reward_std": 0.23260335624217987,
|
||
|
|
"rewards/accuracy_reward_step": 0.4921875,
|
||
|
|
"rewards/format_reward_step": 0.83203125,
|
||
|
|
"step": 148
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.98828125,
|
||
|
|
"calib/auroc": 0.8605001400952648,
|
||
|
|
"calib/avg_num_step_conf": 0.9140625,
|
||
|
|
"calib/ece": 0.33235294117647063,
|
||
|
|
"calib/final_conf_rate": 0.99609375,
|
||
|
|
"calib/format_rate": 0.6484375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.0392156862745098,
|
||
|
|
"calib/gap": 0.3200721490613617,
|
||
|
|
"calib/mean_conf": 0.657843137254902,
|
||
|
|
"calib/mu_c": 0.873734939759036,
|
||
|
|
"calib/mu_w": 0.5536627906976743,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.66015625,
|
||
|
|
"calib/pce": 0.33235294117647063,
|
||
|
|
"calib/std_conf": 0.3017693816605732,
|
||
|
|
"calib/step_conf_rate": 0.66015625,
|
||
|
|
"calib/step_q_c": 0.8461475409836066,
|
||
|
|
"calib/step_q_c_n": 122.0,
|
||
|
|
"calib/step_q_gap": 0.26802254098360656,
|
||
|
|
"calib/step_q_w": 0.578125,
|
||
|
|
"calib/step_q_w_n": 112.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 128.0,
|
||
|
|
"completions/max_terminated_length": 128.0,
|
||
|
|
"completions/mean_length": 0.5,
|
||
|
|
"completions/mean_terminated_length": 128.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 128.0,
|
||
|
|
"epoch": 0.2384,
|
||
|
|
"grad_norm": 4.38532018661499,
|
||
|
|
"learning_rate": 2.8888888888888885e-07,
|
||
|
|
"loss": 0.0113,
|
||
|
|
"num_tokens": 38666121.0,
|
||
|
|
"reward": 0.6484375,
|
||
|
|
"reward_std": 0.16728198528289795,
|
||
|
|
"rewards/accuracy_reward_step": 0.32421875,
|
||
|
|
"rewards/format_reward_step": 0.6484375,
|
||
|
|
"step": 149
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.9921875,
|
||
|
|
"calib/auroc": 0.7517543859649123,
|
||
|
|
"calib/avg_num_step_conf": 1.00390625,
|
||
|
|
"calib/ece": 0.44574218750000005,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.734375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.0703125,
|
||
|
|
"calib/gap": 0.22140510366826172,
|
||
|
|
"calib/mean_conf": 0.7035546875,
|
||
|
|
"calib/mu_c": 0.867878787878788,
|
||
|
|
"calib/mu_w": 0.6464736842105263,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.7421875,
|
||
|
|
"calib/pce": 0.44574218750000005,
|
||
|
|
"calib/std_conf": 0.27628298187144523,
|
||
|
|
"calib/step_conf_rate": 0.7421875,
|
||
|
|
"calib/step_q_c": 0.8296341463414633,
|
||
|
|
"calib/step_q_c_n": 82.0,
|
||
|
|
"calib/step_q_gap": 0.09837700348432055,
|
||
|
|
"calib/step_q_w": 0.7312571428571427,
|
||
|
|
"calib/step_q_w_n": 175.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.24,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 2.833333333333333e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 38924825.0,
|
||
|
|
"reward": 0.625,
|
||
|
|
"reward_std": 0.2419390082359314,
|
||
|
|
"rewards/accuracy_reward_step": 0.2578125,
|
||
|
|
"rewards/format_reward_step": 0.734375,
|
||
|
|
"step": 150
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 1.0,
|
||
|
|
"calib/auroc": 0.7865737203972498,
|
||
|
|
"calib/avg_num_step_conf": 1.0859375,
|
||
|
|
"calib/ece": 0.34148437500000006,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.81640625,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.10546875,
|
||
|
|
"calib/gap": 0.24296154825566607,
|
||
|
|
"calib/mean_conf": 0.7399218750000001,
|
||
|
|
"calib/mu_c": 0.8860784313725492,
|
||
|
|
"calib/mu_w": 0.6431168831168831,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.81640625,
|
||
|
|
"calib/pce": 0.34148437500000006,
|
||
|
|
"calib/std_conf": 0.27425893357643677,
|
||
|
|
"calib/step_conf_rate": 0.81640625,
|
||
|
|
"calib/step_q_c": 0.8683333333333334,
|
||
|
|
"calib/step_q_c_n": 132.0,
|
||
|
|
"calib/step_q_gap": 0.09675799086758008,
|
||
|
|
"calib/step_q_w": 0.7715753424657533,
|
||
|
|
"calib/step_q_w_n": 146.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.2416,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 2.7777777777777776e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 39185953.0,
|
||
|
|
"reward": 0.806640625,
|
||
|
|
"reward_std": 0.14504341781139374,
|
||
|
|
"rewards/accuracy_reward_step": 0.3984375,
|
||
|
|
"rewards/format_reward_step": 0.81640625,
|
||
|
|
"step": 151
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.98828125,
|
||
|
|
"calib/auroc": 0.8505436971405558,
|
||
|
|
"calib/avg_num_step_conf": 1.09765625,
|
||
|
|
"calib/ece": 0.43984375000000003,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.83203125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.07421875,
|
||
|
|
"calib/gap": 0.2655110753121225,
|
||
|
|
"calib/mean_conf": 0.69375,
|
||
|
|
"calib/mu_c": 0.8918461538461538,
|
||
|
|
"calib/mu_w": 0.6263350785340314,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.84375,
|
||
|
|
"calib/pce": 0.43984375000000003,
|
||
|
|
"calib/std_conf": 0.2864614393771001,
|
||
|
|
"calib/step_conf_rate": 0.84375,
|
||
|
|
"calib/step_q_c": 0.8694,
|
||
|
|
"calib/step_q_c_n": 100.0,
|
||
|
|
"calib/step_q_gap": 0.1652563535911602,
|
||
|
|
"calib/step_q_w": 0.7041436464088398,
|
||
|
|
"calib/step_q_w_n": 181.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.2432,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 2.7222222222222216e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 39444273.0,
|
||
|
|
"reward": 0.669921875,
|
||
|
|
"reward_std": 0.1663089096546173,
|
||
|
|
"rewards/accuracy_reward_step": 0.25390625,
|
||
|
|
"rewards/format_reward_step": 0.83203125,
|
||
|
|
"step": 152
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.94140625,
|
||
|
|
"calib/auroc": 0.7240257516608453,
|
||
|
|
"calib/avg_num_step_conf": 1.171875,
|
||
|
|
"calib/ece": 0.37652000000000013,
|
||
|
|
"calib/final_conf_rate": 0.9765625,
|
||
|
|
"calib/format_rate": 0.8046875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.064,
|
||
|
|
"calib/gap": 0.1877891925210604,
|
||
|
|
"calib/mean_conf": 0.74852,
|
||
|
|
"calib/mu_c": 0.8664516129032259,
|
||
|
|
"calib/mu_w": 0.6786624203821655,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.96875,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.83203125,
|
||
|
|
"calib/pce": 0.37652000000000013,
|
||
|
|
"calib/std_conf": 0.2518106622047605,
|
||
|
|
"calib/step_conf_rate": 0.83203125,
|
||
|
|
"calib/step_q_c": 0.842280701754386,
|
||
|
|
"calib/step_q_c_n": 114.0,
|
||
|
|
"calib/step_q_gap": 0.08249575551782684,
|
||
|
|
"calib/step_q_w": 0.7597849462365591,
|
||
|
|
"calib/step_q_w_n": 186.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.984375,
|
||
|
|
"completions/max_length": 293.0,
|
||
|
|
"completions/max_terminated_length": 293.0,
|
||
|
|
"completions/mean_length": 2.734375,
|
||
|
|
"completions/mean_terminated_length": 175.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 117.0,
|
||
|
|
"epoch": 0.2448,
|
||
|
|
"grad_norm": 6.291600704193115,
|
||
|
|
"learning_rate": 2.6666666666666667e-07,
|
||
|
|
"loss": 0.0478,
|
||
|
|
"num_tokens": 39706157.0,
|
||
|
|
"reward": 0.765625,
|
||
|
|
"reward_std": 0.2518659234046936,
|
||
|
|
"rewards/accuracy_reward_step": 0.36328125,
|
||
|
|
"rewards/format_reward_step": 0.8046875,
|
||
|
|
"step": 153
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.984375,
|
||
|
|
"calib/auroc": 0.7246344564526382,
|
||
|
|
"calib/avg_num_step_conf": 1.0078125,
|
||
|
|
"calib/ece": 0.30586956521739134,
|
||
|
|
"calib/final_conf_rate": 0.98828125,
|
||
|
|
"calib/format_rate": 0.828125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.07509881422924901,
|
||
|
|
"calib/gap": 0.19740209790209773,
|
||
|
|
"calib/mean_conf": 0.7406521739130434,
|
||
|
|
"calib/mu_c": 0.8522272727272726,
|
||
|
|
"calib/mu_w": 0.6548251748251749,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.84765625,
|
||
|
|
"calib/pce": 0.30586956521739134,
|
||
|
|
"calib/std_conf": 0.264670108566857,
|
||
|
|
"calib/step_conf_rate": 0.84765625,
|
||
|
|
"calib/step_q_c": 0.83856,
|
||
|
|
"calib/step_q_c_n": 125.0,
|
||
|
|
"calib/step_q_gap": 0.09600360902255645,
|
||
|
|
"calib/step_q_w": 0.7425563909774435,
|
||
|
|
"calib/step_q_w_n": 133.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.98828125,
|
||
|
|
"completions/max_length": 162.0,
|
||
|
|
"completions/max_terminated_length": 162.0,
|
||
|
|
"completions/mean_length": 1.515625,
|
||
|
|
"completions/mean_terminated_length": 129.33334350585938,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 109.0,
|
||
|
|
"epoch": 0.2464,
|
||
|
|
"grad_norm": 27.489587783813477,
|
||
|
|
"learning_rate": 2.6111111111111113e-07,
|
||
|
|
"loss": 0.1159,
|
||
|
|
"num_tokens": 39968409.0,
|
||
|
|
"reward": 0.84765625,
|
||
|
|
"reward_std": 0.24236908555030823,
|
||
|
|
"rewards/accuracy_reward_step": 0.43359375,
|
||
|
|
"rewards/format_reward_step": 0.828125,
|
||
|
|
"step": 154
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.98046875,
|
||
|
|
"calib/auroc": 0.7320774463631605,
|
||
|
|
"calib/avg_num_step_conf": 1.13671875,
|
||
|
|
"calib/ece": 0.34586614173228336,
|
||
|
|
"calib/final_conf_rate": 0.9921875,
|
||
|
|
"calib/format_rate": 0.73828125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.09055118110236221,
|
||
|
|
"calib/gap": 0.21522370486656195,
|
||
|
|
"calib/mean_conf": 0.7316929133858268,
|
||
|
|
"calib/mu_c": 0.8638775510204081,
|
||
|
|
"calib/mu_w": 0.6486538461538461,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.7578125,
|
||
|
|
"calib/pce": 0.34586614173228336,
|
||
|
|
"calib/std_conf": 0.25883120124355463,
|
||
|
|
"calib/step_conf_rate": 0.7578125,
|
||
|
|
"calib/step_q_c": 0.8274657534246576,
|
||
|
|
"calib/step_q_c_n": 146.0,
|
||
|
|
"calib/step_q_gap": 0.08346575342465756,
|
||
|
|
"calib/step_q_w": 0.744,
|
||
|
|
"calib/step_q_w_n": 145.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.9921875,
|
||
|
|
"completions/max_length": 179.0,
|
||
|
|
"completions/max_terminated_length": 179.0,
|
||
|
|
"completions/mean_length": 0.97265625,
|
||
|
|
"completions/mean_terminated_length": 124.5,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 70.0,
|
||
|
|
"epoch": 0.248,
|
||
|
|
"grad_norm": 12.564913749694824,
|
||
|
|
"learning_rate": 2.5555555555555553e-07,
|
||
|
|
"loss": 0.0699,
|
||
|
|
"num_tokens": 40230018.0,
|
||
|
|
"reward": 0.755859375,
|
||
|
|
"reward_std": 0.2155877947807312,
|
||
|
|
"rewards/accuracy_reward_step": 0.38671875,
|
||
|
|
"rewards/format_reward_step": 0.73828125,
|
||
|
|
"step": 155
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.984375,
|
||
|
|
"calib/auroc": 0.7833593306863301,
|
||
|
|
"calib/avg_num_step_conf": 1.1640625,
|
||
|
|
"calib/ece": 0.37767716535433055,
|
||
|
|
"calib/final_conf_rate": 0.9921875,
|
||
|
|
"calib/format_rate": 0.80078125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.08661417322834646,
|
||
|
|
"calib/gap": 0.24506806579693707,
|
||
|
|
"calib/mean_conf": 0.7005118110236219,
|
||
|
|
"calib/mu_c": 0.8664634146341463,
|
||
|
|
"calib/mu_w": 0.6213953488372093,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.81640625,
|
||
|
|
"calib/pce": 0.37767716535433055,
|
||
|
|
"calib/std_conf": 0.2834811264848123,
|
||
|
|
"calib/step_conf_rate": 0.81640625,
|
||
|
|
"calib/step_q_c": 0.8305970149253732,
|
||
|
|
"calib/step_q_c_n": 134.0,
|
||
|
|
"calib/step_q_gap": 0.16748725882781224,
|
||
|
|
"calib/step_q_w": 0.663109756097561,
|
||
|
|
"calib/step_q_w_n": 164.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 176.0,
|
||
|
|
"completions/max_terminated_length": 176.0,
|
||
|
|
"completions/mean_length": 0.6875,
|
||
|
|
"completions/mean_terminated_length": 176.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 176.0,
|
||
|
|
"epoch": 0.2496,
|
||
|
|
"grad_norm": 6.4348907470703125,
|
||
|
|
"learning_rate": 2.5e-07,
|
||
|
|
"loss": 0.0181,
|
||
|
|
"num_tokens": 40492338.0,
|
||
|
|
"reward": 0.720703125,
|
||
|
|
"reward_std": 0.21248027682304382,
|
||
|
|
"rewards/accuracy_reward_step": 0.3203125,
|
||
|
|
"rewards/format_reward_step": 0.80078125,
|
||
|
|
"step": 156
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.8132274776290116,
|
||
|
|
"calib/avg_num_step_conf": 1.14453125,
|
||
|
|
"calib/ece": 0.2923437499999999,
|
||
|
|
"calib/final_conf_rate": 1.0,
|
||
|
|
"calib/format_rate": 0.76953125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.04296875,
|
||
|
|
"calib/gap": 0.3179546524927672,
|
||
|
|
"calib/mean_conf": 0.6399999999999999,
|
||
|
|
"calib/mu_c": 0.8474157303370786,
|
||
|
|
"calib/mu_w": 0.5294610778443114,
|
||
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.7734375,
|
||
|
|
"calib/pce": 0.2923437499999999,
|
||
|
|
"calib/std_conf": 0.3270858545550388,
|
||
|
|
"calib/step_conf_rate": 0.7734375,
|
||
|
|
"calib/step_q_c": 0.8076,
|
||
|
|
"calib/step_q_c_n": 125.0,
|
||
|
|
"calib/step_q_gap": 0.12188571428571437,
|
||
|
|
"calib/step_q_w": 0.6857142857142856,
|
||
|
|
"calib/step_q_w_n": 168.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.2512,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 2.4444444444444445e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 40752426.0,
|
||
|
|
"reward": 0.732421875,
|
||
|
|
"reward_std": 0.2622066140174866,
|
||
|
|
"rewards/accuracy_reward_step": 0.34765625,
|
||
|
|
"rewards/format_reward_step": 0.76953125,
|
||
|
|
"step": 157
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.98046875,
|
||
|
|
"calib/auroc": 0.7431954371525522,
|
||
|
|
"calib/avg_num_step_conf": 1.03125,
|
||
|
|
"calib/ece": 0.3642063492063493,
|
||
|
|
"calib/final_conf_rate": 0.984375,
|
||
|
|
"calib/format_rate": 0.68359375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.0992063492063492,
|
||
|
|
"calib/gap": 0.2789733593242366,
|
||
|
|
"calib/mean_conf": 0.6856349206349205,
|
||
|
|
"calib/mu_c": 0.8749382716049383,
|
||
|
|
"calib/mu_w": 0.5959649122807017,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.69140625,
|
||
|
|
"calib/pce": 0.3642063492063493,
|
||
|
|
"calib/std_conf": 0.33009257006527015,
|
||
|
|
"calib/step_conf_rate": 0.69140625,
|
||
|
|
"calib/step_q_c": 0.8597872340425533,
|
||
|
|
"calib/step_q_c_n": 94.0,
|
||
|
|
"calib/step_q_gap": 0.07219899874843572,
|
||
|
|
"calib/step_q_w": 0.7875882352941176,
|
||
|
|
"calib/step_q_w_n": 170.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.98828125,
|
||
|
|
"completions/max_length": 194.0,
|
||
|
|
"completions/max_terminated_length": 194.0,
|
||
|
|
"completions/mean_length": 1.80078125,
|
||
|
|
"completions/mean_terminated_length": 153.6666717529297,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 113.0,
|
||
|
|
"epoch": 0.2528,
|
||
|
|
"grad_norm": 14.57109260559082,
|
||
|
|
"learning_rate": 2.388888888888889e-07,
|
||
|
|
"loss": 0.0776,
|
||
|
|
"num_tokens": 41014319.0,
|
||
|
|
"reward": 0.658203125,
|
||
|
|
"reward_std": 0.2848474979400635,
|
||
|
|
"rewards/accuracy_reward_step": 0.31640625,
|
||
|
|
"rewards/format_reward_step": 0.68359375,
|
||
|
|
"step": 158
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.98046875,
|
||
|
|
"calib/auroc": 0.8447322970639033,
|
||
|
|
"calib/avg_num_step_conf": 0.828125,
|
||
|
|
"calib/ece": 0.4440711462450593,
|
||
|
|
"calib/final_conf_rate": 0.98828125,
|
||
|
|
"calib/format_rate": 0.65625,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.07509881422924901,
|
||
|
|
"calib/gap": 0.28110708117443894,
|
||
|
|
"calib/mean_conf": 0.6812252964426877,
|
||
|
|
"calib/mu_c": 0.8956666666666668,
|
||
|
|
"calib/mu_w": 0.6145595854922279,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.6640625,
|
||
|
|
"calib/pce": 0.4440711462450593,
|
||
|
|
"calib/std_conf": 0.31747347564723677,
|
||
|
|
"calib/step_conf_rate": 0.6640625,
|
||
|
|
"calib/step_q_c": 0.8800000000000001,
|
||
|
|
"calib/step_q_c_n": 80.0,
|
||
|
|
"calib/step_q_gap": 0.1796212121212123,
|
||
|
|
"calib/step_q_w": 0.7003787878787878,
|
||
|
|
"calib/step_q_w_n": 132.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 192.0,
|
||
|
|
"completions/max_terminated_length": 192.0,
|
||
|
|
"completions/mean_length": 0.75,
|
||
|
|
"completions/mean_terminated_length": 192.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 192.0,
|
||
|
|
"epoch": 0.2544,
|
||
|
|
"grad_norm": 5.986937522888184,
|
||
|
|
"learning_rate": 2.3333333333333333e-07,
|
||
|
|
"loss": 0.0253,
|
||
|
|
"num_tokens": 41276223.0,
|
||
|
|
"reward": 0.5625,
|
||
|
|
"reward_std": 0.16526161134243011,
|
||
|
|
"rewards/accuracy_reward_step": 0.234375,
|
||
|
|
"rewards/format_reward_step": 0.65625,
|
||
|
|
"step": 159
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.97265625,
|
||
|
|
"calib/auroc": 0.7982125124131082,
|
||
|
|
"calib/avg_num_step_conf": 1.24609375,
|
||
|
|
"calib/ece": 0.3841338582677163,
|
||
|
|
"calib/final_conf_rate": 0.9921875,
|
||
|
|
"calib/format_rate": 0.78125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.07086614173228346,
|
||
|
|
"calib/gap": 0.20020324395895428,
|
||
|
|
"calib/mean_conf": 0.7581496062992126,
|
||
|
|
"calib/mu_c": 0.8834736842105265,
|
||
|
|
"calib/mu_w": 0.6832704402515722,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.98046875,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.79296875,
|
||
|
|
"calib/pce": 0.3841338582677163,
|
||
|
|
"calib/std_conf": 0.2493979389422934,
|
||
|
|
"calib/step_conf_rate": 0.79296875,
|
||
|
|
"calib/step_q_c": 0.8559210526315789,
|
||
|
|
"calib/step_q_c_n": 152.0,
|
||
|
|
"calib/step_q_gap": 0.10981326820044124,
|
||
|
|
"calib/step_q_w": 0.7461077844311377,
|
||
|
|
"calib/step_q_w_n": 167.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.256,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 2.2777777777777776e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 41533975.0,
|
||
|
|
"reward": 0.76171875,
|
||
|
|
"reward_std": 0.22846969962120056,
|
||
|
|
"rewards/accuracy_reward_step": 0.37109375,
|
||
|
|
"rewards/format_reward_step": 0.78125,
|
||
|
|
"step": 160
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.98046875,
|
||
|
|
"calib/auroc": 0.8618847627468317,
|
||
|
|
"calib/avg_num_step_conf": 0.8359375,
|
||
|
|
"calib/ece": 0.30623015873015874,
|
||
|
|
"calib/final_conf_rate": 0.984375,
|
||
|
|
"calib/format_rate": 0.6015625,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.11904761904761904,
|
||
|
|
"calib/gap": 0.4072104332449164,
|
||
|
|
"calib/mean_conf": 0.6157539682539682,
|
||
|
|
"calib/mu_c": 0.8969230769230774,
|
||
|
|
"calib/mu_w": 0.489712643678161,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.609375,
|
||
|
|
"calib/pce": 0.30623015873015874,
|
||
|
|
"calib/std_conf": 0.366754084525311,
|
||
|
|
"calib/step_conf_rate": 0.609375,
|
||
|
|
"calib/step_q_c": 0.8607142857142858,
|
||
|
|
"calib/step_q_c_n": 112.0,
|
||
|
|
"calib/step_q_gap": 0.15679271708683484,
|
||
|
|
"calib/step_q_w": 0.7039215686274509,
|
||
|
|
"calib/step_q_w_n": 102.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.9921875,
|
||
|
|
"completions/max_length": 162.0,
|
||
|
|
"completions/max_terminated_length": 162.0,
|
||
|
|
"completions/mean_length": 0.98046875,
|
||
|
|
"completions/mean_terminated_length": 125.5,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 89.0,
|
||
|
|
"epoch": 0.2576,
|
||
|
|
"grad_norm": 6.436363697052002,
|
||
|
|
"learning_rate": 2.222222222222222e-07,
|
||
|
|
"loss": 0.0219,
|
||
|
|
"num_tokens": 41791610.0,
|
||
|
|
"reward": 0.609375,
|
||
|
|
"reward_std": 0.22688651084899902,
|
||
|
|
"rewards/accuracy_reward_step": 0.30859375,
|
||
|
|
"rewards/format_reward_step": 0.6015625,
|
||
|
|
"step": 161
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.98828125,
|
||
|
|
"calib/auroc": 0.8629441624365483,
|
||
|
|
"calib/avg_num_step_conf": 0.9453125,
|
||
|
|
"calib/ece": 0.4206274509803921,
|
||
|
|
"calib/final_conf_rate": 0.99609375,
|
||
|
|
"calib/format_rate": 0.60546875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.058823529411764705,
|
||
|
|
"calib/gap": 0.3042201995448979,
|
||
|
|
"calib/mean_conf": 0.6480784313725491,
|
||
|
|
"calib/mu_c": 0.8831034482758623,
|
||
|
|
"calib/mu_w": 0.5788832487309644,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.625,
|
||
|
|
"calib/pce": 0.4206274509803921,
|
||
|
|
"calib/std_conf": 0.30981182417322195,
|
||
|
|
"calib/step_conf_rate": 0.625,
|
||
|
|
"calib/step_q_c": 0.8247619047619048,
|
||
|
|
"calib/step_q_c_n": 105.0,
|
||
|
|
"calib/step_q_gap": 0.12987139381299984,
|
||
|
|
"calib/step_q_w": 0.694890510948905,
|
||
|
|
"calib/step_q_w_n": 137.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.2592,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 2.1666666666666667e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 42046970.0,
|
||
|
|
"reward": 0.529296875,
|
||
|
|
"reward_std": 0.21713021397590637,
|
||
|
|
"rewards/accuracy_reward_step": 0.2265625,
|
||
|
|
"rewards/format_reward_step": 0.60546875,
|
||
|
|
"step": 162
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.95703125,
|
||
|
|
"calib/auroc": 0.8212469895638213,
|
||
|
|
"calib/avg_num_step_conf": 0.99609375,
|
||
|
|
"calib/ece": 0.29606425702811257,
|
||
|
|
"calib/final_conf_rate": 0.97265625,
|
||
|
|
"calib/format_rate": 0.6953125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.14056224899598393,
|
||
|
|
"calib/gap": 0.31365935242172904,
|
||
|
|
"calib/mean_conf": 0.7016867469879517,
|
||
|
|
"calib/mu_c": 0.8881188118811885,
|
||
|
|
"calib/mu_w": 0.5744594594594594,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.96484375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.70703125,
|
||
|
|
"calib/pce": 0.29606425702811257,
|
||
|
|
"calib/std_conf": 0.3169298730678942,
|
||
|
|
"calib/step_conf_rate": 0.70703125,
|
||
|
|
"calib/step_q_c": 0.8518493150684932,
|
||
|
|
"calib/step_q_c_n": 146.0,
|
||
|
|
"calib/step_q_gap": 0.11065665451803453,
|
||
|
|
"calib/step_q_w": 0.7411926605504586,
|
||
|
|
"calib/step_q_w_n": 109.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.9921875,
|
||
|
|
"completions/max_length": 147.0,
|
||
|
|
"completions/max_terminated_length": 147.0,
|
||
|
|
"completions/mean_length": 0.8984375,
|
||
|
|
"completions/mean_terminated_length": 115.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 83.0,
|
||
|
|
"epoch": 0.2608,
|
||
|
|
"grad_norm": 22.149232864379883,
|
||
|
|
"learning_rate": 2.111111111111111e-07,
|
||
|
|
"loss": 0.0386,
|
||
|
|
"num_tokens": 42305384.0,
|
||
|
|
"reward": 0.7421875,
|
||
|
|
"reward_std": 0.25784483551979065,
|
||
|
|
"rewards/accuracy_reward_step": 0.39453125,
|
||
|
|
"rewards/format_reward_step": 0.6953125,
|
||
|
|
"step": 163
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.9921875,
|
||
|
|
"calib/auroc": 0.7302590414215994,
|
||
|
|
"calib/avg_num_step_conf": 1.3203125,
|
||
|
|
"calib/ece": 0.28664031620553365,
|
||
|
|
"calib/final_conf_rate": 0.98828125,
|
||
|
|
"calib/format_rate": 0.828125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.13438735177865613,
|
||
|
|
"calib/gap": 0.21386497309473163,
|
||
|
|
"calib/mean_conf": 0.7688537549407116,
|
||
|
|
"calib/mu_c": 0.8795901639344264,
|
||
|
|
"calib/mu_w": 0.6657251908396947,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.83984375,
|
||
|
|
"calib/pce": 0.28664031620553365,
|
||
|
|
"calib/std_conf": 0.2422503816035925,
|
||
|
|
"calib/step_conf_rate": 0.83984375,
|
||
|
|
"calib/step_q_c": 0.8427272727272728,
|
||
|
|
"calib/step_q_c_n": 198.0,
|
||
|
|
"calib/step_q_gap": 0.09772727272727266,
|
||
|
|
"calib/step_q_w": 0.7450000000000001,
|
||
|
|
"calib/step_q_w_n": 140.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.9921875,
|
||
|
|
"completions/max_length": 157.0,
|
||
|
|
"completions/max_terminated_length": 157.0,
|
||
|
|
"completions/mean_length": 0.90625,
|
||
|
|
"completions/mean_terminated_length": 116.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 75.0,
|
||
|
|
"epoch": 0.2624,
|
||
|
|
"grad_norm": 24.581096649169922,
|
||
|
|
"learning_rate": 2.0555555555555553e-07,
|
||
|
|
"loss": 0.0773,
|
||
|
|
"num_tokens": 42566392.0,
|
||
|
|
"reward": 0.89453125,
|
||
|
|
"reward_std": 0.17005395889282227,
|
||
|
|
"rewards/accuracy_reward_step": 0.48046875,
|
||
|
|
"rewards/format_reward_step": 0.828125,
|
||
|
|
"step": 164
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.953125,
|
||
|
|
"calib/auroc": 0.8419240669240668,
|
||
|
|
"calib/avg_num_step_conf": 1.16796875,
|
||
|
|
"calib/ece": 0.300711462450593,
|
||
|
|
"calib/final_conf_rate": 0.98828125,
|
||
|
|
"calib/format_rate": 0.734375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.17391304347826086,
|
||
|
|
"calib/gap": 0.31613963963963976,
|
||
|
|
"calib/mean_conf": 0.7157312252964426,
|
||
|
|
"calib/mu_c": 0.9006666666666667,
|
||
|
|
"calib/mu_w": 0.584527027027027,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.76953125,
|
||
|
|
"calib/pce": 0.300711462450593,
|
||
|
|
"calib/std_conf": 0.3075684937587791,
|
||
|
|
"calib/step_conf_rate": 0.76953125,
|
||
|
|
"calib/step_q_c": 0.879591836734694,
|
||
|
|
"calib/step_q_c_n": 147.0,
|
||
|
|
"calib/step_q_gap": 0.15617078410311502,
|
||
|
|
"calib/step_q_w": 0.723421052631579,
|
||
|
|
"calib/step_q_w_n": 152.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 42.0,
|
||
|
|
"completions/max_terminated_length": 42.0,
|
||
|
|
"completions/mean_length": 0.1640625,
|
||
|
|
"completions/mean_terminated_length": 42.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 42.0,
|
||
|
|
"epoch": 0.264,
|
||
|
|
"grad_norm": 4.802876949310303,
|
||
|
|
"learning_rate": 2e-07,
|
||
|
|
"loss": 0.0113,
|
||
|
|
"num_tokens": 42827162.0,
|
||
|
|
"reward": 0.77734375,
|
||
|
|
"reward_std": 0.23624101281166077,
|
||
|
|
"rewards/accuracy_reward_step": 0.41015625,
|
||
|
|
"rewards/format_reward_step": 0.734375,
|
||
|
|
"step": 165
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.94921875,
|
||
|
|
"calib/auroc": 0.7948529411764707,
|
||
|
|
"calib/avg_num_step_conf": 0.9375,
|
||
|
|
"calib/ece": 0.3579032258064516,
|
||
|
|
"calib/final_conf_rate": 0.96875,
|
||
|
|
"calib/format_rate": 0.61328125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.024193548387096774,
|
||
|
|
"calib/gap": 0.2886732026143791,
|
||
|
|
"calib/mean_conf": 0.6320967741935484,
|
||
|
|
"calib/mu_c": 0.8416176470588236,
|
||
|
|
"calib/mu_w": 0.5529444444444445,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.96875,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.97265625,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.6484375,
|
||
|
|
"calib/pce": 0.3579032258064516,
|
||
|
|
"calib/std_conf": 0.3087899750627789,
|
||
|
|
"calib/step_conf_rate": 0.6484375,
|
||
|
|
"calib/step_q_c": 0.8106306306306305,
|
||
|
|
"calib/step_q_c_n": 111.0,
|
||
|
|
"calib/step_q_gap": 0.129622878692646,
|
||
|
|
"calib/step_q_w": 0.6810077519379845,
|
||
|
|
"calib/step_q_w_n": 129.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.984375,
|
||
|
|
"completions/max_length": 229.0,
|
||
|
|
"completions/max_terminated_length": 229.0,
|
||
|
|
"completions/mean_length": 1.65625,
|
||
|
|
"completions/mean_terminated_length": 106.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 44.0,
|
||
|
|
"epoch": 0.2656,
|
||
|
|
"grad_norm": 9.170284271240234,
|
||
|
|
"learning_rate": 1.9444444444444445e-07,
|
||
|
|
"loss": 0.0368,
|
||
|
|
"num_tokens": 43085458.0,
|
||
|
|
"reward": 0.572265625,
|
||
|
|
"reward_std": 0.2555355727672577,
|
||
|
|
"rewards/accuracy_reward_step": 0.265625,
|
||
|
|
"rewards/format_reward_step": 0.61328125,
|
||
|
|
"step": 166
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.97265625,
|
||
|
|
"calib/auroc": 0.782930402930403,
|
||
|
|
"calib/avg_num_step_conf": 1.0546875,
|
||
|
|
"calib/ece": 0.38193675889328055,
|
||
|
|
"calib/final_conf_rate": 0.98828125,
|
||
|
|
"calib/format_rate": 0.63671875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.09486166007905138,
|
||
|
|
"calib/gap": 0.2654461538461539,
|
||
|
|
"calib/mean_conf": 0.6902371541501976,
|
||
|
|
"calib/mu_c": 0.8738461538461538,
|
||
|
|
"calib/mu_w": 0.6083999999999999,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.97265625,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.64453125,
|
||
|
|
"calib/pce": 0.38193675889328055,
|
||
|
|
"calib/std_conf": 0.2974222970950437,
|
||
|
|
"calib/step_conf_rate": 0.64453125,
|
||
|
|
"calib/step_q_c": 0.8478313253012049,
|
||
|
|
"calib/step_q_c_n": 83.0,
|
||
|
|
"calib/step_q_gap": 0.11248373171831716,
|
||
|
|
"calib/step_q_w": 0.7353475935828877,
|
||
|
|
"calib/step_q_w_n": 187.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 190.0,
|
||
|
|
"completions/max_terminated_length": 190.0,
|
||
|
|
"completions/mean_length": 0.7421875,
|
||
|
|
"completions/mean_terminated_length": 190.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 190.0,
|
||
|
|
"epoch": 0.2672,
|
||
|
|
"grad_norm": 11.115410804748535,
|
||
|
|
"learning_rate": 1.8888888888888888e-07,
|
||
|
|
"loss": 0.0253,
|
||
|
|
"num_tokens": 43347136.0,
|
||
|
|
"reward": 0.623046875,
|
||
|
|
"reward_std": 0.19046950340270996,
|
||
|
|
"rewards/accuracy_reward_step": 0.3046875,
|
||
|
|
"rewards/format_reward_step": 0.63671875,
|
||
|
|
"step": 167
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.984375,
|
||
|
|
"calib/auroc": 0.825672454885938,
|
||
|
|
"calib/avg_num_step_conf": 1.15625,
|
||
|
|
"calib/ece": 0.37531496062992137,
|
||
|
|
"calib/final_conf_rate": 0.9921875,
|
||
|
|
"calib/format_rate": 0.7734375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.09055118110236221,
|
||
|
|
"calib/gap": 0.2425311542390196,
|
||
|
|
"calib/mean_conf": 0.7257086614173228,
|
||
|
|
"calib/mu_c": 0.8832584269662923,
|
||
|
|
"calib/mu_w": 0.6407272727272727,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.78515625,
|
||
|
|
"calib/pce": 0.37531496062992137,
|
||
|
|
"calib/std_conf": 0.26522032469937384,
|
||
|
|
"calib/step_conf_rate": 0.78515625,
|
||
|
|
"calib/step_q_c": 0.8656164383561644,
|
||
|
|
"calib/step_q_c_n": 146.0,
|
||
|
|
"calib/step_q_gap": 0.14681643835616442,
|
||
|
|
"calib/step_q_w": 0.7188,
|
||
|
|
"calib/step_q_w_n": 150.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.2688,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 1.833333333333333e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 43608984.0,
|
||
|
|
"reward": 0.734375,
|
||
|
|
"reward_std": 0.1352011114358902,
|
||
|
|
"rewards/accuracy_reward_step": 0.34765625,
|
||
|
|
"rewards/format_reward_step": 0.7734375,
|
||
|
|
"step": 168
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.984375,
|
||
|
|
"calib/auroc": 0.8567620650953984,
|
||
|
|
"calib/avg_num_step_conf": 0.83203125,
|
||
|
|
"calib/ece": 0.40277777777777773,
|
||
|
|
"calib/final_conf_rate": 0.984375,
|
||
|
|
"calib/format_rate": 0.578125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.05555555555555555,
|
||
|
|
"calib/gap": 0.3256902356902356,
|
||
|
|
"calib/mean_conf": 0.6170634920634921,
|
||
|
|
"calib/mu_c": 0.8729629629629628,
|
||
|
|
"calib/mu_w": 0.5472727272727272,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.59765625,
|
||
|
|
"calib/pce": 0.40277777777777773,
|
||
|
|
"calib/std_conf": 0.32718755230212576,
|
||
|
|
"calib/step_conf_rate": 0.59765625,
|
||
|
|
"calib/step_q_c": 0.86,
|
||
|
|
"calib/step_q_c_n": 71.0,
|
||
|
|
"calib/step_q_gap": 0.17563380281690133,
|
||
|
|
"calib/step_q_w": 0.6843661971830987,
|
||
|
|
"calib/step_q_w_n": 142.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.9921875,
|
||
|
|
"completions/max_length": 160.0,
|
||
|
|
"completions/max_terminated_length": 160.0,
|
||
|
|
"completions/mean_length": 1.1796875,
|
||
|
|
"completions/mean_terminated_length": 151.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 142.0,
|
||
|
|
"epoch": 0.2704,
|
||
|
|
"grad_norm": 4.74724817276001,
|
||
|
|
"learning_rate": 1.7777777777777776e-07,
|
||
|
|
"loss": 0.023,
|
||
|
|
"num_tokens": 43870190.0,
|
||
|
|
"reward": 0.5,
|
||
|
|
"reward_std": 0.19524022936820984,
|
||
|
|
"rewards/accuracy_reward_step": 0.2109375,
|
||
|
|
"rewards/format_reward_step": 0.578125,
|
||
|
|
"step": 169
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.98046875,
|
||
|
|
"calib/auroc": 0.8526422764227642,
|
||
|
|
"calib/avg_num_step_conf": 1.0703125,
|
||
|
|
"calib/ece": 0.2961417322834644,
|
||
|
|
"calib/final_conf_rate": 0.9921875,
|
||
|
|
"calib/format_rate": 0.76171875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.06299212598425197,
|
||
|
|
"calib/gap": 0.3425813008130082,
|
||
|
|
"calib/mean_conf": 0.6504724409448819,
|
||
|
|
"calib/mu_c": 0.8716666666666667,
|
||
|
|
"calib/mu_w": 0.5290853658536585,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.78515625,
|
||
|
|
"calib/pce": 0.2961417322834644,
|
||
|
|
"calib/std_conf": 0.3323842721288249,
|
||
|
|
"calib/step_conf_rate": 0.78515625,
|
||
|
|
"calib/step_q_c": 0.8497478991596636,
|
||
|
|
"calib/step_q_c_n": 119.0,
|
||
|
|
"calib/step_q_gap": 0.22755435077256692,
|
||
|
|
"calib/step_q_w": 0.6221935483870967,
|
||
|
|
"calib/step_q_w_n": 155.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.9921875,
|
||
|
|
"completions/max_length": 287.0,
|
||
|
|
"completions/max_terminated_length": 287.0,
|
||
|
|
"completions/mean_length": 1.98828125,
|
||
|
|
"completions/mean_terminated_length": 254.5,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 222.0,
|
||
|
|
"epoch": 0.272,
|
||
|
|
"grad_norm": 10.044112205505371,
|
||
|
|
"learning_rate": 1.7222222222222222e-07,
|
||
|
|
"loss": 0.0533,
|
||
|
|
"num_tokens": 44122075.0,
|
||
|
|
"reward": 0.732421875,
|
||
|
|
"reward_std": 0.17411759495735168,
|
||
|
|
"rewards/accuracy_reward_step": 0.3515625,
|
||
|
|
"rewards/format_reward_step": 0.76171875,
|
||
|
|
"step": 170
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.95703125,
|
||
|
|
"calib/auroc": 0.8251155115511551,
|
||
|
|
"calib/avg_num_step_conf": 1.15234375,
|
||
|
|
"calib/ece": 0.2896812749003984,
|
||
|
|
"calib/final_conf_rate": 0.98046875,
|
||
|
|
"calib/format_rate": 0.796875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.0796812749003984,
|
||
|
|
"calib/gap": 0.288952475247525,
|
||
|
|
"calib/mean_conf": 0.6920717131474102,
|
||
|
|
"calib/mu_c": 0.864752475247525,
|
||
|
|
"calib/mu_w": 0.5758,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.9765625,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.81640625,
|
||
|
|
"calib/pce": 0.2896812749003984,
|
||
|
|
"calib/std_conf": 0.3005421575798019,
|
||
|
|
"calib/step_conf_rate": 0.81640625,
|
||
|
|
"calib/step_q_c": 0.8299319727891157,
|
||
|
|
"calib/step_q_c_n": 147.0,
|
||
|
|
"calib/step_q_gap": 0.1838508917080347,
|
||
|
|
"calib/step_q_w": 0.646081081081081,
|
||
|
|
"calib/step_q_w_n": 148.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.9921875,
|
||
|
|
"completions/max_length": 201.0,
|
||
|
|
"completions/max_terminated_length": 201.0,
|
||
|
|
"completions/mean_length": 1.50390625,
|
||
|
|
"completions/mean_terminated_length": 192.5,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 184.0,
|
||
|
|
"epoch": 0.2736,
|
||
|
|
"grad_norm": 13.769118309020996,
|
||
|
|
"learning_rate": 1.6666666666666665e-07,
|
||
|
|
"loss": 0.0434,
|
||
|
|
"num_tokens": 44380220.0,
|
||
|
|
"reward": 0.79296875,
|
||
|
|
"reward_std": 0.22870349884033203,
|
||
|
|
"rewards/accuracy_reward_step": 0.39453125,
|
||
|
|
"rewards/format_reward_step": 0.796875,
|
||
|
|
"step": 171
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.96484375,
|
||
|
|
"calib/auroc": 0.851388079984469,
|
||
|
|
"calib/avg_num_step_conf": 0.89453125,
|
||
|
|
"calib/ece": 0.4028458498023715,
|
||
|
|
"calib/final_conf_rate": 0.98828125,
|
||
|
|
"calib/format_rate": 0.67578125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.05533596837944664,
|
||
|
|
"calib/gap": 0.34195593088720644,
|
||
|
|
"calib/mean_conf": 0.6044268774703557,
|
||
|
|
"calib/mu_c": 0.877450980392157,
|
||
|
|
"calib/mu_w": 0.5354950495049505,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.72265625,
|
||
|
|
"calib/pce": 0.4028458498023715,
|
||
|
|
"calib/std_conf": 0.3585347977182058,
|
||
|
|
"calib/step_conf_rate": 0.72265625,
|
||
|
|
"calib/step_q_c": 0.8674603174603177,
|
||
|
|
"calib/step_q_c_n": 63.0,
|
||
|
|
"calib/step_q_gap": 0.2221591126410407,
|
||
|
|
"calib/step_q_w": 0.645301204819277,
|
||
|
|
"calib/step_q_w_n": 166.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 200.0,
|
||
|
|
"completions/max_terminated_length": 200.0,
|
||
|
|
"completions/mean_length": 0.78125,
|
||
|
|
"completions/mean_terminated_length": 200.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 200.0,
|
||
|
|
"epoch": 0.2752,
|
||
|
|
"grad_norm": 3.7966060638427734,
|
||
|
|
"learning_rate": 1.611111111111111e-07,
|
||
|
|
"loss": 0.0113,
|
||
|
|
"num_tokens": 44639148.0,
|
||
|
|
"reward": 0.537109375,
|
||
|
|
"reward_std": 0.15649619698524475,
|
||
|
|
"rewards/accuracy_reward_step": 0.19921875,
|
||
|
|
"rewards/format_reward_step": 0.67578125,
|
||
|
|
"step": 172
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.96484375,
|
||
|
|
"calib/auroc": 0.7164351851851851,
|
||
|
|
"calib/avg_num_step_conf": 0.97265625,
|
||
|
|
"calib/ece": 0.43691699604743073,
|
||
|
|
"calib/final_conf_rate": 0.98828125,
|
||
|
|
"calib/format_rate": 0.76171875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.023715415019762844,
|
||
|
|
"calib/gap": 0.21969163359788357,
|
||
|
|
"calib/mean_conf": 0.6819762845849802,
|
||
|
|
"calib/mu_c": 0.84609375,
|
||
|
|
"calib/mu_w": 0.6264021164021164,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.80078125,
|
||
|
|
"calib/pce": 0.4329644268774703,
|
||
|
|
"calib/std_conf": 0.3034076026579139,
|
||
|
|
"calib/step_conf_rate": 0.80078125,
|
||
|
|
"calib/step_q_c": 0.835625,
|
||
|
|
"calib/step_q_c_n": 80.0,
|
||
|
|
"calib/step_q_gap": 0.13396819526627224,
|
||
|
|
"calib/step_q_w": 0.7016568047337277,
|
||
|
|
"calib/step_q_w_n": 169.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.2768,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 1.5555555555555556e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 44899180.0,
|
||
|
|
"reward": 0.630859375,
|
||
|
|
"reward_std": 0.15970739722251892,
|
||
|
|
"rewards/accuracy_reward_step": 0.25,
|
||
|
|
"rewards/format_reward_step": 0.76171875,
|
||
|
|
"step": 173
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.95703125,
|
||
|
|
"calib/auroc": 0.7298828719196836,
|
||
|
|
"calib/avg_num_step_conf": 1.06640625,
|
||
|
|
"calib/ece": 0.42377510040160626,
|
||
|
|
"calib/final_conf_rate": 0.97265625,
|
||
|
|
"calib/format_rate": 0.74609375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.07228915662650602,
|
||
|
|
"calib/gap": 0.19216154548220254,
|
||
|
|
"calib/mean_conf": 0.7209638554216867,
|
||
|
|
"calib/mu_c": 0.8544736842105263,
|
||
|
|
"calib/mu_w": 0.6623121387283237,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.9609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.7578125,
|
||
|
|
"calib/pce": 0.4197590361445782,
|
||
|
|
"calib/std_conf": 0.26651528339652664,
|
||
|
|
"calib/step_conf_rate": 0.7578125,
|
||
|
|
"calib/step_q_c": 0.8565384615384617,
|
||
|
|
"calib/step_q_c_n": 104.0,
|
||
|
|
"calib/step_q_gap": 0.12588757396449723,
|
||
|
|
"calib/step_q_w": 0.7306508875739645,
|
||
|
|
"calib/step_q_w_n": 169.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.98828125,
|
||
|
|
"completions/max_length": 245.0,
|
||
|
|
"completions/max_terminated_length": 245.0,
|
||
|
|
"completions/mean_length": 2.234375,
|
||
|
|
"completions/mean_terminated_length": 190.6666717529297,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 144.0,
|
||
|
|
"epoch": 0.2784,
|
||
|
|
"grad_norm": 12.086045265197754,
|
||
|
|
"learning_rate": 1.5e-07,
|
||
|
|
"loss": 0.0447,
|
||
|
|
"num_tokens": 45161896.0,
|
||
|
|
"reward": 0.669921875,
|
||
|
|
"reward_std": 0.21960091590881348,
|
||
|
|
"rewards/accuracy_reward_step": 0.296875,
|
||
|
|
"rewards/format_reward_step": 0.74609375,
|
||
|
|
"step": 174
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.9609375,
|
||
|
|
"calib/auroc": 0.7342500689274882,
|
||
|
|
"calib/avg_num_step_conf": 1.0234375,
|
||
|
|
"calib/ece": 0.31851405622489953,
|
||
|
|
"calib/final_conf_rate": 0.97265625,
|
||
|
|
"calib/format_rate": 0.75,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.08835341365461848,
|
||
|
|
"calib/gap": 0.26453680727874296,
|
||
|
|
"calib/mean_conf": 0.692008032128514,
|
||
|
|
"calib/mu_c": 0.8577419354838711,
|
||
|
|
"calib/mu_w": 0.5932051282051282,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.98046875,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.78515625,
|
||
|
|
"calib/pce": 0.31851405622489953,
|
||
|
|
"calib/std_conf": 0.3114727033231076,
|
||
|
|
"calib/step_conf_rate": 0.78515625,
|
||
|
|
"calib/step_q_c": 0.8468253968253967,
|
||
|
|
"calib/step_q_c_n": 126.0,
|
||
|
|
"calib/step_q_gap": 0.11101657329598502,
|
||
|
|
"calib/step_q_w": 0.7358088235294117,
|
||
|
|
"calib/step_q_w_n": 136.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.98828125,
|
||
|
|
"completions/max_length": 191.0,
|
||
|
|
"completions/max_terminated_length": 191.0,
|
||
|
|
"completions/mean_length": 2.09375,
|
||
|
|
"completions/mean_terminated_length": 178.6666717529297,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 172.0,
|
||
|
|
"epoch": 0.28,
|
||
|
|
"grad_norm": 5.533905506134033,
|
||
|
|
"learning_rate": 1.4444444444444442e-07,
|
||
|
|
"loss": 0.0483,
|
||
|
|
"num_tokens": 45420776.0,
|
||
|
|
"reward": 0.73828125,
|
||
|
|
"reward_std": 0.1640671193599701,
|
||
|
|
"rewards/accuracy_reward_step": 0.36328125,
|
||
|
|
"rewards/format_reward_step": 0.75,
|
||
|
|
"step": 175
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.96875,
|
||
|
|
"calib/auroc": 0.7624078275666477,
|
||
|
|
"calib/avg_num_step_conf": 1.05859375,
|
||
|
|
"calib/ece": 0.40164,
|
||
|
|
"calib/final_conf_rate": 0.9765625,
|
||
|
|
"calib/format_rate": 0.83984375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.092,
|
||
|
|
"calib/gap": 0.19808139534883729,
|
||
|
|
"calib/mean_conf": 0.74564,
|
||
|
|
"calib/mu_c": 0.8755813953488372,
|
||
|
|
"calib/mu_w": 0.6774999999999999,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.87109375,
|
||
|
|
"calib/pce": 0.40164,
|
||
|
|
"calib/std_conf": 0.2577250286642723,
|
||
|
|
"calib/step_conf_rate": 0.87109375,
|
||
|
|
"calib/step_q_c": 0.8550925925925926,
|
||
|
|
"calib/step_q_c_n": 108.0,
|
||
|
|
"calib/step_q_gap": 0.11809872756191786,
|
||
|
|
"calib/step_q_w": 0.7369938650306748,
|
||
|
|
"calib/step_q_w_n": 163.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.98828125,
|
||
|
|
"completions/max_length": 156.0,
|
||
|
|
"completions/max_terminated_length": 156.0,
|
||
|
|
"completions/mean_length": 1.48046875,
|
||
|
|
"completions/mean_terminated_length": 126.33333587646484,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 93.0,
|
||
|
|
"epoch": 0.2816,
|
||
|
|
"grad_norm": 19.42263412475586,
|
||
|
|
"learning_rate": 1.3888888888888888e-07,
|
||
|
|
"loss": 0.0619,
|
||
|
|
"num_tokens": 45677851.0,
|
||
|
|
"reward": 0.759765625,
|
||
|
|
"reward_std": 0.22453749179840088,
|
||
|
|
"rewards/accuracy_reward_step": 0.33984375,
|
||
|
|
"rewards/format_reward_step": 0.83984375,
|
||
|
|
"step": 176
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.94921875,
|
||
|
|
"calib/auroc": 0.6719135802469136,
|
||
|
|
"calib/avg_num_step_conf": 1.08984375,
|
||
|
|
"calib/ece": 0.4373092369477912,
|
||
|
|
"calib/final_conf_rate": 0.97265625,
|
||
|
|
"calib/format_rate": 0.734375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.05220883534136546,
|
||
|
|
"calib/gap": 0.2141666666666664,
|
||
|
|
"calib/mean_conf": 0.6782730923694779,
|
||
|
|
"calib/mu_c": 0.8408333333333331,
|
||
|
|
"calib/mu_w": 0.6266666666666667,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.77734375,
|
||
|
|
"calib/pce": 0.4373092369477912,
|
||
|
|
"calib/std_conf": 0.32396363912757653,
|
||
|
|
"calib/step_conf_rate": 0.77734375,
|
||
|
|
"calib/step_q_c": 0.8234177215189872,
|
||
|
|
"calib/step_q_c_n": 79.0,
|
||
|
|
"calib/step_q_gap": 0.09066772151898728,
|
||
|
|
"calib/step_q_w": 0.7327499999999999,
|
||
|
|
"calib/step_q_w_n": 200.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.9921875,
|
||
|
|
"completions/max_length": 215.0,
|
||
|
|
"completions/max_terminated_length": 215.0,
|
||
|
|
"completions/mean_length": 1.41015625,
|
||
|
|
"completions/mean_terminated_length": 180.5,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 146.0,
|
||
|
|
"epoch": 0.2832,
|
||
|
|
"grad_norm": 10.389142036437988,
|
||
|
|
"learning_rate": 1.3333333333333334e-07,
|
||
|
|
"loss": 0.0391,
|
||
|
|
"num_tokens": 45940356.0,
|
||
|
|
"reward": 0.6015625,
|
||
|
|
"reward_std": 0.14131318032741547,
|
||
|
|
"rewards/accuracy_reward_step": 0.234375,
|
||
|
|
"rewards/format_reward_step": 0.734375,
|
||
|
|
"step": 177
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.96875,
|
||
|
|
"calib/auroc": 0.6888651121605667,
|
||
|
|
"calib/avg_num_step_conf": 1.0546875,
|
||
|
|
"calib/ece": 0.3931620553359682,
|
||
|
|
"calib/final_conf_rate": 0.98828125,
|
||
|
|
"calib/format_rate": 0.76171875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.09881422924901186,
|
||
|
|
"calib/gap": 0.24795454545454532,
|
||
|
|
"calib/mean_conf": 0.6975098814229248,
|
||
|
|
"calib/mu_c": 0.8699999999999998,
|
||
|
|
"calib/mu_w": 0.6220454545454545,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.77734375,
|
||
|
|
"calib/pce": 0.3931620553359682,
|
||
|
|
"calib/std_conf": 0.3263326658421405,
|
||
|
|
"calib/step_conf_rate": 0.77734375,
|
||
|
|
"calib/step_q_c": 0.8630769230769229,
|
||
|
|
"calib/step_q_c_n": 91.0,
|
||
|
|
"calib/step_q_gap": 0.11285345938977209,
|
||
|
|
"calib/step_q_w": 0.7502234636871508,
|
||
|
|
"calib/step_q_w_n": 179.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.9921875,
|
||
|
|
"completions/max_length": 264.0,
|
||
|
|
"completions/max_terminated_length": 264.0,
|
||
|
|
"completions/mean_length": 1.8515625,
|
||
|
|
"completions/mean_terminated_length": 237.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 210.0,
|
||
|
|
"epoch": 0.2848,
|
||
|
|
"grad_norm": 7.318977355957031,
|
||
|
|
"learning_rate": 1.2777777777777777e-07,
|
||
|
|
"loss": 0.0422,
|
||
|
|
"num_tokens": 46202382.0,
|
||
|
|
"reward": 0.681640625,
|
||
|
|
"reward_std": 0.1777898669242859,
|
||
|
|
"rewards/accuracy_reward_step": 0.30078125,
|
||
|
|
"rewards/format_reward_step": 0.76171875,
|
||
|
|
"step": 178
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.97265625,
|
||
|
|
"calib/auroc": 0.802274185499394,
|
||
|
|
"calib/avg_num_step_conf": 1.03125,
|
||
|
|
"calib/ece": 0.4046428571428572,
|
||
|
|
"calib/final_conf_rate": 0.984375,
|
||
|
|
"calib/format_rate": 0.7890625,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.12698412698412698,
|
||
|
|
"calib/gap": 0.23152562914379415,
|
||
|
|
"calib/mean_conf": 0.7340079365079365,
|
||
|
|
"calib/mu_c": 0.889277108433735,
|
||
|
|
"calib/mu_w": 0.6577514792899408,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.81640625,
|
||
|
|
"calib/pce": 0.4046428571428572,
|
||
|
|
"calib/std_conf": 0.2902711648499745,
|
||
|
|
"calib/step_conf_rate": 0.81640625,
|
||
|
|
"calib/step_q_c": 0.8671296296296295,
|
||
|
|
"calib/step_q_c_n": 108.0,
|
||
|
|
"calib/step_q_gap": 0.15597578347578334,
|
||
|
|
"calib/step_q_w": 0.7111538461538461,
|
||
|
|
"calib/step_q_w_n": 156.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 162.0,
|
||
|
|
"completions/max_terminated_length": 162.0,
|
||
|
|
"completions/mean_length": 0.6328125,
|
||
|
|
"completions/mean_terminated_length": 162.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 162.0,
|
||
|
|
"epoch": 0.2864,
|
||
|
|
"grad_norm": 3.512399673461914,
|
||
|
|
"learning_rate": 1.2222222222222222e-07,
|
||
|
|
"loss": 0.0113,
|
||
|
|
"num_tokens": 46457040.0,
|
||
|
|
"reward": 0.71875,
|
||
|
|
"reward_std": 0.2521737813949585,
|
||
|
|
"rewards/accuracy_reward_step": 0.32421875,
|
||
|
|
"rewards/format_reward_step": 0.7890625,
|
||
|
|
"step": 179
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.98046875,
|
||
|
|
"calib/auroc": 0.7690644932671864,
|
||
|
|
"calib/avg_num_step_conf": 1.03515625,
|
||
|
|
"calib/ece": 0.3682470119521912,
|
||
|
|
"calib/final_conf_rate": 0.98046875,
|
||
|
|
"calib/format_rate": 0.765625,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.08366533864541832,
|
||
|
|
"calib/gap": 0.25178525868178603,
|
||
|
|
"calib/mean_conf": 0.7068924302788845,
|
||
|
|
"calib/mu_c": 0.8734117647058824,
|
||
|
|
"calib/mu_w": 0.6216265060240964,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.77734375,
|
||
|
|
"calib/pce": 0.3682470119521912,
|
||
|
|
"calib/std_conf": 0.28738951520310446,
|
||
|
|
"calib/step_conf_rate": 0.77734375,
|
||
|
|
"calib/step_q_c": 0.8467164179104478,
|
||
|
|
"calib/step_q_c_n": 134.0,
|
||
|
|
"calib/step_q_gap": 0.1822126011165548,
|
||
|
|
"calib/step_q_w": 0.664503816793893,
|
||
|
|
"calib/step_q_w_n": 131.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.288,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 1.1666666666666667e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 46714200.0,
|
||
|
|
"reward": 0.71484375,
|
||
|
|
"reward_std": 0.1728074997663498,
|
||
|
|
"rewards/accuracy_reward_step": 0.33203125,
|
||
|
|
"rewards/format_reward_step": 0.765625,
|
||
|
|
"step": 180
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.96484375,
|
||
|
|
"calib/auroc": 0.773431031627753,
|
||
|
|
"calib/avg_num_step_conf": 0.91796875,
|
||
|
|
"calib/ece": 0.4519277108433736,
|
||
|
|
"calib/final_conf_rate": 0.97265625,
|
||
|
|
"calib/format_rate": 0.75,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.0963855421686747,
|
||
|
|
"calib/gap": 0.23396671634376554,
|
||
|
|
"calib/mean_conf": 0.716987951807229,
|
||
|
|
"calib/mu_c": 0.8889393939393939,
|
||
|
|
"calib/mu_w": 0.6549726775956284,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.79296875,
|
||
|
|
"calib/pce": 0.4519277108433736,
|
||
|
|
"calib/std_conf": 0.3047089661820087,
|
||
|
|
"calib/step_conf_rate": 0.79296875,
|
||
|
|
"calib/step_q_c": 0.8807246376811594,
|
||
|
|
"calib/step_q_c_n": 69.0,
|
||
|
|
"calib/step_q_gap": 0.1521101798498341,
|
||
|
|
"calib/step_q_w": 0.7286144578313253,
|
||
|
|
"calib/step_q_w_n": 166.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.2896,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 1.111111111111111e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 46969704.0,
|
||
|
|
"reward": 0.6328125,
|
||
|
|
"reward_std": 0.25595584511756897,
|
||
|
|
"rewards/accuracy_reward_step": 0.2578125,
|
||
|
|
"rewards/format_reward_step": 0.75,
|
||
|
|
"step": 181
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.98828125,
|
||
|
|
"calib/auroc": 0.7419469277706825,
|
||
|
|
"calib/avg_num_step_conf": 1.05859375,
|
||
|
|
"calib/ece": 0.3729803921568627,
|
||
|
|
"calib/final_conf_rate": 0.99609375,
|
||
|
|
"calib/format_rate": 0.76953125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.050980392156862744,
|
||
|
|
"calib/gap": 0.2541890166028098,
|
||
|
|
"calib/mean_conf": 0.6906274509803921,
|
||
|
|
"calib/mu_c": 0.8640740740740742,
|
||
|
|
"calib/mu_w": 0.6098850574712644,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.7890625,
|
||
|
|
"calib/pce": 0.3729803921568627,
|
||
|
|
"calib/std_conf": 0.31367586372199213,
|
||
|
|
"calib/step_conf_rate": 0.7890625,
|
||
|
|
"calib/step_q_c": 0.8459223300970876,
|
||
|
|
"calib/step_q_c_n": 103.0,
|
||
|
|
"calib/step_q_gap": 0.07181518723994484,
|
||
|
|
"calib/step_q_w": 0.7741071428571428,
|
||
|
|
"calib/step_q_w_n": 168.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.2912,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 1.0555555555555555e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 47230352.0,
|
||
|
|
"reward": 0.701171875,
|
||
|
|
"reward_std": 0.19887131452560425,
|
||
|
|
"rewards/accuracy_reward_step": 0.31640625,
|
||
|
|
"rewards/format_reward_step": 0.76953125,
|
||
|
|
"step": 182
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.94921875,
|
||
|
|
"calib/auroc": 0.7689903846153846,
|
||
|
|
"calib/avg_num_step_conf": 0.96875,
|
||
|
|
"calib/ece": 0.35266932270916324,
|
||
|
|
"calib/final_conf_rate": 0.98046875,
|
||
|
|
"calib/format_rate": 0.76953125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.06772908366533864,
|
||
|
|
"calib/gap": 0.23867582417582445,
|
||
|
|
"calib/mean_conf": 0.71203187250996,
|
||
|
|
"calib/mu_c": 0.8641758241758244,
|
||
|
|
"calib/mu_w": 0.6255,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.82421875,
|
||
|
|
"calib/pce": 0.3510756972111553,
|
||
|
|
"calib/std_conf": 0.2954668380217944,
|
||
|
|
"calib/step_conf_rate": 0.82421875,
|
||
|
|
"calib/step_q_c": 0.8232692307692306,
|
||
|
|
"calib/step_q_c_n": 104.0,
|
||
|
|
"calib/step_q_gap": 0.14653311965811966,
|
||
|
|
"calib/step_q_w": 0.676736111111111,
|
||
|
|
"calib/step_q_w_n": 144.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.98828125,
|
||
|
|
"completions/max_length": 171.0,
|
||
|
|
"completions/max_terminated_length": 171.0,
|
||
|
|
"completions/mean_length": 1.78125,
|
||
|
|
"completions/mean_terminated_length": 152.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 119.0,
|
||
|
|
"epoch": 0.2928,
|
||
|
|
"grad_norm": 20.446836471557617,
|
||
|
|
"learning_rate": 1e-07,
|
||
|
|
"loss": 0.0808,
|
||
|
|
"num_tokens": 47491240.0,
|
||
|
|
"reward": 0.740234375,
|
||
|
|
"reward_std": 0.2335708737373352,
|
||
|
|
"rewards/accuracy_reward_step": 0.35546875,
|
||
|
|
"rewards/format_reward_step": 0.76953125,
|
||
|
|
"step": 183
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.98046875,
|
||
|
|
"calib/auroc": 0.7458476622521402,
|
||
|
|
"calib/avg_num_step_conf": 0.91015625,
|
||
|
|
"calib/ece": 0.35170634920634913,
|
||
|
|
"calib/final_conf_rate": 0.984375,
|
||
|
|
"calib/format_rate": 0.73828125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.05952380952380952,
|
||
|
|
"calib/gap": 0.26734689397819544,
|
||
|
|
"calib/mean_conf": 0.6651984126984128,
|
||
|
|
"calib/mu_c": 0.8487341772151896,
|
||
|
|
"calib/mu_w": 0.5813872832369942,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.75390625,
|
||
|
|
"calib/pce": 0.35170634920634913,
|
||
|
|
"calib/std_conf": 0.32788578440751,
|
||
|
|
"calib/step_conf_rate": 0.75390625,
|
||
|
|
"calib/step_q_c": 0.8494505494505494,
|
||
|
|
"calib/step_q_c_n": 91.0,
|
||
|
|
"calib/step_q_gap": 0.1465632255068875,
|
||
|
|
"calib/step_q_w": 0.7028873239436619,
|
||
|
|
"calib/step_q_w_n": 142.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.2944,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 9.444444444444444e-08,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 47750568.0,
|
||
|
|
"reward": 0.681640625,
|
||
|
|
"reward_std": 0.18778353929519653,
|
||
|
|
"rewards/accuracy_reward_step": 0.3125,
|
||
|
|
"rewards/format_reward_step": 0.73828125,
|
||
|
|
"step": 184
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.98046875,
|
||
|
|
"calib/auroc": 0.7827762515262515,
|
||
|
|
"calib/avg_num_step_conf": 0.84765625,
|
||
|
|
"calib/ece": 0.31149606299212595,
|
||
|
|
"calib/final_conf_rate": 0.9921875,
|
||
|
|
"calib/format_rate": 0.65625,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.047244094488188976,
|
||
|
|
"calib/gap": 0.35155067155067155,
|
||
|
|
"calib/mean_conf": 0.5792125984251969,
|
||
|
|
"calib/mu_c": 0.8311111111111111,
|
||
|
|
"calib/mu_w": 0.4795604395604396,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.671875,
|
||
|
|
"calib/pce": 0.3036220472440945,
|
||
|
|
"calib/std_conf": 0.3776181179325033,
|
||
|
|
"calib/step_conf_rate": 0.671875,
|
||
|
|
"calib/step_q_c": 0.8268085106382982,
|
||
|
|
"calib/step_q_c_n": 94.0,
|
||
|
|
"calib/step_q_gap": 0.17778412039439584,
|
||
|
|
"calib/step_q_w": 0.6490243902439023,
|
||
|
|
"calib/step_q_w_n": 123.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.296,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 8.888888888888888e-08,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 48011368.0,
|
||
|
|
"reward": 0.609375,
|
||
|
|
"reward_std": 0.22449977695941925,
|
||
|
|
"rewards/accuracy_reward_step": 0.28125,
|
||
|
|
"rewards/format_reward_step": 0.65625,
|
||
|
|
"step": 185
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.96875,
|
||
|
|
"calib/auroc": 0.7705296579593014,
|
||
|
|
"calib/avg_num_step_conf": 1.109375,
|
||
|
|
"calib/ece": 0.3654780876494024,
|
||
|
|
"calib/final_conf_rate": 0.98046875,
|
||
|
|
"calib/format_rate": 0.8125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.0398406374501992,
|
||
|
|
"calib/gap": 0.24909149949487663,
|
||
|
|
"calib/mean_conf": 0.6905776892430279,
|
||
|
|
"calib/mu_c": 0.8582926829268293,
|
||
|
|
"calib/mu_w": 0.6092011834319526,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.8671875,
|
||
|
|
"calib/pce": 0.36468127490039837,
|
||
|
|
"calib/std_conf": 0.30374810828561666,
|
||
|
|
"calib/step_conf_rate": 0.8671875,
|
||
|
|
"calib/step_q_c": 0.8133684210526315,
|
||
|
|
"calib/step_q_c_n": 95.0,
|
||
|
|
"calib/step_q_gap": 0.09839487607908659,
|
||
|
|
"calib/step_q_w": 0.7149735449735449,
|
||
|
|
"calib/step_q_w_n": 189.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.2976,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 8.333333333333333e-08,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 48273152.0,
|
||
|
|
"reward": 0.7265625,
|
||
|
|
"reward_std": 0.25780707597732544,
|
||
|
|
"rewards/accuracy_reward_step": 0.3203125,
|
||
|
|
"rewards/format_reward_step": 0.8125,
|
||
|
|
"step": 186
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.9609375,
|
||
|
|
"calib/auroc": 0.6652142338416848,
|
||
|
|
"calib/avg_num_step_conf": 1.07421875,
|
||
|
|
"calib/ece": 0.40402390438247,
|
||
|
|
"calib/final_conf_rate": 0.98046875,
|
||
|
|
"calib/format_rate": 0.86328125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.0398406374501992,
|
||
|
|
"calib/gap": 0.17209005083514883,
|
||
|
|
"calib/mean_conf": 0.7203585657370517,
|
||
|
|
"calib/mu_c": 0.8369135802469135,
|
||
|
|
"calib/mu_w": 0.6648235294117647,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.921875,
|
||
|
|
"calib/pce": 0.4008366533864541,
|
||
|
|
"calib/std_conf": 0.26822916206632613,
|
||
|
|
"calib/step_conf_rate": 0.921875,
|
||
|
|
"calib/step_q_c": 0.8310752688172042,
|
||
|
|
"calib/step_q_c_n": 93.0,
|
||
|
|
"calib/step_q_gap": 0.11783351057544589,
|
||
|
|
"calib/step_q_w": 0.7132417582417583,
|
||
|
|
"calib/step_q_w_n": 182.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 221.0,
|
||
|
|
"completions/max_terminated_length": 221.0,
|
||
|
|
"completions/mean_length": 0.86328125,
|
||
|
|
"completions/mean_terminated_length": 221.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 221.0,
|
||
|
|
"epoch": 0.2992,
|
||
|
|
"grad_norm": 10.69324779510498,
|
||
|
|
"learning_rate": 7.777777777777778e-08,
|
||
|
|
"loss": 0.0386,
|
||
|
|
"num_tokens": 48533581.0,
|
||
|
|
"reward": 0.748046875,
|
||
|
|
"reward_std": 0.17594116926193237,
|
||
|
|
"rewards/accuracy_reward_step": 0.31640625,
|
||
|
|
"rewards/format_reward_step": 0.86328125,
|
||
|
|
"step": 187
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.953125,
|
||
|
|
"calib/auroc": 0.761209288739594,
|
||
|
|
"calib/avg_num_step_conf": 0.828125,
|
||
|
|
"calib/ece": 0.3547389558232932,
|
||
|
|
"calib/final_conf_rate": 0.97265625,
|
||
|
|
"calib/format_rate": 0.70703125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.0321285140562249,
|
||
|
|
"calib/gap": 0.26031035489995613,
|
||
|
|
"calib/mean_conf": 0.6760240963855421,
|
||
|
|
"calib/mu_c": 0.850609756097561,
|
||
|
|
"calib/mu_w": 0.5902994011976048,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.75,
|
||
|
|
"calib/pce": 0.350722891566265,
|
||
|
|
"calib/std_conf": 0.31946594400530953,
|
||
|
|
"calib/step_conf_rate": 0.75,
|
||
|
|
"calib/step_q_c": 0.8476190476190476,
|
||
|
|
"calib/step_q_c_n": 84.0,
|
||
|
|
"calib/step_q_gap": 0.17574404761904772,
|
||
|
|
"calib/step_q_w": 0.6718749999999999,
|
||
|
|
"calib/step_q_w_n": 128.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 960.0,
|
||
|
|
"completions/max_terminated_length": 960.0,
|
||
|
|
"completions/mean_length": 3.75,
|
||
|
|
"completions/mean_terminated_length": 960.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 960.0,
|
||
|
|
"epoch": 0.3008,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 7.222222222222221e-08,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 48790861.0,
|
||
|
|
"reward": 0.673828125,
|
||
|
|
"reward_std": 0.2038663625717163,
|
||
|
|
"rewards/accuracy_reward_step": 0.3203125,
|
||
|
|
"rewards/format_reward_step": 0.70703125,
|
||
|
|
"step": 188
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.99609375,
|
||
|
|
"calib/auroc": 0.8780393918008598,
|
||
|
|
"calib/avg_num_step_conf": 0.984375,
|
||
|
|
"calib/ece": 0.2922619047619048,
|
||
|
|
"calib/final_conf_rate": 0.984375,
|
||
|
|
"calib/format_rate": 0.84375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.10714285714285714,
|
||
|
|
"calib/gap": 0.2927356130108425,
|
||
|
|
"calib/mean_conf": 0.7248015873015872,
|
||
|
|
"calib/mu_c": 0.8909174311926606,
|
||
|
|
"calib/mu_w": 0.5981818181818181,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.87890625,
|
||
|
|
"calib/pce": 0.2922619047619048,
|
||
|
|
"calib/std_conf": 0.3002267005839291,
|
||
|
|
"calib/step_conf_rate": 0.87890625,
|
||
|
|
"calib/step_q_c": 0.8728828828828831,
|
||
|
|
"calib/step_q_c_n": 111.0,
|
||
|
|
"calib/step_q_gap": 0.23423040061337963,
|
||
|
|
"calib/step_q_w": 0.6386524822695034,
|
||
|
|
"calib/step_q_w_n": 141.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.9921875,
|
||
|
|
"completions/max_length": 235.0,
|
||
|
|
"completions/max_terminated_length": 235.0,
|
||
|
|
"completions/mean_length": 1.48828125,
|
||
|
|
"completions/mean_terminated_length": 190.5,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 146.0,
|
||
|
|
"epoch": 0.3024,
|
||
|
|
"grad_norm": 8.809115409851074,
|
||
|
|
"learning_rate": 6.666666666666667e-08,
|
||
|
|
"loss": 0.0332,
|
||
|
|
"num_tokens": 49049130.0,
|
||
|
|
"reward": 0.84765625,
|
||
|
|
"reward_std": 0.17455264925956726,
|
||
|
|
"rewards/accuracy_reward_step": 0.42578125,
|
||
|
|
"rewards/format_reward_step": 0.84375,
|
||
|
|
"step": 189
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.9609375,
|
||
|
|
"calib/auroc": 0.8290183387270765,
|
||
|
|
"calib/avg_num_step_conf": 0.95703125,
|
||
|
|
"calib/ece": 0.4649402390438246,
|
||
|
|
"calib/final_conf_rate": 0.98046875,
|
||
|
|
"calib/format_rate": 0.76171875,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.08366533864541832,
|
||
|
|
"calib/gap": 0.28670118662351696,
|
||
|
|
"calib/mean_conf": 0.6362549800796812,
|
||
|
|
"calib/mu_c": 0.8715555555555559,
|
||
|
|
"calib/mu_w": 0.5848543689320389,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.8046875,
|
||
|
|
"calib/pce": 0.46095617529880467,
|
||
|
|
"calib/std_conf": 0.3415507153563842,
|
||
|
|
"calib/step_conf_rate": 0.8046875,
|
||
|
|
"calib/step_q_c": 0.8249090909090913,
|
||
|
|
"calib/step_q_c_n": 55.0,
|
||
|
|
"calib/step_q_gap": 0.14096172248803884,
|
||
|
|
"calib/step_q_w": 0.6839473684210524,
|
||
|
|
"calib/step_q_w_n": 190.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.304,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 6.111111111111111e-08,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 49299186.0,
|
||
|
|
"reward": 0.560546875,
|
||
|
|
"reward_std": 0.1769346296787262,
|
||
|
|
"rewards/accuracy_reward_step": 0.1796875,
|
||
|
|
"rewards/format_reward_step": 0.76171875,
|
||
|
|
"step": 190
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.9765625,
|
||
|
|
"calib/auroc": 0.7287003120336454,
|
||
|
|
"calib/avg_num_step_conf": 1.09375,
|
||
|
|
"calib/ece": 0.40391304347826085,
|
||
|
|
"calib/final_conf_rate": 0.98828125,
|
||
|
|
"calib/format_rate": 0.90234375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.043478260869565216,
|
||
|
|
"calib/gap": 0.1730375797042466,
|
||
|
|
"calib/mean_conf": 0.763596837944664,
|
||
|
|
"calib/mu_c": 0.8743956043956046,
|
||
|
|
"calib/mu_w": 0.701358024691358,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.9453125,
|
||
|
|
"calib/pce": 0.40391304347826085,
|
||
|
|
"calib/std_conf": 0.24516370974306465,
|
||
|
|
"calib/step_conf_rate": 0.9453125,
|
||
|
|
"calib/step_q_c": 0.8656701030927835,
|
||
|
|
"calib/step_q_c_n": 97.0,
|
||
|
|
"calib/step_q_gap": 0.12069742549715512,
|
||
|
|
"calib/step_q_w": 0.7449726775956284,
|
||
|
|
"calib/step_q_w_n": 183.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.9921875,
|
||
|
|
"completions/max_length": 106.0,
|
||
|
|
"completions/max_terminated_length": 106.0,
|
||
|
|
"completions/mean_length": 0.76953125,
|
||
|
|
"completions/mean_terminated_length": 98.5,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 91.0,
|
||
|
|
"epoch": 0.3056,
|
||
|
|
"grad_norm": 19.046396255493164,
|
||
|
|
"learning_rate": 5.555555555555555e-08,
|
||
|
|
"loss": 0.0575,
|
||
|
|
"num_tokens": 49560431.0,
|
||
|
|
"reward": 0.806640625,
|
||
|
|
"reward_std": 0.1605677306652069,
|
||
|
|
"rewards/accuracy_reward_step": 0.35546875,
|
||
|
|
"rewards/format_reward_step": 0.90234375,
|
||
|
|
"step": 191
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.97265625,
|
||
|
|
"calib/auroc": 0.7376984126984127,
|
||
|
|
"calib/avg_num_step_conf": 0.96875,
|
||
|
|
"calib/ece": 0.4807539682539681,
|
||
|
|
"calib/final_conf_rate": 0.984375,
|
||
|
|
"calib/format_rate": 0.8125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.03968253968253968,
|
||
|
|
"calib/gap": 0.2616666666666664,
|
||
|
|
"calib/mean_conf": 0.6474206349206351,
|
||
|
|
"calib/mu_c": 0.8654761904761903,
|
||
|
|
"calib/mu_w": 0.6038095238095239,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
||
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.8515625,
|
||
|
|
"calib/pce": 0.4807539682539681,
|
||
|
|
"calib/std_conf": 0.3317272223535333,
|
||
|
|
"calib/step_conf_rate": 0.8515625,
|
||
|
|
"calib/step_q_c": 0.8222222222222222,
|
||
|
|
"calib/step_q_c_n": 45.0,
|
||
|
|
"calib/step_q_gap": 0.11975916803503006,
|
||
|
|
"calib/step_q_w": 0.7024630541871921,
|
||
|
|
"calib/step_q_w_n": 203.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.9921875,
|
||
|
|
"completions/max_length": 768.0,
|
||
|
|
"completions/max_terminated_length": 768.0,
|
||
|
|
"completions/mean_length": 3.87890625,
|
||
|
|
"completions/mean_terminated_length": 496.5,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 225.0,
|
||
|
|
"epoch": 0.3072,
|
||
|
|
"grad_norm": 6.2011237144470215,
|
||
|
|
"learning_rate": 5e-08,
|
||
|
|
"loss": 0.0518,
|
||
|
|
"num_tokens": 49823568.0,
|
||
|
|
"reward": 0.57421875,
|
||
|
|
"reward_std": 0.19247063994407654,
|
||
|
|
"rewards/accuracy_reward_step": 0.16796875,
|
||
|
|
"rewards/format_reward_step": 0.8125,
|
||
|
|
"step": 192
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.95703125,
|
||
|
|
"calib/auroc": 0.834324582991523,
|
||
|
|
"calib/avg_num_step_conf": 1.14453125,
|
||
|
|
"calib/ece": 0.30621513944223105,
|
||
|
|
"calib/final_conf_rate": 0.98046875,
|
||
|
|
"calib/format_rate": 0.765625,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.099601593625498,
|
||
|
|
"calib/gap": 0.3342049494120862,
|
||
|
|
"calib/mean_conf": 0.6727490039840638,
|
||
|
|
"calib/mu_c": 0.8844565217391304,
|
||
|
|
"calib/mu_w": 0.5502515723270441,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.80859375,
|
||
|
|
"calib/pce": 0.30621513944223105,
|
||
|
|
"calib/std_conf": 0.3414032630145996,
|
||
|
|
"calib/step_conf_rate": 0.80859375,
|
||
|
|
"calib/step_q_c": 0.8537593984962407,
|
||
|
|
"calib/step_q_c_n": 133.0,
|
||
|
|
"calib/step_q_gap": 0.13188439849624078,
|
||
|
|
"calib/step_q_w": 0.7218749999999999,
|
||
|
|
"calib/step_q_w_n": 160.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 1.0,
|
||
|
|
"completions/max_length": 0.0,
|
||
|
|
"completions/max_terminated_length": 0.0,
|
||
|
|
"completions/mean_length": 0.0,
|
||
|
|
"completions/mean_terminated_length": 0.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 0.0,
|
||
|
|
"epoch": 0.3088,
|
||
|
|
"grad_norm": 0.0,
|
||
|
|
"learning_rate": 4.444444444444444e-08,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 50084880.0,
|
||
|
|
"reward": 0.7421875,
|
||
|
|
"reward_std": 0.15903086960315704,
|
||
|
|
"rewards/accuracy_reward_step": 0.359375,
|
||
|
|
"rewards/format_reward_step": 0.765625,
|
||
|
|
"step": 193
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.94921875,
|
||
|
|
"calib/auroc": 0.7670283303194696,
|
||
|
|
"calib/avg_num_step_conf": 0.984375,
|
||
|
|
"calib/ece": 0.34651821862348187,
|
||
|
|
"calib/final_conf_rate": 0.96484375,
|
||
|
|
"calib/format_rate": 0.78125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.0728744939271255,
|
||
|
|
"calib/gap": 0.3051740506329117,
|
||
|
|
"calib/mean_conf": 0.6663562753036437,
|
||
|
|
"calib/mu_c": 0.8739240506329117,
|
||
|
|
"calib/mu_w": 0.56875,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.96484375,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.98046875,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.8359375,
|
||
|
|
"calib/pce": 0.34651821862348187,
|
||
|
|
"calib/std_conf": 0.33855803207737123,
|
||
|
|
"calib/step_conf_rate": 0.8359375,
|
||
|
|
"calib/step_q_c": 0.8615789473684213,
|
||
|
|
"calib/step_q_c_n": 95.0,
|
||
|
|
"calib/step_q_gap": 0.18036875628561888,
|
||
|
|
"calib/step_q_w": 0.6812101910828025,
|
||
|
|
"calib/step_q_w_n": 157.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.984375,
|
||
|
|
"completions/max_length": 259.0,
|
||
|
|
"completions/max_terminated_length": 259.0,
|
||
|
|
"completions/mean_length": 2.8125,
|
||
|
|
"completions/mean_terminated_length": 180.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 115.0,
|
||
|
|
"epoch": 0.3104,
|
||
|
|
"grad_norm": 17.856693267822266,
|
||
|
|
"learning_rate": 3.888888888888889e-08,
|
||
|
|
"loss": 0.0842,
|
||
|
|
"num_tokens": 50345448.0,
|
||
|
|
"reward": 0.69921875,
|
||
|
|
"reward_std": 0.2514913082122803,
|
||
|
|
"rewards/accuracy_reward_step": 0.30859375,
|
||
|
|
"rewards/format_reward_step": 0.78125,
|
||
|
|
"step": 194
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.953125,
|
||
|
|
"calib/auroc": 0.7878414688759515,
|
||
|
|
"calib/avg_num_step_conf": 0.921875,
|
||
|
|
"calib/ece": 0.3490438247011953,
|
||
|
|
"calib/final_conf_rate": 0.98046875,
|
||
|
|
"calib/format_rate": 0.734375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.04780876494023904,
|
||
|
|
"calib/gap": 0.3164591730108969,
|
||
|
|
"calib/mean_conf": 0.655816733067729,
|
||
|
|
"calib/mu_c": 0.875194805194805,
|
||
|
|
"calib/mu_w": 0.5587356321839081,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.78515625,
|
||
|
|
"calib/pce": 0.3490438247011953,
|
||
|
|
"calib/std_conf": 0.3450313662611969,
|
||
|
|
"calib/step_conf_rate": 0.78515625,
|
||
|
|
"calib/step_q_c": 0.8465263157894733,
|
||
|
|
"calib/step_q_c_n": 95.0,
|
||
|
|
"calib/step_q_gap": 0.18659723777528892,
|
||
|
|
"calib/step_q_w": 0.6599290780141844,
|
||
|
|
"calib/step_q_w_n": 141.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.9921875,
|
||
|
|
"completions/max_length": 239.0,
|
||
|
|
"completions/max_terminated_length": 239.0,
|
||
|
|
"completions/mean_length": 1.546875,
|
||
|
|
"completions/mean_terminated_length": 198.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 157.0,
|
||
|
|
"epoch": 0.312,
|
||
|
|
"grad_norm": 9.03911018371582,
|
||
|
|
"learning_rate": 3.3333333333333334e-08,
|
||
|
|
"loss": 0.0399,
|
||
|
|
"num_tokens": 50603364.0,
|
||
|
|
"reward": 0.66796875,
|
||
|
|
"reward_std": 0.24204961955547333,
|
||
|
|
"rewards/accuracy_reward_step": 0.30078125,
|
||
|
|
"rewards/format_reward_step": 0.734375,
|
||
|
|
"step": 195
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.9765625,
|
||
|
|
"calib/auroc": 0.6827997489014438,
|
||
|
|
"calib/avg_num_step_conf": 1.015625,
|
||
|
|
"calib/ece": 0.3133201581027668,
|
||
|
|
"calib/final_conf_rate": 0.98828125,
|
||
|
|
"calib/format_rate": 0.8515625,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.08300395256916997,
|
||
|
|
"calib/gap": 0.17855555555555536,
|
||
|
|
"calib/mean_conf": 0.7797233201581029,
|
||
|
|
"calib/mu_c": 0.8749999999999999,
|
||
|
|
"calib/mu_w": 0.6964444444444445,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.87890625,
|
||
|
|
"calib/pce": 0.3133201581027668,
|
||
|
|
"calib/std_conf": 0.2399594879543884,
|
||
|
|
"calib/step_conf_rate": 0.87890625,
|
||
|
|
"calib/step_q_c": 0.8587591240875913,
|
||
|
|
"calib/step_q_c_n": 137.0,
|
||
|
|
"calib/step_q_gap": 0.07079164441279462,
|
||
|
|
"calib/step_q_w": 0.7879674796747966,
|
||
|
|
"calib/step_q_w_n": 123.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 112.0,
|
||
|
|
"completions/max_terminated_length": 112.0,
|
||
|
|
"completions/mean_length": 0.4375,
|
||
|
|
"completions/mean_terminated_length": 112.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 112.0,
|
||
|
|
"epoch": 0.3136,
|
||
|
|
"grad_norm": 3.043510675430298,
|
||
|
|
"learning_rate": 2.7777777777777774e-08,
|
||
|
|
"loss": 0.0055,
|
||
|
|
"num_tokens": 50865396.0,
|
||
|
|
"reward": 0.88671875,
|
||
|
|
"reward_std": 0.2121565341949463,
|
||
|
|
"rewards/accuracy_reward_step": 0.4609375,
|
||
|
|
"rewards/format_reward_step": 0.8515625,
|
||
|
|
"step": 196
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.98046875,
|
||
|
|
"calib/auroc": 0.7620481927710844,
|
||
|
|
"calib/avg_num_step_conf": 1.1328125,
|
||
|
|
"calib/ece": 0.34219999999999995,
|
||
|
|
"calib/final_conf_rate": 0.9765625,
|
||
|
|
"calib/format_rate": 0.86328125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.028,
|
||
|
|
"calib/gap": 0.2590935169248425,
|
||
|
|
"calib/mean_conf": 0.6781999999999999,
|
||
|
|
"calib/mu_c": 0.8502380952380955,
|
||
|
|
"calib/mu_w": 0.591144578313253,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.90625,
|
||
|
|
"calib/pce": 0.34219999999999995,
|
||
|
|
"calib/std_conf": 0.30800707784075354,
|
||
|
|
"calib/step_conf_rate": 0.90625,
|
||
|
|
"calib/step_q_c": 0.8568539325842699,
|
||
|
|
"calib/step_q_c_n": 89.0,
|
||
|
|
"calib/step_q_gap": 0.19625691765889686,
|
||
|
|
"calib/step_q_w": 0.660597014925373,
|
||
|
|
"calib/step_q_w_n": 201.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.98828125,
|
||
|
|
"completions/max_length": 298.0,
|
||
|
|
"completions/max_terminated_length": 298.0,
|
||
|
|
"completions/mean_length": 2.43359375,
|
||
|
|
"completions/mean_terminated_length": 207.6666717529297,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 128.0,
|
||
|
|
"epoch": 0.3152,
|
||
|
|
"grad_norm": 31.83425521850586,
|
||
|
|
"learning_rate": 2.222222222222222e-08,
|
||
|
|
"loss": 0.1026,
|
||
|
|
"num_tokens": 51125291.0,
|
||
|
|
"reward": 0.763671875,
|
||
|
|
"reward_std": 0.18628305196762085,
|
||
|
|
"rewards/accuracy_reward_step": 0.33203125,
|
||
|
|
"rewards/format_reward_step": 0.86328125,
|
||
|
|
"step": 197
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.953125,
|
||
|
|
"calib/auroc": 0.8009259259259259,
|
||
|
|
"calib/avg_num_step_conf": 0.96875,
|
||
|
|
"calib/ece": 0.39666666666666656,
|
||
|
|
"calib/final_conf_rate": 0.9609375,
|
||
|
|
"calib/format_rate": 0.734375,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.08130081300813008,
|
||
|
|
"calib/gap": 0.2887121212121212,
|
||
|
|
"calib/mean_conf": 0.664959349593496,
|
||
|
|
"calib/mu_c": 0.8762121212121212,
|
||
|
|
"calib/mu_w": 0.5875,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.9609375,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.96484375,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.7578125,
|
||
|
|
"calib/pce": 0.39666666666666656,
|
||
|
|
"calib/std_conf": 0.34441271796895645,
|
||
|
|
"calib/step_conf_rate": 0.7578125,
|
||
|
|
"calib/step_q_c": 0.8543055555555555,
|
||
|
|
"calib/step_q_c_n": 72.0,
|
||
|
|
"calib/step_q_gap": 0.1301578282828283,
|
||
|
|
"calib/step_q_w": 0.7241477272727272,
|
||
|
|
"calib/step_q_w_n": 176.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 301.0,
|
||
|
|
"completions/max_terminated_length": 301.0,
|
||
|
|
"completions/mean_length": 1.17578125,
|
||
|
|
"completions/mean_terminated_length": 301.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 301.0,
|
||
|
|
"epoch": 0.3168,
|
||
|
|
"grad_norm": 3.0404303073883057,
|
||
|
|
"learning_rate": 1.6666666666666667e-08,
|
||
|
|
"loss": 0.0146,
|
||
|
|
"num_tokens": 51387736.0,
|
||
|
|
"reward": 0.625,
|
||
|
|
"reward_std": 0.24578902125358582,
|
||
|
|
"rewards/accuracy_reward_step": 0.2578125,
|
||
|
|
"rewards/format_reward_step": 0.734375,
|
||
|
|
"step": 198
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.96875,
|
||
|
|
"calib/auroc": 0.7875337022846601,
|
||
|
|
"calib/avg_num_step_conf": 0.97265625,
|
||
|
|
"calib/ece": 0.34646586345381525,
|
||
|
|
"calib/final_conf_rate": 0.97265625,
|
||
|
|
"calib/format_rate": 0.8203125,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.05220883534136546,
|
||
|
|
"calib/gap": 0.2692443593018303,
|
||
|
|
"calib/mean_conf": 0.6958634538152609,
|
||
|
|
"calib/mu_c": 0.8710344827586204,
|
||
|
|
"calib/mu_w": 0.6017901234567901,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.9765625,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.85546875,
|
||
|
|
"calib/pce": 0.34646586345381525,
|
||
|
|
"calib/std_conf": 0.304499321471865,
|
||
|
|
"calib/step_conf_rate": 0.85546875,
|
||
|
|
"calib/step_q_c": 0.8557291666666668,
|
||
|
|
"calib/step_q_c_n": 96.0,
|
||
|
|
"calib/step_q_gap": 0.1694546568627452,
|
||
|
|
"calib/step_q_w": 0.6862745098039216,
|
||
|
|
"calib/step_q_w_n": 153.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.99609375,
|
||
|
|
"completions/max_length": 255.0,
|
||
|
|
"completions/max_terminated_length": 255.0,
|
||
|
|
"completions/mean_length": 0.99609375,
|
||
|
|
"completions/mean_terminated_length": 255.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 255.0,
|
||
|
|
"epoch": 0.3184,
|
||
|
|
"grad_norm": 6.873912811279297,
|
||
|
|
"learning_rate": 1.111111111111111e-08,
|
||
|
|
"loss": 0.0253,
|
||
|
|
"num_tokens": 51650103.0,
|
||
|
|
"reward": 0.75,
|
||
|
|
"reward_std": 0.19911831617355347,
|
||
|
|
"rewards/accuracy_reward_step": 0.33984375,
|
||
|
|
"rewards/format_reward_step": 0.8203125,
|
||
|
|
"step": 199
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calib/answer_extract_rate": 0.98828125,
|
||
|
|
"calib/auroc": 0.7357768813465017,
|
||
|
|
"calib/avg_num_step_conf": 0.87109375,
|
||
|
|
"calib/ece": 0.311566265060241,
|
||
|
|
"calib/final_conf_rate": 0.97265625,
|
||
|
|
"calib/format_rate": 0.69140625,
|
||
|
|
"calib/frac_conf_gt_0.9": 0.1606425702811245,
|
||
|
|
"calib/gap": 0.30241062734733604,
|
||
|
|
"calib/mean_conf": 0.6714056224899598,
|
||
|
|
"calib/mu_c": 0.8632967032967032,
|
||
|
|
"calib/mu_w": 0.5608860759493671,
|
||
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
||
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
||
|
|
"calib/nonempty_step_conf_rate": 0.71484375,
|
||
|
|
"calib/pce": 0.3087550200803213,
|
||
|
|
"calib/std_conf": 0.3414251258986275,
|
||
|
|
"calib/step_conf_rate": 0.71484375,
|
||
|
|
"calib/step_q_c": 0.8533333333333335,
|
||
|
|
"calib/step_q_c_n": 102.0,
|
||
|
|
"calib/step_q_gap": 0.09696969696969726,
|
||
|
|
"calib/step_q_w": 0.7563636363636362,
|
||
|
|
"calib/step_q_w_n": 121.0,
|
||
|
|
"clip_ratio/high_max": NaN,
|
||
|
|
"clip_ratio/high_mean": NaN,
|
||
|
|
"clip_ratio/low_mean": NaN,
|
||
|
|
"clip_ratio/low_min": NaN,
|
||
|
|
"clip_ratio/region_mean": NaN,
|
||
|
|
"completions/clipped_ratio": 0.98828125,
|
||
|
|
"completions/max_length": 431.0,
|
||
|
|
"completions/max_terminated_length": 431.0,
|
||
|
|
"completions/mean_length": 2.87109375,
|
||
|
|
"completions/mean_terminated_length": 245.0,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 132.0,
|
||
|
|
"epoch": 0.32,
|
||
|
|
"grad_norm": 9.112618446350098,
|
||
|
|
"learning_rate": 5.555555555555555e-09,
|
||
|
|
"loss": 0.0405,
|
||
|
|
"num_tokens": 51908390.0,
|
||
|
|
"reward": 0.701171875,
|
||
|
|
"reward_std": 0.19281864166259766,
|
||
|
|
"rewards/accuracy_reward_step": 0.35546875,
|
||
|
|
"rewards/format_reward_step": 0.69140625,
|
||
|
|
"step": 200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.32,
|
||
|
|
"step": 200,
|
||
|
|
"total_flos": 0.0,
|
||
|
|
"train_loss": 0.01296201538760215,
|
||
|
|
"train_runtime": 3528.5457,
|
||
|
|
"train_samples_per_second": 14.51,
|
||
|
|
"train_steps_per_second": 0.057
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"logging_steps": 1,
|
||
|
|
"max_steps": 200,
|
||
|
|
"num_input_tokens_seen": 51908390,
|
||
|
|
"num_train_epochs": 1,
|
||
|
|
"save_steps": 25,
|
||
|
|
"stateful_callbacks": {
|
||
|
|
"TrainerControl": {
|
||
|
|
"args": {
|
||
|
|
"should_epoch_stop": false,
|
||
|
|
"should_evaluate": false,
|
||
|
|
"should_log": false,
|
||
|
|
"should_save": true,
|
||
|
|
"should_training_stop": true
|
||
|
|
},
|
||
|
|
"attributes": {}
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"total_flos": 0.0,
|
||
|
|
"train_batch_size": 4,
|
||
|
|
"trial_name": null,
|
||
|
|
"trial_params": null
|
||
|
|
}
|