10238 lines
398 KiB
JSON
10238 lines
398 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.21333333333333335,
|
|
"eval_steps": 500,
|
|
"global_step": 200,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"aux_distill/lambda": 0.29999999999999993,
|
|
"aux_distill/loss": 1.4131654458386558,
|
|
"aux_distill/mean_u": 0.31677682190706,
|
|
"aux_distill/n_active_tok": 24.571428571428573,
|
|
"calib/answer_extract_rate": 0.08203125,
|
|
"calib/auroc": 0.6944444444444445,
|
|
"calib/avg_num_step_conf": 0.3359375,
|
|
"calib/ece": 0.6230769230769231,
|
|
"calib/final_conf_rate": 0.05078125,
|
|
"calib/format_rate": 0.04296875,
|
|
"calib/frac_conf_gt_0.9": 0.7692307692307693,
|
|
"calib/gap": 0.03861111111111115,
|
|
"calib/mean_conf": 0.9307692307692309,
|
|
"calib/mu_c": 0.9575,
|
|
"calib/mu_w": 0.9188888888888889,
|
|
"calib/nonempty_final_conf_rate": 0.05078125,
|
|
"calib/nonempty_reasoning_rate": 0.09765625,
|
|
"calib/nonempty_step_conf_rate": 0.0703125,
|
|
"calib/pce": 0.6230769230769231,
|
|
"calib/std_conf": 0.07965903671384378,
|
|
"calib/step_conf_rate": 0.0703125,
|
|
"calib/step_q_c": 0.8921052631578947,
|
|
"calib/step_q_c_n": 19.0,
|
|
"calib/step_q_gap": 0.19807541241162607,
|
|
"calib/step_q_w": 0.6940298507462687,
|
|
"calib/step_q_w_n": 67.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.08984375,
|
|
"completions/max_length": 2955.0,
|
|
"completions/max_terminated_length": 2955.0,
|
|
"completions/mean_length": 613.67578125,
|
|
"completions/mean_terminated_length": 674.2532348632812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0010666666666666667,
|
|
"grad_norm": 0.04508271813392639,
|
|
"learning_rate": 2.5000000000000004e-07,
|
|
"loss": 0.2169,
|
|
"num_tokens": 264685.0,
|
|
"reward": 0.037574999034404755,
|
|
"reward_std": 0.07449960708618164,
|
|
"rewards/accuracy_reward_step": 0.015625,
|
|
"rewards/final_brier_reward_step": 0.01655624993145466,
|
|
"rewards/format_reward_step": 0.04296875,
|
|
"step": 1
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.29999999999999993,
|
|
"aux_distill/loss": 1.1092121005058289,
|
|
"aux_distill/mean_u": 0.2935626227740425,
|
|
"aux_distill/n_active_tok": 28.63157894736842,
|
|
"calib/answer_extract_rate": 0.13671875,
|
|
"calib/auroc": 0.5338345864661654,
|
|
"calib/avg_num_step_conf": 0.55078125,
|
|
"calib/ece": 0.6261538461538463,
|
|
"calib/final_conf_rate": 0.1015625,
|
|
"calib/format_rate": 0.08984375,
|
|
"calib/frac_conf_gt_0.9": 0.7692307692307693,
|
|
"calib/gap": 0.002406015037593856,
|
|
"calib/mean_conf": 0.8953846153846153,
|
|
"calib/mu_c": 0.897142857142857,
|
|
"calib/mu_w": 0.8947368421052632,
|
|
"calib/nonempty_final_conf_rate": 0.1015625,
|
|
"calib/nonempty_reasoning_rate": 0.14453125,
|
|
"calib/nonempty_step_conf_rate": 0.109375,
|
|
"calib/pce": 0.6261538461538463,
|
|
"calib/std_conf": 0.18653172073466937,
|
|
"calib/step_conf_rate": 0.109375,
|
|
"calib/step_q_c": 0.781,
|
|
"calib/step_q_c_n": 20.0,
|
|
"calib/step_q_gap": -0.042553719008264435,
|
|
"calib/step_q_w": 0.8235537190082645,
|
|
"calib/step_q_w_n": 121.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0546875,
|
|
"completions/max_length": 3001.0,
|
|
"completions/max_terminated_length": 3001.0,
|
|
"completions/mean_length": 646.4609375,
|
|
"completions/mean_terminated_length": 683.8594970703125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0021333333333333334,
|
|
"grad_norm": 0.03766733407974243,
|
|
"learning_rate": 5.000000000000001e-07,
|
|
"loss": 0.263,
|
|
"num_tokens": 533467.0,
|
|
"reward": 0.07537207007408142,
|
|
"reward_std": 0.14035090804100037,
|
|
"rewards/accuracy_reward_step": 0.03125,
|
|
"rewards/final_brier_reward_step": 0.02965039201080799,
|
|
"rewards/format_reward_step": 0.08984375,
|
|
"step": 2
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.29999999999999993,
|
|
"aux_distill/loss": 1.375932554403941,
|
|
"aux_distill/mean_u": 0.24507726546495304,
|
|
"aux_distill/n_active_tok": 23.0,
|
|
"calib/answer_extract_rate": 0.05859375,
|
|
"calib/auroc": 0.24242424242424243,
|
|
"calib/avg_num_step_conf": 0.26953125,
|
|
"calib/ece": 0.7335714285714285,
|
|
"calib/final_conf_rate": 0.0546875,
|
|
"calib/format_rate": 0.04296875,
|
|
"calib/frac_conf_gt_0.9": 0.8571428571428571,
|
|
"calib/gap": -0.05242424242424237,
|
|
"calib/mean_conf": 0.947857142857143,
|
|
"calib/mu_c": 0.9066666666666666,
|
|
"calib/mu_w": 0.959090909090909,
|
|
"calib/nonempty_final_conf_rate": 0.0546875,
|
|
"calib/nonempty_reasoning_rate": 0.06640625,
|
|
"calib/nonempty_step_conf_rate": 0.0546875,
|
|
"calib/pce": 0.7335714285714285,
|
|
"calib/std_conf": 0.056083938549867415,
|
|
"calib/step_conf_rate": 0.0546875,
|
|
"calib/step_q_c": 0.6900000000000001,
|
|
"calib/step_q_c_n": 17.0,
|
|
"calib/step_q_gap": -0.17057692307692285,
|
|
"calib/step_q_w": 0.8605769230769229,
|
|
"calib/step_q_w_n": 52.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0703125,
|
|
"completions/max_length": 3017.0,
|
|
"completions/max_terminated_length": 3017.0,
|
|
"completions/mean_length": 693.890625,
|
|
"completions/mean_terminated_length": 746.3698120117188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0032,
|
|
"grad_norm": 0.03947603330016136,
|
|
"learning_rate": 7.5e-07,
|
|
"loss": 0.2027,
|
|
"num_tokens": 816359.0,
|
|
"reward": 0.03406660258769989,
|
|
"reward_std": 0.07613541185855865,
|
|
"rewards/accuracy_reward_step": 0.01171875,
|
|
"rewards/final_brier_reward_step": 0.013445701450109482,
|
|
"rewards/format_reward_step": 0.04296875,
|
|
"step": 3
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.29999999999999993,
|
|
"aux_distill/loss": 1.2811786349003131,
|
|
"aux_distill/mean_u": 0.3183269252095441,
|
|
"aux_distill/n_active_tok": 22.153846153846153,
|
|
"calib/answer_extract_rate": 0.06640625,
|
|
"calib/avg_num_step_conf": 0.28125,
|
|
"calib/ece": 0.7961538461538462,
|
|
"calib/final_conf_rate": 0.05078125,
|
|
"calib/format_rate": 0.03125,
|
|
"calib/frac_conf_gt_0.9": 0.6923076923076923,
|
|
"calib/mean_conf": 0.7961538461538461,
|
|
"calib/mu_c": NaN,
|
|
"calib/mu_w": 0.7961538461538461,
|
|
"calib/nonempty_final_conf_rate": 0.05078125,
|
|
"calib/nonempty_reasoning_rate": 0.0859375,
|
|
"calib/nonempty_step_conf_rate": 0.05859375,
|
|
"calib/pce": 0.7961538461538462,
|
|
"calib/std_conf": 0.322121297305091,
|
|
"calib/step_conf_rate": 0.05859375,
|
|
"calib/step_q_w": 0.7857407407407409,
|
|
"calib/step_q_w_n": 72.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.07421875,
|
|
"completions/max_length": 2930.0,
|
|
"completions/max_terminated_length": 2930.0,
|
|
"completions/mean_length": 648.04296875,
|
|
"completions/mean_terminated_length": 699.9957275390625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.004266666666666667,
|
|
"grad_norm": 0.03017072007060051,
|
|
"learning_rate": 1.0000000000000002e-06,
|
|
"loss": 0.1711,
|
|
"num_tokens": 1088426.0,
|
|
"reward": 0.019221873953938484,
|
|
"reward_std": 0.0504031628370285,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.007193749770522118,
|
|
"rewards/format_reward_step": 0.03125,
|
|
"step": 4
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.29999999999999993,
|
|
"aux_distill/loss": 1.1949924447319724,
|
|
"aux_distill/mean_u": 0.36153202551690744,
|
|
"aux_distill/n_active_tok": 33.45454545454545,
|
|
"calib/answer_extract_rate": 0.0703125,
|
|
"calib/auroc": 0.375,
|
|
"calib/avg_num_step_conf": 0.359375,
|
|
"calib/ece": 0.6869230769230771,
|
|
"calib/final_conf_rate": 0.05078125,
|
|
"calib/format_rate": 0.0390625,
|
|
"calib/frac_conf_gt_0.9": 0.7692307692307693,
|
|
"calib/gap": -0.1050000000000001,
|
|
"calib/mean_conf": 0.9176923076923077,
|
|
"calib/mu_c": 0.845,
|
|
"calib/mu_w": 0.9500000000000001,
|
|
"calib/nonempty_final_conf_rate": 0.05078125,
|
|
"calib/nonempty_reasoning_rate": 0.09375,
|
|
"calib/nonempty_step_conf_rate": 0.0625,
|
|
"calib/pce": 0.6484615384615386,
|
|
"calib/std_conf": 0.12490943464728885,
|
|
"calib/step_conf_rate": 0.0625,
|
|
"calib/step_q_c": 0.7553846153846155,
|
|
"calib/step_q_c_n": 13.0,
|
|
"calib/step_q_gap": -0.02170525803310608,
|
|
"calib/step_q_w": 0.7770898734177216,
|
|
"calib/step_q_w_n": 79.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.10546875,
|
|
"completions/max_length": 3058.0,
|
|
"completions/max_terminated_length": 3058.0,
|
|
"completions/mean_length": 739.875,
|
|
"completions/mean_terminated_length": 827.1091918945312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.005333333333333333,
|
|
"grad_norm": 0.018968043848872185,
|
|
"learning_rate": 1.25e-06,
|
|
"loss": 0.173,
|
|
"num_tokens": 1384522.0,
|
|
"reward": 0.034125782549381256,
|
|
"reward_std": 0.07533922791481018,
|
|
"rewards/accuracy_reward_step": 0.015625,
|
|
"rewards/final_brier_reward_step": 0.013564062304794788,
|
|
"rewards/format_reward_step": 0.0390625,
|
|
"step": 5
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.29999999999999993,
|
|
"aux_distill/loss": 1.3305943420058803,
|
|
"aux_distill/mean_u": 0.3693736034622527,
|
|
"aux_distill/n_active_tok": 24.63157894736842,
|
|
"calib/answer_extract_rate": 0.109375,
|
|
"calib/auroc": 0.2777777777777778,
|
|
"calib/avg_num_step_conf": 0.48046875,
|
|
"calib/ece": 0.8085,
|
|
"calib/final_conf_rate": 0.078125,
|
|
"calib/format_rate": 0.06640625,
|
|
"calib/frac_conf_gt_0.9": 0.6,
|
|
"calib/gap": -0.07611111111111091,
|
|
"calib/mean_conf": 0.9085000000000001,
|
|
"calib/mu_c": 0.8400000000000001,
|
|
"calib/mu_w": 0.916111111111111,
|
|
"calib/nonempty_final_conf_rate": 0.078125,
|
|
"calib/nonempty_reasoning_rate": 0.12890625,
|
|
"calib/nonempty_step_conf_rate": 0.09375,
|
|
"calib/pce": 0.8085,
|
|
"calib/std_conf": 0.09078959191449205,
|
|
"calib/step_conf_rate": 0.09375,
|
|
"calib/step_q_c": 0.858125,
|
|
"calib/step_q_c_n": 16.0,
|
|
"calib/step_q_gap": 0.034386682242990574,
|
|
"calib/step_q_w": 0.8237383177570095,
|
|
"calib/step_q_w_n": 107.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.1015625,
|
|
"completions/max_length": 3013.0,
|
|
"completions/max_terminated_length": 3013.0,
|
|
"completions/mean_length": 626.8515625,
|
|
"completions/mean_terminated_length": 697.7130126953125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 3.0,
|
|
"epoch": 0.0064,
|
|
"grad_norm": 0.04654983431100845,
|
|
"learning_rate": 1.5e-06,
|
|
"loss": 0.2868,
|
|
"num_tokens": 1650948.0,
|
|
"reward": 0.047386325895786285,
|
|
"reward_std": 0.09225471317768097,
|
|
"rewards/accuracy_reward_step": 0.01171875,
|
|
"rewards/final_brier_reward_step": 0.01664765551686287,
|
|
"rewards/format_reward_step": 0.06640625,
|
|
"step": 6
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.29999999999999993,
|
|
"aux_distill/loss": 1.3733870275318623,
|
|
"aux_distill/mean_u": 0.36923659447005963,
|
|
"aux_distill/n_active_tok": 27.5,
|
|
"calib/answer_extract_rate": 0.1171875,
|
|
"calib/auroc": 0.32307692307692304,
|
|
"calib/avg_num_step_conf": 0.43359375,
|
|
"calib/ece": 0.6955555555555555,
|
|
"calib/final_conf_rate": 0.0703125,
|
|
"calib/format_rate": 0.0546875,
|
|
"calib/frac_conf_gt_0.9": 0.8888888888888888,
|
|
"calib/gap": -0.020615384615384924,
|
|
"calib/mean_conf": 0.958888888888889,
|
|
"calib/mu_c": 0.944,
|
|
"calib/mu_w": 0.9646153846153849,
|
|
"calib/nonempty_final_conf_rate": 0.0703125,
|
|
"calib/nonempty_reasoning_rate": 0.14453125,
|
|
"calib/nonempty_step_conf_rate": 0.09375,
|
|
"calib/pce": 0.6883333333333332,
|
|
"calib/std_conf": 0.029979416807182312,
|
|
"calib/step_conf_rate": 0.09375,
|
|
"calib/step_q_c": 0.7900111111111112,
|
|
"calib/step_q_c_n": 18.0,
|
|
"calib/step_q_gap": 0.07941218637992853,
|
|
"calib/step_q_w": 0.7105989247311827,
|
|
"calib/step_q_w_n": 93.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.06640625,
|
|
"completions/max_length": 2710.0,
|
|
"completions/max_terminated_length": 2710.0,
|
|
"completions/mean_length": 739.20703125,
|
|
"completions/mean_terminated_length": 791.78662109375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 8.0,
|
|
"epoch": 0.007466666666666667,
|
|
"grad_norm": 0.04697367548942566,
|
|
"learning_rate": 1.75e-06,
|
|
"loss": 0.2593,
|
|
"num_tokens": 1947609.0,
|
|
"reward": 0.04775039106607437,
|
|
"reward_std": 0.11989802122116089,
|
|
"rewards/accuracy_reward_step": 0.01953125,
|
|
"rewards/final_brier_reward_step": 0.021282030269503593,
|
|
"rewards/format_reward_step": 0.0546875,
|
|
"step": 7
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.29999999999999993,
|
|
"aux_distill/loss": 1.3266922648136432,
|
|
"aux_distill/mean_u": 0.37519659554870155,
|
|
"aux_distill/n_active_tok": 30.153846153846153,
|
|
"calib/answer_extract_rate": 0.1015625,
|
|
"calib/auroc": 0.4666666666666667,
|
|
"calib/avg_num_step_conf": 0.3828125,
|
|
"calib/ece": 0.5494117647058824,
|
|
"calib/final_conf_rate": 0.06640625,
|
|
"calib/format_rate": 0.04296875,
|
|
"calib/frac_conf_gt_0.9": 0.7058823529411765,
|
|
"calib/gap": 0.04599999999999993,
|
|
"calib/mean_conf": 0.8435294117647059,
|
|
"calib/mu_c": 0.876,
|
|
"calib/mu_w": 0.8300000000000001,
|
|
"calib/nonempty_final_conf_rate": 0.06640625,
|
|
"calib/nonempty_reasoning_rate": 0.12109375,
|
|
"calib/nonempty_step_conf_rate": 0.07421875,
|
|
"calib/pce": 0.5494117647058824,
|
|
"calib/std_conf": 0.24369559547229233,
|
|
"calib/step_conf_rate": 0.07421875,
|
|
"calib/step_q_c": 0.6555000000000001,
|
|
"calib/step_q_c_n": 20.0,
|
|
"calib/step_q_gap": -0.0959102564102563,
|
|
"calib/step_q_w": 0.7514102564102564,
|
|
"calib/step_q_w_n": 78.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.078125,
|
|
"completions/max_length": 3033.0,
|
|
"completions/max_terminated_length": 3033.0,
|
|
"completions/mean_length": 654.44921875,
|
|
"completions/mean_terminated_length": 709.9110107421875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 3.0,
|
|
"epoch": 0.008533333333333334,
|
|
"grad_norm": 0.029148366302251816,
|
|
"learning_rate": 2.0000000000000003e-06,
|
|
"loss": 0.1835,
|
|
"num_tokens": 2221660.0,
|
|
"reward": 0.04404374584555626,
|
|
"reward_std": 0.09272695332765579,
|
|
"rewards/accuracy_reward_step": 0.01953125,
|
|
"rewards/final_brier_reward_step": 0.025587501004338264,
|
|
"rewards/format_reward_step": 0.04296875,
|
|
"step": 8
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.29999999999999993,
|
|
"aux_distill/loss": 1.3627358720852778,
|
|
"aux_distill/mean_u": 0.32543559097750124,
|
|
"aux_distill/n_active_tok": 16.615384615384617,
|
|
"calib/answer_extract_rate": 0.078125,
|
|
"calib/auroc": 0.47,
|
|
"calib/avg_num_step_conf": 0.2109375,
|
|
"calib/ece": 0.5559999999999999,
|
|
"calib/final_conf_rate": 0.05859375,
|
|
"calib/format_rate": 0.03125,
|
|
"calib/frac_conf_gt_0.9": 0.7333333333333333,
|
|
"calib/gap": 0.030999999999999917,
|
|
"calib/mean_conf": 0.8693333333333333,
|
|
"calib/mu_c": 0.8899999999999999,
|
|
"calib/mu_w": 0.859,
|
|
"calib/nonempty_final_conf_rate": 0.05859375,
|
|
"calib/nonempty_reasoning_rate": 0.09765625,
|
|
"calib/nonempty_step_conf_rate": 0.05078125,
|
|
"calib/pce": 0.5459999999999999,
|
|
"calib/std_conf": 0.2073794161005914,
|
|
"calib/step_conf_rate": 0.05078125,
|
|
"calib/step_q_c": 0.782,
|
|
"calib/step_q_c_n": 15.0,
|
|
"calib/step_q_gap": -0.029282051282051302,
|
|
"calib/step_q_w": 0.8112820512820513,
|
|
"calib/step_q_w_n": 39.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.078125,
|
|
"completions/max_length": 2996.0,
|
|
"completions/max_terminated_length": 2996.0,
|
|
"completions/mean_length": 617.34765625,
|
|
"completions/mean_terminated_length": 669.665283203125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0096,
|
|
"grad_norm": 0.038193874061107635,
|
|
"learning_rate": 2.25e-06,
|
|
"loss": 0.1983,
|
|
"num_tokens": 2487237.0,
|
|
"reward": 0.03526093810796738,
|
|
"reward_std": 0.06660518795251846,
|
|
"rewards/accuracy_reward_step": 0.01953125,
|
|
"rewards/final_brier_reward_step": 0.019740624353289604,
|
|
"rewards/format_reward_step": 0.03125,
|
|
"step": 9
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.29999999999999993,
|
|
"aux_distill/loss": 1.188918528648523,
|
|
"aux_distill/mean_u": 0.2326953426965051,
|
|
"aux_distill/n_active_tok": 17.23076923076923,
|
|
"calib/answer_extract_rate": 0.08984375,
|
|
"calib/auroc": 0.7083333333333334,
|
|
"calib/avg_num_step_conf": 0.21875,
|
|
"calib/ece": 0.5243571428571429,
|
|
"calib/final_conf_rate": 0.0546875,
|
|
"calib/format_rate": 0.04296875,
|
|
"calib/frac_conf_gt_0.9": 0.42857142857142855,
|
|
"calib/gap": 0.28241666666666665,
|
|
"calib/mean_conf": 0.6329285714285715,
|
|
"calib/mu_c": 0.875,
|
|
"calib/mu_w": 0.5925833333333334,
|
|
"calib/nonempty_final_conf_rate": 0.0546875,
|
|
"calib/nonempty_reasoning_rate": 0.1015625,
|
|
"calib/nonempty_step_conf_rate": 0.0546875,
|
|
"calib/pce": 0.5072142857142857,
|
|
"calib/std_conf": 0.371696970333499,
|
|
"calib/step_conf_rate": 0.0546875,
|
|
"calib/step_q_c": 0.8833333333333333,
|
|
"calib/step_q_c_n": 6.0,
|
|
"calib/step_q_gap": 0.35929333333333335,
|
|
"calib/step_q_w": 0.52404,
|
|
"calib/step_q_w_n": 50.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.09375,
|
|
"completions/max_length": 2989.0,
|
|
"completions/max_terminated_length": 2989.0,
|
|
"completions/mean_length": 588.48828125,
|
|
"completions/mean_terminated_length": 649.3663940429688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.010666666666666666,
|
|
"grad_norm": 0.0316932275891304,
|
|
"learning_rate": 2.5e-06,
|
|
"loss": 0.1596,
|
|
"num_tokens": 2744690.0,
|
|
"reward": 0.03827636316418648,
|
|
"reward_std": 0.09364674240350723,
|
|
"rewards/accuracy_reward_step": 0.0078125,
|
|
"rewards/final_brier_reward_step": 0.025771480053663254,
|
|
"rewards/format_reward_step": 0.04296875,
|
|
"step": 10
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.29999999999999993,
|
|
"aux_distill/loss": 1.1973961561918258,
|
|
"aux_distill/mean_u": 0.2825744844156909,
|
|
"aux_distill/n_active_tok": 25.2,
|
|
"calib/answer_extract_rate": 0.1171875,
|
|
"calib/auroc": 0.41666666666666663,
|
|
"calib/avg_num_step_conf": 0.5,
|
|
"calib/ece": 0.6805263157894734,
|
|
"calib/final_conf_rate": 0.07421875,
|
|
"calib/format_rate": 0.07421875,
|
|
"calib/frac_conf_gt_0.9": 0.8421052631578947,
|
|
"calib/gap": -0.11243589743589744,
|
|
"calib/mean_conf": 0.9152631578947369,
|
|
"calib/mu_c": 0.8383333333333334,
|
|
"calib/mu_w": 0.9507692307692308,
|
|
"calib/nonempty_final_conf_rate": 0.07421875,
|
|
"calib/nonempty_reasoning_rate": 0.15234375,
|
|
"calib/nonempty_step_conf_rate": 0.12109375,
|
|
"calib/pce": 0.6399999999999998,
|
|
"calib/std_conf": 0.1659222525938494,
|
|
"calib/step_conf_rate": 0.12109375,
|
|
"calib/step_q_c": 0.7260714285714286,
|
|
"calib/step_q_c_n": 28.0,
|
|
"calib/step_q_gap": -0.05922857142857141,
|
|
"calib/step_q_w": 0.7853,
|
|
"calib/step_q_w_n": 100.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.10546875,
|
|
"completions/max_length": 3055.0,
|
|
"completions/max_terminated_length": 3055.0,
|
|
"completions/mean_length": 662.40625,
|
|
"completions/mean_terminated_length": 740.506591796875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.011733333333333333,
|
|
"grad_norm": 0.04365599527955055,
|
|
"learning_rate": 2.7500000000000004e-06,
|
|
"loss": 0.2666,
|
|
"num_tokens": 3018746.0,
|
|
"reward": 0.06175879016518593,
|
|
"reward_std": 0.12854436039924622,
|
|
"rewards/accuracy_reward_step": 0.0234375,
|
|
"rewards/final_brier_reward_step": 0.025861326605081558,
|
|
"rewards/format_reward_step": 0.07421875,
|
|
"step": 11
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.29999999999999993,
|
|
"aux_distill/loss": 1.3279105214511646,
|
|
"aux_distill/mean_u": 0.4500246926560413,
|
|
"aux_distill/n_active_tok": 22.823529411764707,
|
|
"calib/answer_extract_rate": 0.09765625,
|
|
"calib/auroc": 0.4166666666666667,
|
|
"calib/avg_num_step_conf": 0.37890625,
|
|
"calib/ece": 0.6394117647058823,
|
|
"calib/final_conf_rate": 0.06640625,
|
|
"calib/format_rate": 0.05078125,
|
|
"calib/frac_conf_gt_0.9": 0.8823529411764706,
|
|
"calib/gap": 0.04166666666666674,
|
|
"calib/mean_conf": 0.9205882352941178,
|
|
"calib/mu_c": 0.95,
|
|
"calib/mu_w": 0.9083333333333332,
|
|
"calib/nonempty_final_conf_rate": 0.06640625,
|
|
"calib/nonempty_reasoning_rate": 0.12109375,
|
|
"calib/nonempty_step_conf_rate": 0.078125,
|
|
"calib/pce": 0.6329411764705882,
|
|
"calib/std_conf": 0.1596124198409198,
|
|
"calib/step_conf_rate": 0.078125,
|
|
"calib/step_q_c": 0.6076666666666667,
|
|
"calib/step_q_c_n": 30.0,
|
|
"calib/step_q_gap": -0.1189004975124377,
|
|
"calib/step_q_w": 0.7265671641791044,
|
|
"calib/step_q_w_n": 67.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.109375,
|
|
"completions/max_length": 3013.0,
|
|
"completions/max_terminated_length": 3013.0,
|
|
"completions/mean_length": 628.9296875,
|
|
"completions/mean_terminated_length": 706.1666870117188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 3.0,
|
|
"epoch": 0.0128,
|
|
"grad_norm": 0.05113685876131058,
|
|
"learning_rate": 3e-06,
|
|
"loss": 0.2352,
|
|
"num_tokens": 3283928.0,
|
|
"reward": 0.0459529273211956,
|
|
"reward_std": 0.1216077208518982,
|
|
"rewards/accuracy_reward_step": 0.01953125,
|
|
"rewards/final_brier_reward_step": 0.021593358367681503,
|
|
"rewards/format_reward_step": 0.05078125,
|
|
"step": 12
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.29999999999999993,
|
|
"aux_distill/loss": 1.1530120442895329,
|
|
"aux_distill/mean_u": 0.33127441420315484,
|
|
"aux_distill/n_active_tok": 32.0,
|
|
"calib/answer_extract_rate": 0.08203125,
|
|
"calib/auroc": 0.5390625,
|
|
"calib/avg_num_step_conf": 0.546875,
|
|
"calib/ece": 0.5975,
|
|
"calib/final_conf_rate": 0.078125,
|
|
"calib/format_rate": 0.0625,
|
|
"calib/frac_conf_gt_0.9": 0.55,
|
|
"calib/gap": 0.07812500000000011,
|
|
"calib/mean_conf": 0.7474999999999999,
|
|
"calib/mu_c": 0.81,
|
|
"calib/mu_w": 0.7318749999999999,
|
|
"calib/nonempty_final_conf_rate": 0.078125,
|
|
"calib/nonempty_reasoning_rate": 0.1171875,
|
|
"calib/nonempty_step_conf_rate": 0.109375,
|
|
"calib/pce": 0.5725,
|
|
"calib/std_conf": 0.3290117779046823,
|
|
"calib/step_conf_rate": 0.109375,
|
|
"calib/step_q_c": 0.7300000000000001,
|
|
"calib/step_q_c_n": 15.0,
|
|
"calib/step_q_gap": 0.13221559139784955,
|
|
"calib/step_q_w": 0.5977844086021505,
|
|
"calib/step_q_w_n": 124.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0859375,
|
|
"completions/max_length": 2938.0,
|
|
"completions/max_terminated_length": 2938.0,
|
|
"completions/mean_length": 637.359375,
|
|
"completions/mean_terminated_length": 697.2821044921875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.013866666666666666,
|
|
"grad_norm": 0.04118745028972626,
|
|
"learning_rate": 3.2500000000000002e-06,
|
|
"loss": 0.2057,
|
|
"num_tokens": 3551684.0,
|
|
"reward": 0.05240878835320473,
|
|
"reward_std": 0.11728723347187042,
|
|
"rewards/accuracy_reward_step": 0.015625,
|
|
"rewards/final_brier_reward_step": 0.026692576706409454,
|
|
"rewards/format_reward_step": 0.0625,
|
|
"step": 13
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.29999999999999993,
|
|
"aux_distill/loss": 1.1231984041986012,
|
|
"aux_distill/mean_u": 0.275275627062822,
|
|
"aux_distill/n_active_tok": 25.904761904761905,
|
|
"calib/answer_extract_rate": 0.109375,
|
|
"calib/auroc": 0.3583333333333333,
|
|
"calib/avg_num_step_conf": 0.53125,
|
|
"calib/ece": 0.7061434782608695,
|
|
"calib/final_conf_rate": 0.08984375,
|
|
"calib/format_rate": 0.06640625,
|
|
"calib/frac_conf_gt_0.9": 0.4782608695652174,
|
|
"calib/gap": -0.11906499999999987,
|
|
"calib/mean_conf": 0.7635347826086957,
|
|
"calib/mu_c": 0.66,
|
|
"calib/mu_w": 0.7790649999999999,
|
|
"calib/nonempty_final_conf_rate": 0.08984375,
|
|
"calib/nonempty_reasoning_rate": 0.16015625,
|
|
"calib/nonempty_step_conf_rate": 0.12109375,
|
|
"calib/pce": 0.6696217391304349,
|
|
"calib/std_conf": 0.30982760368882795,
|
|
"calib/step_conf_rate": 0.12109375,
|
|
"calib/step_q_c": 0.44999999999999996,
|
|
"calib/step_q_c_n": 13.0,
|
|
"calib/step_q_gap": -0.16527398373983748,
|
|
"calib/step_q_w": 0.6152739837398374,
|
|
"calib/step_q_w_n": 123.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0859375,
|
|
"completions/max_length": 3038.0,
|
|
"completions/max_terminated_length": 3038.0,
|
|
"completions/mean_length": 721.92578125,
|
|
"completions/mean_terminated_length": 789.7991943359375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 8.0,
|
|
"epoch": 0.014933333333333333,
|
|
"grad_norm": 0.0329865962266922,
|
|
"learning_rate": 3.5e-06,
|
|
"loss": 0.2534,
|
|
"num_tokens": 3841897.0,
|
|
"reward": 0.052902527153491974,
|
|
"reward_std": 0.11711085587739944,
|
|
"rewards/accuracy_reward_step": 0.01171875,
|
|
"rewards/final_brier_reward_step": 0.027680054306983948,
|
|
"rewards/format_reward_step": 0.06640625,
|
|
"step": 14
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.29999999999999993,
|
|
"aux_distill/loss": 1.1267299324274063,
|
|
"aux_distill/mean_u": 0.4025237962874277,
|
|
"aux_distill/n_active_tok": 27.0,
|
|
"calib/answer_extract_rate": 0.11328125,
|
|
"calib/auroc": 0.19318181818181815,
|
|
"calib/avg_num_step_conf": 0.52734375,
|
|
"calib/ece": 0.7266666666666668,
|
|
"calib/final_conf_rate": 0.09375,
|
|
"calib/format_rate": 0.0859375,
|
|
"calib/frac_conf_gt_0.9": 0.5416666666666666,
|
|
"calib/gap": -0.10636363636363633,
|
|
"calib/mean_conf": 0.7825000000000001,
|
|
"calib/mu_c": 0.685,
|
|
"calib/mu_w": 0.7913636363636364,
|
|
"calib/nonempty_final_conf_rate": 0.09375,
|
|
"calib/nonempty_reasoning_rate": 0.1328125,
|
|
"calib/nonempty_step_conf_rate": 0.11328125,
|
|
"calib/pce": 0.7129166666666668,
|
|
"calib/std_conf": 0.3053993287484437,
|
|
"calib/step_conf_rate": 0.11328125,
|
|
"calib/step_q_c": 0.4879999999999999,
|
|
"calib/step_q_c_n": 10.0,
|
|
"calib/step_q_gap": -0.186724,
|
|
"calib/step_q_w": 0.6747239999999999,
|
|
"calib/step_q_w_n": 125.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.07421875,
|
|
"completions/max_length": 3016.0,
|
|
"completions/max_terminated_length": 3016.0,
|
|
"completions/mean_length": 644.44921875,
|
|
"completions/mean_terminated_length": 696.1138916015625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.016,
|
|
"grad_norm": 0.026582898572087288,
|
|
"learning_rate": 3.7500000000000005e-06,
|
|
"loss": 0.2583,
|
|
"num_tokens": 4114756.0,
|
|
"reward": 0.061240628361701965,
|
|
"reward_std": 0.12151821702718735,
|
|
"rewards/accuracy_reward_step": 0.0078125,
|
|
"rewards/final_brier_reward_step": 0.028731251135468483,
|
|
"rewards/format_reward_step": 0.0859375,
|
|
"step": 15
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.29999999999999993,
|
|
"aux_distill/loss": 1.135950155556202,
|
|
"aux_distill/mean_u": 0.3597005385226347,
|
|
"aux_distill/n_active_tok": 25.5,
|
|
"calib/answer_extract_rate": 0.078125,
|
|
"calib/auroc": 0.3818181818181818,
|
|
"calib/avg_num_step_conf": 0.3984375,
|
|
"calib/ece": 0.6481875,
|
|
"calib/final_conf_rate": 0.0625,
|
|
"calib/format_rate": 0.05078125,
|
|
"calib/frac_conf_gt_0.9": 0.375,
|
|
"calib/gap": -0.21601818181818166,
|
|
"calib/mean_conf": 0.6943125,
|
|
"calib/mu_c": 0.5458000000000001,
|
|
"calib/mu_w": 0.7618181818181817,
|
|
"calib/nonempty_final_conf_rate": 0.0625,
|
|
"calib/nonempty_reasoning_rate": 0.109375,
|
|
"calib/nonempty_step_conf_rate": 0.08203125,
|
|
"calib/pce": 0.515,
|
|
"calib/std_conf": 0.29892133554457095,
|
|
"calib/step_conf_rate": 0.08203125,
|
|
"calib/step_q_c": 0.5407692307692308,
|
|
"calib/step_q_c_n": 26.0,
|
|
"calib/step_q_gap": -0.19265182186234808,
|
|
"calib/step_q_w": 0.7334210526315789,
|
|
"calib/step_q_w_n": 76.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.08203125,
|
|
"completions/max_length": 3048.0,
|
|
"completions/max_terminated_length": 3048.0,
|
|
"completions/mean_length": 733.09765625,
|
|
"completions/mean_terminated_length": 798.6084594726562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 3.0,
|
|
"epoch": 0.017066666666666667,
|
|
"grad_norm": 0.02180486172437668,
|
|
"learning_rate": 4.000000000000001e-06,
|
|
"loss": 0.209,
|
|
"num_tokens": 4411277.0,
|
|
"reward": 0.0470406636595726,
|
|
"reward_std": 0.11150971055030823,
|
|
"rewards/accuracy_reward_step": 0.01953125,
|
|
"rewards/final_brier_reward_step": 0.023768823593854904,
|
|
"rewards/format_reward_step": 0.05078125,
|
|
"step": 16
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.2999999999999999,
|
|
"aux_distill/loss": 1.048417510986328,
|
|
"aux_distill/mean_u": 0.352884973460103,
|
|
"aux_distill/n_active_tok": 34.08,
|
|
"calib/answer_extract_rate": 0.1640625,
|
|
"calib/auroc": 0.3012422360248447,
|
|
"calib/avg_num_step_conf": 0.84765625,
|
|
"calib/ece": 0.641,
|
|
"calib/final_conf_rate": 0.1171875,
|
|
"calib/format_rate": 0.0859375,
|
|
"calib/frac_conf_gt_0.9": 0.5333333333333333,
|
|
"calib/gap": -0.22850931677018627,
|
|
"calib/mean_conf": 0.7123333333333333,
|
|
"calib/mu_c": 0.5371428571428571,
|
|
"calib/mu_w": 0.7656521739130434,
|
|
"calib/nonempty_final_conf_rate": 0.1171875,
|
|
"calib/nonempty_reasoning_rate": 0.234375,
|
|
"calib/nonempty_step_conf_rate": 0.1640625,
|
|
"calib/pce": 0.5599999999999999,
|
|
"calib/std_conf": 0.31450154565951216,
|
|
"calib/step_conf_rate": 0.1640625,
|
|
"calib/step_q_c": 0.5005045454545455,
|
|
"calib/step_q_c_n": 22.0,
|
|
"calib/step_q_gap": -0.09365750582750587,
|
|
"calib/step_q_w": 0.5941620512820514,
|
|
"calib/step_q_w_n": 195.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.09375,
|
|
"completions/max_length": 2991.0,
|
|
"completions/max_terminated_length": 2991.0,
|
|
"completions/mean_length": 616.61328125,
|
|
"completions/mean_terminated_length": 680.40087890625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 4.0,
|
|
"epoch": 0.018133333333333335,
|
|
"grad_norm": 0.022275879979133606,
|
|
"learning_rate": 4.25e-06,
|
|
"loss": 0.2932,
|
|
"num_tokens": 4672658.0,
|
|
"reward": 0.07711464911699295,
|
|
"reward_std": 0.1641203761100769,
|
|
"rewards/accuracy_reward_step": 0.02734375,
|
|
"rewards/final_brier_reward_step": 0.0409480482339859,
|
|
"rewards/format_reward_step": 0.0859375,
|
|
"step": 17
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.29999999999999993,
|
|
"aux_distill/loss": 1.0378237331614775,
|
|
"aux_distill/mean_u": 0.2850642462215336,
|
|
"aux_distill/n_active_tok": 17.176470588235293,
|
|
"calib/answer_extract_rate": 0.05859375,
|
|
"calib/auroc": 0.18181818181818177,
|
|
"calib/avg_num_step_conf": 0.28515625,
|
|
"calib/ece": 0.6483374879275133,
|
|
"calib/final_conf_rate": 0.046875,
|
|
"calib/format_rate": 0.0390625,
|
|
"calib/frac_conf_gt_0.9": 0.3333333333333333,
|
|
"calib/gap": -0.37091362319365084,
|
|
"calib/mean_conf": 0.61000415459418,
|
|
"calib/mu_c": 0.27,
|
|
"calib/mu_w": 0.6409136231936509,
|
|
"calib/nonempty_final_conf_rate": 0.046875,
|
|
"calib/nonempty_reasoning_rate": 0.09375,
|
|
"calib/nonempty_step_conf_rate": 0.07421875,
|
|
"calib/pce": 0.58750415459418,
|
|
"calib/std_conf": 0.3330381710490205,
|
|
"calib/step_conf_rate": 0.07421875,
|
|
"calib/step_q_c": 0.446,
|
|
"calib/step_q_c_n": 5.0,
|
|
"calib/step_q_gap": -0.10885294117647043,
|
|
"calib/step_q_w": 0.5548529411764704,
|
|
"calib/step_q_w_n": 68.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.1328125,
|
|
"completions/max_length": 3013.0,
|
|
"completions/max_terminated_length": 3013.0,
|
|
"completions/mean_length": 631.98828125,
|
|
"completions/mean_terminated_length": 728.779296875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0192,
|
|
"grad_norm": 0.03261338174343109,
|
|
"learning_rate": 4.5e-06,
|
|
"loss": 0.1528,
|
|
"num_tokens": 4945167.0,
|
|
"reward": 0.030748046934604645,
|
|
"reward_std": 0.07678812742233276,
|
|
"rewards/accuracy_reward_step": 0.00390625,
|
|
"rewards/final_brier_reward_step": 0.01852734386920929,
|
|
"rewards/format_reward_step": 0.0390625,
|
|
"step": 18
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.29999999999999993,
|
|
"aux_distill/loss": 1.0097080160070349,
|
|
"aux_distill/mean_u": 0.3194098281602694,
|
|
"aux_distill/n_active_tok": 41.333333333333336,
|
|
"calib/answer_extract_rate": 0.16015625,
|
|
"calib/auroc": 0.5208333333333333,
|
|
"calib/avg_num_step_conf": 1.08984375,
|
|
"calib/ece": 0.47091428571428573,
|
|
"calib/final_conf_rate": 0.13671875,
|
|
"calib/format_rate": 0.09765625,
|
|
"calib/frac_conf_gt_0.9": 0.22857142857142856,
|
|
"calib/gap": 0.07566666666666666,
|
|
"calib/mean_conf": 0.5274857142857142,
|
|
"calib/mu_c": 0.5966666666666667,
|
|
"calib/mu_w": 0.521,
|
|
"calib/nonempty_final_conf_rate": 0.13671875,
|
|
"calib/nonempty_reasoning_rate": 0.234375,
|
|
"calib/nonempty_step_conf_rate": 0.19921875,
|
|
"calib/pce": 0.45634285714285716,
|
|
"calib/std_conf": 0.3273590751478148,
|
|
"calib/step_conf_rate": 0.19921875,
|
|
"calib/step_q_c": 0.7294736842105264,
|
|
"calib/step_q_c_n": 19.0,
|
|
"calib/step_q_gap": 0.3565667611336033,
|
|
"calib/step_q_w": 0.3729069230769231,
|
|
"calib/step_q_w_n": 260.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.05078125,
|
|
"completions/max_length": 2837.0,
|
|
"completions/max_terminated_length": 2837.0,
|
|
"completions/mean_length": 608.08203125,
|
|
"completions/mean_terminated_length": 640.6131591796875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.020266666666666665,
|
|
"grad_norm": 0.02417433261871338,
|
|
"learning_rate": 4.75e-06,
|
|
"loss": 0.3277,
|
|
"num_tokens": 5205596.0,
|
|
"reward": 0.08809477090835571,
|
|
"reward_std": 0.19941377639770508,
|
|
"rewards/accuracy_reward_step": 0.015625,
|
|
"rewards/final_brier_reward_step": 0.06290827691555023,
|
|
"rewards/format_reward_step": 0.09765625,
|
|
"step": 19
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.29999999999999993,
|
|
"aux_distill/loss": 1.0175798769508089,
|
|
"aux_distill/mean_u": 0.345296856355202,
|
|
"aux_distill/n_active_tok": 46.857142857142854,
|
|
"calib/answer_extract_rate": 0.2109375,
|
|
"calib/auroc": 0.3888888888888889,
|
|
"calib/avg_num_step_conf": 1.28515625,
|
|
"calib/ece": 0.38424444444444444,
|
|
"calib/final_conf_rate": 0.17578125,
|
|
"calib/format_rate": 0.1328125,
|
|
"calib/frac_conf_gt_0.9": 0.1111111111111111,
|
|
"calib/gap": -0.08728205128205124,
|
|
"calib/mean_conf": 0.447311111111111,
|
|
"calib/mu_c": 0.37166666666666665,
|
|
"calib/mu_w": 0.4589487179487179,
|
|
"calib/nonempty_final_conf_rate": 0.17578125,
|
|
"calib/nonempty_reasoning_rate": 0.328125,
|
|
"calib/nonempty_step_conf_rate": 0.26953125,
|
|
"calib/pce": 0.3491111111111111,
|
|
"calib/std_conf": 0.3052094233452916,
|
|
"calib/step_conf_rate": 0.26953125,
|
|
"calib/step_q_c": 0.47380952380952385,
|
|
"calib/step_q_c_n": 21.0,
|
|
"calib/step_q_gap": 0.19352738095238098,
|
|
"calib/step_q_w": 0.28028214285714287,
|
|
"calib/step_q_w_n": 308.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0625,
|
|
"completions/max_length": 2955.0,
|
|
"completions/max_terminated_length": 2955.0,
|
|
"completions/mean_length": 582.6796875,
|
|
"completions/mean_terminated_length": 621.5250244140625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 3.0,
|
|
"epoch": 0.021333333333333333,
|
|
"grad_norm": 0.021719202399253845,
|
|
"learning_rate": 5e-06,
|
|
"loss": 0.3435,
|
|
"num_tokens": 5459634.0,
|
|
"reward": 0.12606582045555115,
|
|
"reward_std": 0.24382349848747253,
|
|
"rewards/accuracy_reward_step": 0.0234375,
|
|
"rewards/final_brier_reward_step": 0.0958816409111023,
|
|
"rewards/format_reward_step": 0.1328125,
|
|
"step": 20
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9779601020197715,
|
|
"aux_distill/mean_u": 0.35301126158003554,
|
|
"aux_distill/n_active_tok": 59.74193548387097,
|
|
"calib/answer_extract_rate": 0.2421875,
|
|
"calib/auroc": 0.4804347826086957,
|
|
"calib/avg_num_step_conf": 1.93359375,
|
|
"calib/ece": 0.39197978315128956,
|
|
"calib/final_conf_rate": 0.19921875,
|
|
"calib/format_rate": 0.16796875,
|
|
"calib/frac_conf_gt_0.9": 0.1568627450980392,
|
|
"calib/gap": 0.011890629145995035,
|
|
"calib/mean_conf": 0.39727511880949473,
|
|
"calib/mu_c": 0.40800000000000003,
|
|
"calib/mu_w": 0.396109370854005,
|
|
"calib/nonempty_final_conf_rate": 0.19921875,
|
|
"calib/nonempty_reasoning_rate": 0.38671875,
|
|
"calib/nonempty_step_conf_rate": 0.328125,
|
|
"calib/pce": 0.3456078431372549,
|
|
"calib/std_conf": 0.3456198548759202,
|
|
"calib/step_conf_rate": 0.328125,
|
|
"calib/step_q_c": 0.23555555555555557,
|
|
"calib/step_q_c_n": 18.0,
|
|
"calib/step_q_gap": -0.0360178732428964,
|
|
"calib/step_q_w": 0.271573428798452,
|
|
"calib/step_q_w_n": 477.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.09765625,
|
|
"completions/max_length": 3006.0,
|
|
"completions/max_terminated_length": 3006.0,
|
|
"completions/mean_length": 621.890625,
|
|
"completions/mean_terminated_length": 689.19482421875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0224,
|
|
"grad_norm": 0.029949838295578957,
|
|
"learning_rate": 4.9722222222222224e-06,
|
|
"loss": 0.4186,
|
|
"num_tokens": 5721798.0,
|
|
"reward": 0.15618005394935608,
|
|
"reward_std": 0.32561713457107544,
|
|
"rewards/accuracy_reward_step": 0.01953125,
|
|
"rewards/final_brier_reward_step": 0.12486011534929276,
|
|
"rewards/format_reward_step": 0.16796875,
|
|
"step": 21
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 1.0142595786601305,
|
|
"aux_distill/mean_u": 0.38725862041017606,
|
|
"aux_distill/n_active_tok": 70.125,
|
|
"calib/answer_extract_rate": 0.31640625,
|
|
"calib/auroc": 0.6407407407407408,
|
|
"calib/avg_num_step_conf": 2.203125,
|
|
"calib/ece": 0.27249275362318837,
|
|
"calib/final_conf_rate": 0.26953125,
|
|
"calib/format_rate": 0.203125,
|
|
"calib/frac_conf_gt_0.9": 0.15942028985507245,
|
|
"calib/gap": 0.18299999999999994,
|
|
"calib/mean_conf": 0.4194492753623189,
|
|
"calib/mu_c": 0.5626666666666666,
|
|
"calib/mu_w": 0.3796666666666667,
|
|
"calib/nonempty_final_conf_rate": 0.26953125,
|
|
"calib/nonempty_reasoning_rate": 0.49609375,
|
|
"calib/nonempty_step_conf_rate": 0.41796875,
|
|
"calib/pce": 0.23727536231884055,
|
|
"calib/std_conf": 0.34296911325132823,
|
|
"calib/step_conf_rate": 0.41796875,
|
|
"calib/step_q_c": 0.21596969696969698,
|
|
"calib/step_q_c_n": 66.0,
|
|
"calib/step_q_gap": -0.03991813435560426,
|
|
"calib/step_q_w": 0.25588783132530124,
|
|
"calib/step_q_w_n": 498.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0546875,
|
|
"completions/max_length": 2882.0,
|
|
"completions/max_terminated_length": 2882.0,
|
|
"completions/mean_length": 561.83984375,
|
|
"completions/mean_terminated_length": 594.3429565429688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 4.0,
|
|
"epoch": 0.023466666666666667,
|
|
"grad_norm": 0.02805226296186447,
|
|
"learning_rate": 4.944444444444445e-06,
|
|
"loss": 0.3614,
|
|
"num_tokens": 5967445.0,
|
|
"reward": 0.20527201890945435,
|
|
"reward_std": 0.34744924306869507,
|
|
"rewards/accuracy_reward_step": 0.05859375,
|
|
"rewards/final_brier_reward_step": 0.14882531762123108,
|
|
"rewards/format_reward_step": 0.203125,
|
|
"step": 22
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9831480960692128,
|
|
"aux_distill/mean_u": 0.31227868519153984,
|
|
"aux_distill/n_active_tok": 66.58064516129032,
|
|
"calib/answer_extract_rate": 0.33984375,
|
|
"calib/auroc": 0.43243243243243246,
|
|
"calib/avg_num_step_conf": 2.0234375,
|
|
"calib/ece": 0.2986448154098389,
|
|
"calib/final_conf_rate": 0.296875,
|
|
"calib/format_rate": 0.23828125,
|
|
"calib/frac_conf_gt_0.9": 0.06578947368421052,
|
|
"calib/gap": -0.13671629690740222,
|
|
"calib/mean_conf": 0.3181184996203653,
|
|
"calib/mu_c": 0.185,
|
|
"calib/mu_w": 0.3217162969074022,
|
|
"calib/nonempty_final_conf_rate": 0.296875,
|
|
"calib/nonempty_reasoning_rate": 0.48828125,
|
|
"calib/nonempty_step_conf_rate": 0.41015625,
|
|
"calib/pce": 0.29522376277826,
|
|
"calib/std_conf": 0.2936267548297175,
|
|
"calib/step_conf_rate": 0.41015625,
|
|
"calib/step_q_c": 0.35166666666666674,
|
|
"calib/step_q_c_n": 6.0,
|
|
"calib/step_q_gap": 0.07229459464389526,
|
|
"calib/step_q_w": 0.2793720720227715,
|
|
"calib/step_q_w_n": 512.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.05859375,
|
|
"completions/max_length": 2969.0,
|
|
"completions/max_terminated_length": 2969.0,
|
|
"completions/mean_length": 624.36328125,
|
|
"completions/mean_terminated_length": 663.22412109375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 3.0,
|
|
"epoch": 0.024533333333333334,
|
|
"grad_norm": 0.01959611475467682,
|
|
"learning_rate": 4.9166666666666665e-06,
|
|
"loss": 0.3293,
|
|
"num_tokens": 6231218.0,
|
|
"reward": 0.2157849371433258,
|
|
"reward_std": 0.3200821280479431,
|
|
"rewards/accuracy_reward_step": 0.0078125,
|
|
"rewards/final_brier_reward_step": 0.18547609448432922,
|
|
"rewards/format_reward_step": 0.23828125,
|
|
"step": 23
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 1.0078411493450403,
|
|
"aux_distill/mean_u": 0.37552853406236947,
|
|
"aux_distill/n_active_tok": 83.25,
|
|
"calib/answer_extract_rate": 0.41015625,
|
|
"calib/auroc": 0.3296511627906976,
|
|
"calib/avg_num_step_conf": 2.6015625,
|
|
"calib/ece": 0.3622120249895833,
|
|
"calib/final_conf_rate": 0.375,
|
|
"calib/format_rate": 0.30859375,
|
|
"calib/frac_conf_gt_0.9": 0.10416666666666667,
|
|
"calib/gap": -0.12713760929069765,
|
|
"calib/mean_conf": 0.36041410832291665,
|
|
"calib/mu_c": 0.24652,
|
|
"calib/mu_w": 0.37365760929069763,
|
|
"calib/nonempty_final_conf_rate": 0.375,
|
|
"calib/nonempty_reasoning_rate": 0.59375,
|
|
"calib/nonempty_step_conf_rate": 0.51953125,
|
|
"calib/pce": 0.30922973332291664,
|
|
"calib/std_conf": 0.31589091395569247,
|
|
"calib/step_conf_rate": 0.51953125,
|
|
"calib/step_q_c": 0.22427352941176468,
|
|
"calib/step_q_c_n": 34.0,
|
|
"calib/step_q_gap": -0.11105084149312358,
|
|
"calib/step_q_w": 0.33532437090488826,
|
|
"calib/step_q_w_n": 632.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0390625,
|
|
"completions/max_length": 3060.0,
|
|
"completions/max_terminated_length": 3060.0,
|
|
"completions/mean_length": 565.09765625,
|
|
"completions/mean_terminated_length": 588.069091796875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 3.0,
|
|
"epoch": 0.0256,
|
|
"grad_norm": 0.02145981602370739,
|
|
"learning_rate": 4.888888888888889e-06,
|
|
"loss": 0.4741,
|
|
"num_tokens": 6480395.0,
|
|
"reward": 0.2843121290206909,
|
|
"reward_std": 0.3424231708049774,
|
|
"rewards/accuracy_reward_step": 0.04296875,
|
|
"rewards/final_brier_reward_step": 0.21706172823905945,
|
|
"rewards/format_reward_step": 0.30859375,
|
|
"step": 24
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9836863875389099,
|
|
"aux_distill/mean_u": 0.3532546896091583,
|
|
"aux_distill/n_active_tok": 106.0,
|
|
"calib/answer_extract_rate": 0.4453125,
|
|
"calib/auroc": 0.4957142857142857,
|
|
"calib/avg_num_step_conf": 3.3125,
|
|
"calib/ece": 0.3087335762024953,
|
|
"calib/final_conf_rate": 0.41796875,
|
|
"calib/format_rate": 0.328125,
|
|
"calib/frac_conf_gt_0.9": 0.11214953271028037,
|
|
"calib/gap": -0.003102926536669992,
|
|
"calib/mean_conf": 0.3528999313426823,
|
|
"calib/mu_c": 0.35,
|
|
"calib/mu_w": 0.35310292653666997,
|
|
"calib/nonempty_final_conf_rate": 0.41796875,
|
|
"calib/nonempty_reasoning_rate": 0.6796875,
|
|
"calib/nonempty_step_conf_rate": 0.59765625,
|
|
"calib/pce": 0.298106473398757,
|
|
"calib/std_conf": 0.3057849058569881,
|
|
"calib/step_conf_rate": 0.59765625,
|
|
"calib/step_q_c": 0.13434782608695653,
|
|
"calib/step_q_c_n": 23.0,
|
|
"calib/step_q_gap": -0.15756816371922702,
|
|
"calib/step_q_w": 0.29191598980618355,
|
|
"calib/step_q_w_n": 825.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0390625,
|
|
"completions/max_length": 3061.0,
|
|
"completions/max_terminated_length": 3061.0,
|
|
"completions/mean_length": 598.171875,
|
|
"completions/mean_terminated_length": 622.48779296875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.02666666666666667,
|
|
"grad_norm": 0.02117428369820118,
|
|
"learning_rate": 4.861111111111111e-06,
|
|
"loss": 0.4794,
|
|
"num_tokens": 6736751.0,
|
|
"reward": 0.302656352519989,
|
|
"reward_std": 0.3933459520339966,
|
|
"rewards/accuracy_reward_step": 0.02734375,
|
|
"rewards/final_brier_reward_step": 0.2498440146446228,
|
|
"rewards/format_reward_step": 0.328125,
|
|
"step": 25
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9649364463984966,
|
|
"aux_distill/mean_u": 0.43439169292320684,
|
|
"aux_distill/n_active_tok": 122.25,
|
|
"calib/answer_extract_rate": 0.5,
|
|
"calib/auroc": 0.5594135802469136,
|
|
"calib/avg_num_step_conf": 3.83984375,
|
|
"calib/ece": 0.32071225440319434,
|
|
"calib/final_conf_rate": 0.4453125,
|
|
"calib/format_rate": 0.37109375,
|
|
"calib/frac_conf_gt_0.9": 0.07017543859649122,
|
|
"calib/gap": 0.01434076850033189,
|
|
"calib/mean_conf": 0.36474734212249255,
|
|
"calib/mu_c": 0.37833333333333335,
|
|
"calib/mu_w": 0.36399256483300146,
|
|
"calib/nonempty_final_conf_rate": 0.4453125,
|
|
"calib/nonempty_reasoning_rate": 0.734375,
|
|
"calib/nonempty_step_conf_rate": 0.66015625,
|
|
"calib/pce": 0.3164140087891592,
|
|
"calib/std_conf": 0.28864045101743246,
|
|
"calib/step_conf_rate": 0.66015625,
|
|
"calib/step_q_c": 0.18026315789473682,
|
|
"calib/step_q_c_n": 38.0,
|
|
"calib/step_q_gap": -0.07871680475979861,
|
|
"calib/step_q_w": 0.25897996265453543,
|
|
"calib/step_q_w_n": 945.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0390625,
|
|
"completions/max_length": 2970.0,
|
|
"completions/max_terminated_length": 2970.0,
|
|
"completions/mean_length": 603.3359375,
|
|
"completions/mean_terminated_length": 627.8617553710938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.027733333333333332,
|
|
"grad_norm": 0.021297749131917953,
|
|
"learning_rate": 4.833333333333333e-06,
|
|
"loss": 0.4328,
|
|
"num_tokens": 6996445.0,
|
|
"reward": 0.3466510474681854,
|
|
"reward_std": 0.435642272233963,
|
|
"rewards/accuracy_reward_step": 0.0234375,
|
|
"rewards/final_brier_reward_step": 0.29877087473869324,
|
|
"rewards/format_reward_step": 0.37109375,
|
|
"step": 26
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9238360952585936,
|
|
"aux_distill/mean_u": 0.3704680656924057,
|
|
"aux_distill/n_active_tok": 123.75,
|
|
"calib/answer_extract_rate": 0.6015625,
|
|
"calib/auroc": 0.43408613445378147,
|
|
"calib/avg_num_step_conf": 3.953125,
|
|
"calib/ece": 0.30361644310026653,
|
|
"calib/final_conf_rate": 0.5859375,
|
|
"calib/format_rate": 0.48828125,
|
|
"calib/frac_conf_gt_0.9": 0.08,
|
|
"calib/gap": -0.0879839650679834,
|
|
"calib/mean_conf": 0.3743435568997334,
|
|
"calib/mu_c": 0.2945714285714286,
|
|
"calib/mu_w": 0.382555393639412,
|
|
"calib/nonempty_final_conf_rate": 0.5859375,
|
|
"calib/nonempty_reasoning_rate": 0.7890625,
|
|
"calib/nonempty_step_conf_rate": 0.703125,
|
|
"calib/pce": 0.2923133333333333,
|
|
"calib/std_conf": 0.29847912081859895,
|
|
"calib/step_conf_rate": 0.703125,
|
|
"calib/step_q_c": 0.32738,
|
|
"calib/step_q_c_n": 55.0,
|
|
"calib/step_q_gap": 0.03473288873430963,
|
|
"calib/step_q_w": 0.2926471112656904,
|
|
"calib/step_q_w_n": 956.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.03125,
|
|
"completions/max_length": 2700.0,
|
|
"completions/max_terminated_length": 2700.0,
|
|
"completions/mean_length": 504.91015625,
|
|
"completions/mean_terminated_length": 521.1975708007812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 3.0,
|
|
"epoch": 0.0288,
|
|
"grad_norm": 0.019252900034189224,
|
|
"learning_rate": 4.805555555555556e-06,
|
|
"loss": 0.4474,
|
|
"num_tokens": 7230918.0,
|
|
"reward": 0.4484649896621704,
|
|
"reward_std": 0.42001235485076904,
|
|
"rewards/accuracy_reward_step": 0.0546875,
|
|
"rewards/final_brier_reward_step": 0.353961318731308,
|
|
"rewards/format_reward_step": 0.48828125,
|
|
"step": 27
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.912374921143055,
|
|
"aux_distill/mean_u": 0.35857990176340526,
|
|
"aux_distill/n_active_tok": 117.625,
|
|
"calib/answer_extract_rate": 0.5625,
|
|
"calib/auroc": 0.44670280036133697,
|
|
"calib/avg_num_step_conf": 3.8359375,
|
|
"calib/ece": 0.27327455407936924,
|
|
"calib/final_conf_rate": 0.55078125,
|
|
"calib/format_rate": 0.45703125,
|
|
"calib/frac_conf_gt_0.9": 0.05673758865248227,
|
|
"calib/gap": -0.038345627034073626,
|
|
"calib/mean_conf": 0.34845044060419184,
|
|
"calib/mu_c": 0.315,
|
|
"calib/mu_w": 0.35334562703407363,
|
|
"calib/nonempty_final_conf_rate": 0.55078125,
|
|
"calib/nonempty_reasoning_rate": 0.76953125,
|
|
"calib/nonempty_step_conf_rate": 0.69921875,
|
|
"calib/pce": 0.24703271010773803,
|
|
"calib/std_conf": 0.27598673100329424,
|
|
"calib/step_conf_rate": 0.69921875,
|
|
"calib/step_q_c": 0.4151190476190476,
|
|
"calib/step_q_c_n": 84.0,
|
|
"calib/step_q_gap": 0.0771668163846902,
|
|
"calib/step_q_w": 0.3379522312343574,
|
|
"calib/step_q_w_n": 898.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0546875,
|
|
"completions/max_length": 2680.0,
|
|
"completions/max_terminated_length": 2680.0,
|
|
"completions/mean_length": 481.4453125,
|
|
"completions/mean_terminated_length": 509.2974853515625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.029866666666666666,
|
|
"grad_norm": 0.016628732904791832,
|
|
"learning_rate": 4.777777777777778e-06,
|
|
"loss": 0.3969,
|
|
"num_tokens": 7461112.0,
|
|
"reward": 0.4341887831687927,
|
|
"reward_std": 0.4316248893737793,
|
|
"rewards/accuracy_reward_step": 0.0703125,
|
|
"rewards/final_brier_reward_step": 0.34103381633758545,
|
|
"rewards/format_reward_step": 0.45703125,
|
|
"step": 28
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9332783650606871,
|
|
"aux_distill/mean_u": 0.387008633237988,
|
|
"aux_distill/n_active_tok": 118.875,
|
|
"calib/answer_extract_rate": 0.6796875,
|
|
"calib/auroc": 0.3953703703703704,
|
|
"calib/avg_num_step_conf": 3.74609375,
|
|
"calib/ece": 0.36014871610465116,
|
|
"calib/final_conf_rate": 0.671875,
|
|
"calib/format_rate": 0.55859375,
|
|
"calib/frac_conf_gt_0.9": 0.06395348837209303,
|
|
"calib/gap": -0.04073567388888888,
|
|
"calib/mean_conf": 0.38256732075581396,
|
|
"calib/mu_c": 0.3442,
|
|
"calib/mu_w": 0.3849356738888889,
|
|
"calib/nonempty_final_conf_rate": 0.671875,
|
|
"calib/nonempty_reasoning_rate": 0.87890625,
|
|
"calib/nonempty_step_conf_rate": 0.8046875,
|
|
"calib/pce": 0.3422882509883721,
|
|
"calib/std_conf": 0.2834787782532363,
|
|
"calib/step_conf_rate": 0.8046875,
|
|
"calib/step_q_c": 0.30735294117647055,
|
|
"calib/step_q_c_n": 34.0,
|
|
"calib/step_q_gap": -0.08506815316426153,
|
|
"calib/step_q_w": 0.3924210943407321,
|
|
"calib/step_q_w_n": 925.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.04296875,
|
|
"completions/max_length": 2737.0,
|
|
"completions/max_terminated_length": 2737.0,
|
|
"completions/mean_length": 443.3828125,
|
|
"completions/mean_terminated_length": 463.2897644042969,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.030933333333333334,
|
|
"grad_norm": 0.0148693285882473,
|
|
"learning_rate": 4.75e-06,
|
|
"loss": 0.419,
|
|
"num_tokens": 7681746.0,
|
|
"reward": 0.5097656846046448,
|
|
"reward_std": 0.44440221786499023,
|
|
"rewards/accuracy_reward_step": 0.0390625,
|
|
"rewards/final_brier_reward_step": 0.4218751788139343,
|
|
"rewards/format_reward_step": 0.55859375,
|
|
"step": 29
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.935307526960969,
|
|
"aux_distill/mean_u": 0.33969780650988646,
|
|
"aux_distill/n_active_tok": 119.125,
|
|
"calib/answer_extract_rate": 0.75,
|
|
"calib/auroc": 0.42008196721311475,
|
|
"calib/avg_num_step_conf": 3.72265625,
|
|
"calib/ece": 0.3535955130890052,
|
|
"calib/final_conf_rate": 0.74609375,
|
|
"calib/format_rate": 0.61328125,
|
|
"calib/frac_conf_gt_0.9": 0.06282722513089005,
|
|
"calib/gap": -0.09215877049180332,
|
|
"calib/mean_conf": 0.39329871727748694,
|
|
"calib/mu_c": 0.305,
|
|
"calib/mu_w": 0.3971587704918033,
|
|
"calib/nonempty_final_conf_rate": 0.74609375,
|
|
"calib/nonempty_reasoning_rate": 0.88671875,
|
|
"calib/nonempty_step_conf_rate": 0.78125,
|
|
"calib/pce": 0.3525047068062827,
|
|
"calib/std_conf": 0.28442240836145366,
|
|
"calib/step_conf_rate": 0.78125,
|
|
"calib/step_q_c": 0.3773913043478261,
|
|
"calib/step_q_c_n": 23.0,
|
|
"calib/step_q_gap": 0.003847411028860548,
|
|
"calib/step_q_w": 0.37354389331896554,
|
|
"calib/step_q_w_n": 928.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2662.0,
|
|
"completions/max_terminated_length": 2662.0,
|
|
"completions/mean_length": 392.8515625,
|
|
"completions/mean_terminated_length": 397.5098876953125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 1.0,
|
|
"epoch": 0.032,
|
|
"grad_norm": 0.017016848549246788,
|
|
"learning_rate": 4.722222222222222e-06,
|
|
"loss": 0.4111,
|
|
"num_tokens": 7889300.0,
|
|
"reward": 0.5534157752990723,
|
|
"reward_std": 0.43640461564064026,
|
|
"rewards/accuracy_reward_step": 0.03125,
|
|
"rewards/final_brier_reward_step": 0.4623003900051117,
|
|
"rewards/format_reward_step": 0.61328125,
|
|
"step": 30
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9320898372679949,
|
|
"aux_distill/mean_u": 0.35277812933664776,
|
|
"aux_distill/n_active_tok": 129.25,
|
|
"calib/answer_extract_rate": 0.75,
|
|
"calib/auroc": 0.5129411764705882,
|
|
"calib/avg_num_step_conf": 4.04296875,
|
|
"calib/ece": 0.34517497453549106,
|
|
"calib/final_conf_rate": 0.75,
|
|
"calib/format_rate": 0.65234375,
|
|
"calib/frac_conf_gt_0.9": 0.052083333333333336,
|
|
"calib/gap": 0.0063673556692964706,
|
|
"calib/mean_conf": 0.41537289120215776,
|
|
"calib/mu_c": 0.4211764705882353,
|
|
"calib/mu_w": 0.41480911491893885,
|
|
"calib/nonempty_final_conf_rate": 0.75,
|
|
"calib/nonempty_reasoning_rate": 0.9140625,
|
|
"calib/nonempty_step_conf_rate": 0.85546875,
|
|
"calib/pce": 0.33600309953549107,
|
|
"calib/std_conf": 0.254673754611707,
|
|
"calib/step_conf_rate": 0.85546875,
|
|
"calib/step_q_c": 0.4259090909090909,
|
|
"calib/step_q_c_n": 66.0,
|
|
"calib/step_q_gap": 0.0247227423921349,
|
|
"calib/step_q_w": 0.401186348516956,
|
|
"calib/step_q_w_n": 969.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2856.0,
|
|
"completions/max_terminated_length": 2856.0,
|
|
"completions/mean_length": 357.6953125,
|
|
"completions/mean_terminated_length": 361.936767578125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 4.0,
|
|
"epoch": 0.03306666666666667,
|
|
"grad_norm": 0.015859203413128853,
|
|
"learning_rate": 4.694444444444445e-06,
|
|
"loss": 0.3608,
|
|
"num_tokens": 8086782.0,
|
|
"reward": 0.6042488217353821,
|
|
"reward_std": 0.4498489797115326,
|
|
"rewards/accuracy_reward_step": 0.06640625,
|
|
"rewards/final_brier_reward_step": 0.4897475838661194,
|
|
"rewards/format_reward_step": 0.65234375,
|
|
"step": 31
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9780595041811466,
|
|
"aux_distill/mean_u": 0.372310869459932,
|
|
"aux_distill/n_active_tok": 103.75,
|
|
"calib/answer_extract_rate": 0.80078125,
|
|
"calib/auroc": 0.49869109947643986,
|
|
"calib/avg_num_step_conf": 3.2421875,
|
|
"calib/ece": 0.369059009842277,
|
|
"calib/final_conf_rate": 0.81640625,
|
|
"calib/format_rate": 0.71484375,
|
|
"calib/frac_conf_gt_0.9": 0.07177033492822966,
|
|
"calib/gap": 0.004506691382592742,
|
|
"calib/mean_conf": 0.446437000272899,
|
|
"calib/mu_c": 0.45055555555555554,
|
|
"calib/mu_w": 0.4460488641729628,
|
|
"calib/nonempty_final_conf_rate": 0.81640625,
|
|
"calib/nonempty_reasoning_rate": 0.9296875,
|
|
"calib/nonempty_step_conf_rate": 0.86328125,
|
|
"calib/pce": 0.3646858041006502,
|
|
"calib/std_conf": 0.26680856897658306,
|
|
"calib/step_conf_rate": 0.86328125,
|
|
"calib/step_q_c": 0.5942307692307691,
|
|
"calib/step_q_c_n": 78.0,
|
|
"calib/step_q_gap": 0.14223462561374778,
|
|
"calib/step_q_w": 0.45199614361702134,
|
|
"calib/step_q_w_n": 752.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 2095.0,
|
|
"completions/max_terminated_length": 2095.0,
|
|
"completions/mean_length": 329.16796875,
|
|
"completions/mean_terminated_length": 334.39288330078125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 5.0,
|
|
"epoch": 0.034133333333333335,
|
|
"grad_norm": 0.01736695133149624,
|
|
"learning_rate": 4.666666666666667e-06,
|
|
"loss": 0.4322,
|
|
"num_tokens": 8277753.0,
|
|
"reward": 0.6483778953552246,
|
|
"reward_std": 0.3941271901130676,
|
|
"rewards/accuracy_reward_step": 0.07421875,
|
|
"rewards/final_brier_reward_step": 0.5076932907104492,
|
|
"rewards/format_reward_step": 0.71484375,
|
|
"step": 32
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9598857369273901,
|
|
"aux_distill/mean_u": 0.39386526272428385,
|
|
"aux_distill/n_active_tok": 115.625,
|
|
"calib/answer_extract_rate": 0.86328125,
|
|
"calib/auroc": 0.437192118226601,
|
|
"calib/avg_num_step_conf": 3.6328125,
|
|
"calib/ece": 0.3795141434249163,
|
|
"calib/final_conf_rate": 0.83984375,
|
|
"calib/format_rate": 0.72265625,
|
|
"calib/frac_conf_gt_0.9": 0.05116279069767442,
|
|
"calib/gap": -0.08247836208386056,
|
|
"calib/mean_conf": 0.4240415852853815,
|
|
"calib/mu_c": 0.3461666666666667,
|
|
"calib/mu_w": 0.42864502875052723,
|
|
"calib/nonempty_final_conf_rate": 0.83984375,
|
|
"calib/nonempty_reasoning_rate": 0.9609375,
|
|
"calib/nonempty_step_conf_rate": 0.84375,
|
|
"calib/pce": 0.3738708876109629,
|
|
"calib/std_conf": 0.261931027986288,
|
|
"calib/step_conf_rate": 0.84375,
|
|
"calib/step_q_c": 0.5409999999999999,
|
|
"calib/step_q_c_n": 49.0,
|
|
"calib/step_q_gap": 0.11988479001135066,
|
|
"calib/step_q_w": 0.42111520998864926,
|
|
"calib/step_q_w_n": 881.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 3065.0,
|
|
"completions/max_terminated_length": 3065.0,
|
|
"completions/mean_length": 324.02734375,
|
|
"completions/mean_terminated_length": 329.170654296875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 8.0,
|
|
"epoch": 0.0352,
|
|
"grad_norm": 0.015602920204401016,
|
|
"learning_rate": 4.638888888888889e-06,
|
|
"loss": 0.289,
|
|
"num_tokens": 8467576.0,
|
|
"reward": 0.659507155418396,
|
|
"reward_std": 0.40564966201782227,
|
|
"rewards/accuracy_reward_step": 0.05078125,
|
|
"rewards/final_brier_reward_step": 0.545576810836792,
|
|
"rewards/format_reward_step": 0.72265625,
|
|
"step": 33
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9469494726508856,
|
|
"aux_distill/mean_u": 0.3839535558586833,
|
|
"aux_distill/n_active_tok": 126.125,
|
|
"calib/answer_extract_rate": 0.88671875,
|
|
"calib/auroc": 0.540210287013356,
|
|
"calib/avg_num_step_conf": 3.94140625,
|
|
"calib/ece": 0.32582589285714286,
|
|
"calib/final_conf_rate": 0.875,
|
|
"calib/format_rate": 0.79296875,
|
|
"calib/frac_conf_gt_0.9": 0.04017857142857143,
|
|
"calib/gap": 0.017872975277067327,
|
|
"calib/mean_conf": 0.40171874999999996,
|
|
"calib/mu_c": 0.41823529411764704,
|
|
"calib/mu_w": 0.4003623188405797,
|
|
"calib/nonempty_final_conf_rate": 0.875,
|
|
"calib/nonempty_reasoning_rate": 0.98046875,
|
|
"calib/nonempty_step_conf_rate": 0.91015625,
|
|
"calib/pce": 0.32582589285714286,
|
|
"calib/std_conf": 0.23804656358400092,
|
|
"calib/step_conf_rate": 0.91015625,
|
|
"calib/step_q_c": 0.4535365853658536,
|
|
"calib/step_q_c_n": 82.0,
|
|
"calib/step_q_gap": 0.05502737559951215,
|
|
"calib/step_q_w": 0.39850920976634147,
|
|
"calib/step_q_w_n": 927.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2176.0,
|
|
"completions/max_terminated_length": 2176.0,
|
|
"completions/mean_length": 319.31640625,
|
|
"completions/mean_terminated_length": 319.31640625,
|
|
"completions/min_length": 24.0,
|
|
"completions/min_terminated_length": 24.0,
|
|
"epoch": 0.03626666666666667,
|
|
"grad_norm": 0.01574413850903511,
|
|
"learning_rate": 4.611111111111112e-06,
|
|
"loss": 0.3404,
|
|
"num_tokens": 8654433.0,
|
|
"reward": 0.7380313277244568,
|
|
"reward_std": 0.3996368646621704,
|
|
"rewards/accuracy_reward_step": 0.06640625,
|
|
"rewards/final_brier_reward_step": 0.6166876554489136,
|
|
"rewards/format_reward_step": 0.79296875,
|
|
"step": 34
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9516091421246529,
|
|
"aux_distill/mean_u": 0.4002384927589469,
|
|
"aux_distill/n_active_tok": 110.625,
|
|
"calib/answer_extract_rate": 0.91015625,
|
|
"calib/auroc": 0.6142270861833105,
|
|
"calib/avg_num_step_conf": 3.45703125,
|
|
"calib/ece": 0.3480051724137931,
|
|
"calib/final_conf_rate": 0.90625,
|
|
"calib/format_rate": 0.8046875,
|
|
"calib/frac_conf_gt_0.9": 0.021551724137931036,
|
|
"calib/gap": 0.08140136798905606,
|
|
"calib/mean_conf": 0.4212810344827586,
|
|
"calib/mu_c": 0.49671764705882354,
|
|
"calib/mu_w": 0.4153162790697675,
|
|
"calib/nonempty_final_conf_rate": 0.90625,
|
|
"calib/nonempty_reasoning_rate": 0.98046875,
|
|
"calib/nonempty_step_conf_rate": 0.8984375,
|
|
"calib/pce": 0.3480051724137931,
|
|
"calib/std_conf": 0.23485841219390075,
|
|
"calib/step_conf_rate": 0.8984375,
|
|
"calib/step_q_c": 0.4683333333333333,
|
|
"calib/step_q_c_n": 66.0,
|
|
"calib/step_q_gap": 0.012650671550671566,
|
|
"calib/step_q_w": 0.45568266178266176,
|
|
"calib/step_q_w_n": 819.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2100.0,
|
|
"completions/max_terminated_length": 2100.0,
|
|
"completions/mean_length": 278.61328125,
|
|
"completions/mean_terminated_length": 278.61328125,
|
|
"completions/min_length": 23.0,
|
|
"completions/min_terminated_length": 23.0,
|
|
"epoch": 0.037333333333333336,
|
|
"grad_norm": 0.013850248418748379,
|
|
"learning_rate": 4.583333333333333e-06,
|
|
"loss": 0.3803,
|
|
"num_tokens": 8835014.0,
|
|
"reward": 0.7425938844680786,
|
|
"reward_std": 0.3359072804450989,
|
|
"rewards/accuracy_reward_step": 0.06640625,
|
|
"rewards/final_brier_reward_step": 0.6140941381454468,
|
|
"rewards/format_reward_step": 0.8046875,
|
|
"step": 35
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9501235522329807,
|
|
"aux_distill/mean_u": 0.3815904537690148,
|
|
"aux_distill/n_active_tok": 126.125,
|
|
"calib/answer_extract_rate": 0.93359375,
|
|
"calib/auroc": 0.5592874867068416,
|
|
"calib/avg_num_step_conf": 3.94140625,
|
|
"calib/ece": 0.2880041152263375,
|
|
"calib/final_conf_rate": 0.94921875,
|
|
"calib/format_rate": 0.875,
|
|
"calib/frac_conf_gt_0.9": 0.03292181069958848,
|
|
"calib/gap": 0.035317263381779485,
|
|
"calib/mean_conf": 0.39499999999999996,
|
|
"calib/mu_c": 0.42653846153846153,
|
|
"calib/mu_w": 0.39122119815668205,
|
|
"calib/nonempty_final_conf_rate": 0.94921875,
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
|
"calib/nonempty_step_conf_rate": 0.953125,
|
|
"calib/pce": 0.2880041152263375,
|
|
"calib/std_conf": 0.23859346073640167,
|
|
"calib/step_conf_rate": 0.953125,
|
|
"calib/step_q_c": 0.44833448275862076,
|
|
"calib/step_q_c_n": 116.0,
|
|
"calib/step_q_gap": 0.05377625207553005,
|
|
"calib/step_q_w": 0.3945582306830907,
|
|
"calib/step_q_w_n": 893.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2078.0,
|
|
"completions/max_terminated_length": 2078.0,
|
|
"completions/mean_length": 271.53125,
|
|
"completions/mean_terminated_length": 271.53125,
|
|
"completions/min_length": 23.0,
|
|
"completions/min_terminated_length": 23.0,
|
|
"epoch": 0.0384,
|
|
"grad_norm": 0.015552644617855549,
|
|
"learning_rate": 4.555555555555556e-06,
|
|
"loss": 0.4061,
|
|
"num_tokens": 9007238.0,
|
|
"reward": 0.8283183574676514,
|
|
"reward_std": 0.3272024989128113,
|
|
"rewards/accuracy_reward_step": 0.1015625,
|
|
"rewards/final_brier_reward_step": 0.6800742149353027,
|
|
"rewards/format_reward_step": 0.875,
|
|
"step": 36
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9274401869624853,
|
|
"aux_distill/mean_u": 0.3502309705297757,
|
|
"aux_distill/n_active_tok": 117.5,
|
|
"calib/answer_extract_rate": 0.9609375,
|
|
"calib/auroc": 0.5682860717264386,
|
|
"calib/avg_num_step_conf": 3.703125,
|
|
"calib/ece": 0.29065833333333335,
|
|
"calib/final_conf_rate": 0.9375,
|
|
"calib/format_rate": 0.87890625,
|
|
"calib/frac_conf_gt_0.9": 0.03333333333333333,
|
|
"calib/gap": 0.046988323603002535,
|
|
"calib/mean_conf": 0.37959166666666666,
|
|
"calib/mu_c": 0.4222727272727273,
|
|
"calib/mu_w": 0.37528440366972476,
|
|
"calib/nonempty_final_conf_rate": 0.9375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.94140625,
|
|
"calib/pce": 0.28929166666666667,
|
|
"calib/std_conf": 0.23975560458632778,
|
|
"calib/step_conf_rate": 0.94140625,
|
|
"calib/step_q_c": 0.4208333333333333,
|
|
"calib/step_q_c_n": 84.0,
|
|
"calib/step_q_gap": 0.023319444444444393,
|
|
"calib/step_q_w": 0.3975138888888889,
|
|
"calib/step_q_w_n": 864.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2666.0,
|
|
"completions/max_terminated_length": 2666.0,
|
|
"completions/mean_length": 275.14453125,
|
|
"completions/mean_terminated_length": 276.2235412597656,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 24.0,
|
|
"epoch": 0.039466666666666664,
|
|
"grad_norm": 0.014115996658802032,
|
|
"learning_rate": 4.527777777777778e-06,
|
|
"loss": 0.3156,
|
|
"num_tokens": 9184771.0,
|
|
"reward": 0.8288605213165283,
|
|
"reward_std": 0.30853700637817383,
|
|
"rewards/accuracy_reward_step": 0.0859375,
|
|
"rewards/final_brier_reward_step": 0.6928772926330566,
|
|
"rewards/format_reward_step": 0.87890625,
|
|
"step": 37
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.959440141916275,
|
|
"aux_distill/mean_u": 0.41723974982834844,
|
|
"aux_distill/n_active_tok": 136.25,
|
|
"calib/answer_extract_rate": 0.95703125,
|
|
"calib/auroc": 0.49417747641509435,
|
|
"calib/avg_num_step_conf": 4.2578125,
|
|
"calib/ece": 0.25258196721311477,
|
|
"calib/final_conf_rate": 0.953125,
|
|
"calib/format_rate": 0.90234375,
|
|
"calib/frac_conf_gt_0.9": 0.01639344262295082,
|
|
"calib/gap": -0.012299528301886742,
|
|
"calib/mean_conf": 0.35381147540983604,
|
|
"calib/mu_c": 0.343125,
|
|
"calib/mu_w": 0.35542452830188676,
|
|
"calib/nonempty_final_conf_rate": 0.953125,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.97265625,
|
|
"calib/pce": 0.23762295081967214,
|
|
"calib/std_conf": 0.21697770649128129,
|
|
"calib/step_conf_rate": 0.97265625,
|
|
"calib/step_q_c": 0.4103448275862069,
|
|
"calib/step_q_c_n": 116.0,
|
|
"calib/step_q_gap": 0.047790412801812565,
|
|
"calib/step_q_w": 0.3625544147843943,
|
|
"calib/step_q_w_n": 974.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2869.0,
|
|
"completions/max_terminated_length": 2869.0,
|
|
"completions/mean_length": 294.90625,
|
|
"completions/mean_terminated_length": 294.90625,
|
|
"completions/min_length": 24.0,
|
|
"completions/min_terminated_length": 24.0,
|
|
"epoch": 0.04053333333333333,
|
|
"grad_norm": 0.012253638356924057,
|
|
"learning_rate": 4.5e-06,
|
|
"loss": 0.4246,
|
|
"num_tokens": 9367155.0,
|
|
"reward": 0.8671990036964417,
|
|
"reward_std": 0.27873316407203674,
|
|
"rewards/accuracy_reward_step": 0.125,
|
|
"rewards/final_brier_reward_step": 0.7070543169975281,
|
|
"rewards/format_reward_step": 0.90234375,
|
|
"step": 38
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9337054584175348,
|
|
"aux_distill/mean_u": 0.3689994358002082,
|
|
"aux_distill/n_active_tok": 131.0,
|
|
"calib/answer_extract_rate": 0.953125,
|
|
"calib/auroc": 0.5425077639751553,
|
|
"calib/avg_num_step_conf": 4.09375,
|
|
"calib/ece": 0.3108972759682684,
|
|
"calib/final_conf_rate": 0.96484375,
|
|
"calib/format_rate": 0.9140625,
|
|
"calib/frac_conf_gt_0.9": 0.020242914979757085,
|
|
"calib/gap": 0.04685220792396477,
|
|
"calib/mean_conf": 0.3814235917577421,
|
|
"calib/mu_c": 0.4239130434782608,
|
|
"calib/mu_w": 0.37706083555429604,
|
|
"calib/nonempty_final_conf_rate": 0.96484375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.96484375,
|
|
"calib/pce": 0.299601729409564,
|
|
"calib/std_conf": 0.23669469534543297,
|
|
"calib/step_conf_rate": 0.96484375,
|
|
"calib/step_q_c": 0.4155555555555555,
|
|
"calib/step_q_c_n": 81.0,
|
|
"calib/step_q_gap": 0.016048833735493484,
|
|
"calib/step_q_w": 0.399506721820062,
|
|
"calib/step_q_w_n": 967.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2210.0,
|
|
"completions/max_terminated_length": 2210.0,
|
|
"completions/mean_length": 279.98046875,
|
|
"completions/mean_terminated_length": 279.98046875,
|
|
"completions/min_length": 10.0,
|
|
"completions/min_terminated_length": 10.0,
|
|
"epoch": 0.0416,
|
|
"grad_norm": 0.01320998277515173,
|
|
"learning_rate": 4.472222222222223e-06,
|
|
"loss": 0.3357,
|
|
"num_tokens": 9544918.0,
|
|
"reward": 0.8607459664344788,
|
|
"reward_std": 0.26971176266670227,
|
|
"rewards/accuracy_reward_step": 0.08984375,
|
|
"rewards/final_brier_reward_step": 0.7175856828689575,
|
|
"rewards/format_reward_step": 0.9140625,
|
|
"step": 39
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9231565408408642,
|
|
"aux_distill/mean_u": 0.37410681811433055,
|
|
"aux_distill/n_active_tok": 131.0,
|
|
"calib/answer_extract_rate": 0.96484375,
|
|
"calib/auroc": 0.39944819129368486,
|
|
"calib/avg_num_step_conf": 4.09375,
|
|
"calib/ece": 0.3130485829959514,
|
|
"calib/final_conf_rate": 0.96484375,
|
|
"calib/format_rate": 0.92578125,
|
|
"calib/frac_conf_gt_0.9": 0.024291497975708502,
|
|
"calib/gap": -0.08006805640711212,
|
|
"calib/mean_conf": 0.35410121457489874,
|
|
"calib/mu_c": 0.2785714285714286,
|
|
"calib/mu_w": 0.3586394849785407,
|
|
"calib/nonempty_final_conf_rate": 0.96484375,
|
|
"calib/nonempty_reasoning_rate": 0.9921875,
|
|
"calib/nonempty_step_conf_rate": 0.97265625,
|
|
"calib/pce": 0.30523481781376516,
|
|
"calib/std_conf": 0.2113434420188021,
|
|
"calib/step_conf_rate": 0.97265625,
|
|
"calib/step_q_c": 0.296734693877551,
|
|
"calib/step_q_c_n": 49.0,
|
|
"calib/step_q_gap": -0.058788429245572094,
|
|
"calib/step_q_w": 0.3555231231231231,
|
|
"calib/step_q_w_n": 999.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1816.0,
|
|
"completions/max_terminated_length": 1816.0,
|
|
"completions/mean_length": 246.56640625,
|
|
"completions/mean_terminated_length": 246.56640625,
|
|
"completions/min_length": 26.0,
|
|
"completions/min_terminated_length": 26.0,
|
|
"epoch": 0.042666666666666665,
|
|
"grad_norm": 0.012942949309945107,
|
|
"learning_rate": 4.444444444444444e-06,
|
|
"loss": 0.3321,
|
|
"num_tokens": 9714799.0,
|
|
"reward": 0.8622703552246094,
|
|
"reward_std": 0.23404854536056519,
|
|
"rewards/accuracy_reward_step": 0.0546875,
|
|
"rewards/final_brier_reward_step": 0.7440719604492188,
|
|
"rewards/format_reward_step": 0.92578125,
|
|
"step": 40
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9747274816036224,
|
|
"aux_distill/mean_u": 0.40832076939570366,
|
|
"aux_distill/n_active_tok": 133.625,
|
|
"calib/answer_extract_rate": 0.9296875,
|
|
"calib/auroc": 0.5851856301814251,
|
|
"calib/avg_num_step_conf": 4.17578125,
|
|
"calib/ece": 0.18477224013948296,
|
|
"calib/final_conf_rate": 0.953125,
|
|
"calib/format_rate": 0.8984375,
|
|
"calib/frac_conf_gt_0.9": 0.00819672131147541,
|
|
"calib/gap": 0.051428750493258124,
|
|
"calib/mean_conf": 0.31940808772936946,
|
|
"calib/mu_c": 0.3621951219512195,
|
|
"calib/mu_w": 0.3107663714579614,
|
|
"calib/nonempty_final_conf_rate": 0.953125,
|
|
"calib/nonempty_reasoning_rate": 0.984375,
|
|
"calib/nonempty_step_conf_rate": 0.9609375,
|
|
"calib/pce": 0.16807377049180328,
|
|
"calib/std_conf": 0.20583733841643923,
|
|
"calib/step_conf_rate": 0.9609375,
|
|
"calib/step_q_c": 0.412258064516129,
|
|
"calib/step_q_c_n": 155.0,
|
|
"calib/step_q_gap": 0.07152944376965276,
|
|
"calib/step_q_w": 0.34072862074647625,
|
|
"calib/step_q_w_n": 914.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2628.0,
|
|
"completions/max_terminated_length": 2628.0,
|
|
"completions/mean_length": 266.53515625,
|
|
"completions/mean_terminated_length": 267.5804138183594,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 25.0,
|
|
"epoch": 0.04373333333333333,
|
|
"grad_norm": 0.012249798513948917,
|
|
"learning_rate": 4.416666666666667e-06,
|
|
"loss": 0.3153,
|
|
"num_tokens": 9890280.0,
|
|
"reward": 0.8936082124710083,
|
|
"reward_std": 0.2826644480228424,
|
|
"rewards/accuracy_reward_step": 0.16015625,
|
|
"rewards/final_brier_reward_step": 0.7286226749420166,
|
|
"rewards/format_reward_step": 0.8984375,
|
|
"step": 41
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9232034366577864,
|
|
"aux_distill/mean_u": 0.412800156292371,
|
|
"aux_distill/n_active_tok": 146.5,
|
|
"calib/answer_extract_rate": 0.953125,
|
|
"calib/auroc": 0.46169073125291105,
|
|
"calib/avg_num_step_conf": 4.58203125,
|
|
"calib/ece": 0.2671510489795918,
|
|
"calib/final_conf_rate": 0.95703125,
|
|
"calib/format_rate": 0.94140625,
|
|
"calib/frac_conf_gt_0.9": 0.012244897959183673,
|
|
"calib/gap": -0.027628815323707534,
|
|
"calib/mean_conf": 0.33653880408163267,
|
|
"calib/mu_c": 0.3110526315789473,
|
|
"calib/mu_w": 0.33868144690265484,
|
|
"calib/nonempty_final_conf_rate": 0.95703125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.2630694163265306,
|
|
"calib/std_conf": 0.20534667376184962,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.3559259259259259,
|
|
"calib/step_q_c_n": 54.0,
|
|
"calib/step_q_gap": 0.0018249268195809631,
|
|
"calib/step_q_w": 0.35410099910634496,
|
|
"calib/step_q_w_n": 1119.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2789.0,
|
|
"completions/max_terminated_length": 2789.0,
|
|
"completions/mean_length": 263.94140625,
|
|
"completions/mean_terminated_length": 264.97650146484375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 59.0,
|
|
"epoch": 0.0448,
|
|
"grad_norm": 0.012458818033337593,
|
|
"learning_rate": 4.388888888888889e-06,
|
|
"loss": 0.4661,
|
|
"num_tokens": 10062217.0,
|
|
"reward": 0.8924587965011597,
|
|
"reward_std": 0.22109654545783997,
|
|
"rewards/accuracy_reward_step": 0.07421875,
|
|
"rewards/final_brier_reward_step": 0.7692925930023193,
|
|
"rewards/format_reward_step": 0.94140625,
|
|
"step": 42
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9289751965552568,
|
|
"aux_distill/mean_u": 0.36524919348664364,
|
|
"aux_distill/n_active_tok": 143.5,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.510925925925926,
|
|
"calib/avg_num_step_conf": 4.484375,
|
|
"calib/ece": 0.23542971887550201,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.94921875,
|
|
"calib/frac_conf_gt_0.9": 0.008032128514056224,
|
|
"calib/gap": -0.01837222222222229,
|
|
"calib/mean_conf": 0.31618473895582333,
|
|
"calib/mu_c": 0.2995833333333333,
|
|
"calib/mu_w": 0.3179555555555556,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.22761445783132528,
|
|
"calib/std_conf": 0.21726067017121417,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.3331,
|
|
"calib/step_q_c_n": 100.0,
|
|
"calib/step_q_gap": 0.003037022900763353,
|
|
"calib/step_q_w": 0.33006297709923665,
|
|
"calib/step_q_w_n": 1048.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 950.0,
|
|
"completions/max_terminated_length": 950.0,
|
|
"completions/mean_length": 250.06640625,
|
|
"completions/mean_terminated_length": 251.0470733642578,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 49.0,
|
|
"epoch": 0.04586666666666667,
|
|
"grad_norm": 0.013071301393210888,
|
|
"learning_rate": 4.361111111111112e-06,
|
|
"loss": 0.2971,
|
|
"num_tokens": 10231458.0,
|
|
"reward": 0.9076724648475647,
|
|
"reward_std": 0.2054036259651184,
|
|
"rewards/accuracy_reward_step": 0.09375,
|
|
"rewards/final_brier_reward_step": 0.772376298904419,
|
|
"rewards/format_reward_step": 0.94921875,
|
|
"step": 43
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9430578760802746,
|
|
"aux_distill/mean_u": 0.398295250677591,
|
|
"aux_distill/n_active_tok": 141.25,
|
|
"calib/answer_extract_rate": 0.95703125,
|
|
"calib/auroc": 0.4277208859252203,
|
|
"calib/avg_num_step_conf": 4.44140625,
|
|
"calib/ece": 0.2634333333333333,
|
|
"calib/final_conf_rate": 0.9375,
|
|
"calib/format_rate": 0.92578125,
|
|
"calib/frac_conf_gt_0.9": 0.016666666666666666,
|
|
"calib/gap": -0.06060871636103832,
|
|
"calib/mean_conf": 0.3316,
|
|
"calib/mu_c": 0.27578947368421053,
|
|
"calib/mu_w": 0.33639819004524885,
|
|
"calib/nonempty_final_conf_rate": 0.9375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.2579333333333333,
|
|
"calib/std_conf": 0.20778155195621514,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.28509638554216865,
|
|
"calib/step_q_c_n": 83.0,
|
|
"calib/step_q_gap": -0.05889792185821091,
|
|
"calib/step_q_w": 0.34399430740037956,
|
|
"calib/step_q_w_n": 1054.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 3016.0,
|
|
"completions/max_terminated_length": 3016.0,
|
|
"completions/mean_length": 279.29296875,
|
|
"completions/mean_terminated_length": 282.6047668457031,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 24.0,
|
|
"epoch": 0.046933333333333334,
|
|
"grad_norm": 0.012910122983157635,
|
|
"learning_rate": 4.333333333333334e-06,
|
|
"loss": 0.3078,
|
|
"num_tokens": 10409277.0,
|
|
"reward": 0.8753119111061096,
|
|
"reward_std": 0.2265249788761139,
|
|
"rewards/accuracy_reward_step": 0.07421875,
|
|
"rewards/final_brier_reward_step": 0.7506237626075745,
|
|
"rewards/format_reward_step": 0.92578125,
|
|
"step": 44
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9212546311318874,
|
|
"aux_distill/mean_u": 0.35253776884269494,
|
|
"aux_distill/n_active_tok": 134.0,
|
|
"calib/answer_extract_rate": 0.96875,
|
|
"calib/auroc": 0.55645390070922,
|
|
"calib/avg_num_step_conf": 4.1875,
|
|
"calib/ece": 0.25850800000000007,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.95703125,
|
|
"calib/frac_conf_gt_0.9": 0.008,
|
|
"calib/gap": 0.03421134751773047,
|
|
"calib/mean_conf": 0.318508,
|
|
"calib/mu_c": 0.35066666666666674,
|
|
"calib/mu_w": 0.31645531914893626,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.25850800000000007,
|
|
"calib/std_conf": 0.20830739289809183,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.339375,
|
|
"calib/step_q_c_n": 48.0,
|
|
"calib/step_q_gap": 0.009534765624999997,
|
|
"calib/step_q_w": 0.329840234375,
|
|
"calib/step_q_w_n": 1024.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 3059.0,
|
|
"completions/max_terminated_length": 3059.0,
|
|
"completions/mean_length": 263.28125,
|
|
"completions/mean_terminated_length": 263.28125,
|
|
"completions/min_length": 44.0,
|
|
"completions/min_terminated_length": 44.0,
|
|
"epoch": 0.048,
|
|
"grad_norm": 0.014113575220108032,
|
|
"learning_rate": 4.305555555555556e-06,
|
|
"loss": 0.4027,
|
|
"num_tokens": 10581725.0,
|
|
"reward": 0.9087159633636475,
|
|
"reward_std": 0.17309433221817017,
|
|
"rewards/accuracy_reward_step": 0.05859375,
|
|
"rewards/final_brier_reward_step": 0.8018069267272949,
|
|
"rewards/format_reward_step": 0.95703125,
|
|
"step": 45
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9593624100089073,
|
|
"aux_distill/mean_u": 0.4153745987914725,
|
|
"aux_distill/n_active_tok": 138.25,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.46116013308259807,
|
|
"calib/avg_num_step_conf": 4.32421875,
|
|
"calib/ece": 0.20393607751023624,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.007874015748031496,
|
|
"calib/gap": -0.03989102767475766,
|
|
"calib/mean_conf": 0.29469985703779533,
|
|
"calib/mu_c": 0.25967741935483873,
|
|
"calib/mu_w": 0.2995684470295964,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.18829434522677166,
|
|
"calib/std_conf": 0.20188818989869645,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.3445454545454546,
|
|
"calib/step_q_c_n": 121.0,
|
|
"calib/step_q_gap": 0.0017594504886594975,
|
|
"calib/step_q_w": 0.3427860040567951,
|
|
"calib/step_q_w_n": 986.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1107.0,
|
|
"completions/max_terminated_length": 1107.0,
|
|
"completions/mean_length": 232.80078125,
|
|
"completions/mean_terminated_length": 233.7137451171875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 20.0,
|
|
"epoch": 0.04906666666666667,
|
|
"grad_norm": 0.01254682894796133,
|
|
"learning_rate": 4.277777777777778e-06,
|
|
"loss": 0.2697,
|
|
"num_tokens": 10746090.0,
|
|
"reward": 0.9490313529968262,
|
|
"reward_std": 0.14605683088302612,
|
|
"rewards/accuracy_reward_step": 0.12109375,
|
|
"rewards/final_brier_reward_step": 0.7965002655982971,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 46
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9571955800056458,
|
|
"aux_distill/mean_u": 0.4023008092076161,
|
|
"aux_distill/n_active_tok": 122.875,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.46918103448275855,
|
|
"calib/avg_num_step_conf": 3.8515625,
|
|
"calib/ece": 0.28256349206349207,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.94921875,
|
|
"calib/frac_conf_gt_0.9": 0.015873015873015872,
|
|
"calib/gap": -0.007560344827586163,
|
|
"calib/mean_conf": 0.3319603174603175,
|
|
"calib/mu_c": 0.325,
|
|
"calib/mu_w": 0.3325603448275862,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.98046875,
|
|
"calib/pce": 0.2675793650793651,
|
|
"calib/std_conf": 0.22515204389421048,
|
|
"calib/step_conf_rate": 0.98046875,
|
|
"calib/step_q_c": 0.3146268656716419,
|
|
"calib/step_q_c_n": 67.0,
|
|
"calib/step_q_gap": -0.02809892322933749,
|
|
"calib/step_q_w": 0.3427257889009794,
|
|
"calib/step_q_w_n": 919.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2447.0,
|
|
"completions/max_terminated_length": 2447.0,
|
|
"completions/mean_length": 225.12890625,
|
|
"completions/mean_terminated_length": 225.12890625,
|
|
"completions/min_length": 57.0,
|
|
"completions/min_terminated_length": 57.0,
|
|
"epoch": 0.050133333333333335,
|
|
"grad_norm": 0.014441216364502907,
|
|
"learning_rate": 4.25e-06,
|
|
"loss": 0.291,
|
|
"num_tokens": 10909699.0,
|
|
"reward": 0.8981256484985352,
|
|
"reward_std": 0.1963968276977539,
|
|
"rewards/accuracy_reward_step": 0.078125,
|
|
"rewards/final_brier_reward_step": 0.7689076066017151,
|
|
"rewards/format_reward_step": 0.94921875,
|
|
"step": 47
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9565908145159483,
|
|
"aux_distill/mean_u": 0.382051206029253,
|
|
"aux_distill/n_active_tok": 127.875,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.5426973907150119,
|
|
"calib/avg_num_step_conf": 4.01953125,
|
|
"calib/ece": 0.21650036651687815,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.007905138339920948,
|
|
"calib/gap": 0.00587946273330664,
|
|
"calib/mean_conf": 0.3162632123666805,
|
|
"calib/mu_c": 0.3215384615384616,
|
|
"calib/mu_w": 0.31565899880515497,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.21499839023229317,
|
|
"calib/std_conf": 0.20934874325681233,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.3501052631578947,
|
|
"calib/step_q_c_n": 95.0,
|
|
"calib/step_q_gap": 0.015301194635410742,
|
|
"calib/step_q_w": 0.33480406852248396,
|
|
"calib/step_q_w_n": 934.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2048.0,
|
|
"completions/max_terminated_length": 2048.0,
|
|
"completions/mean_length": 209.16015625,
|
|
"completions/mean_terminated_length": 209.98040771484375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 46.0,
|
|
"epoch": 0.0512,
|
|
"grad_norm": 0.012829742394387722,
|
|
"learning_rate": 4.222222222222223e-06,
|
|
"loss": 0.2875,
|
|
"num_tokens": 11066932.0,
|
|
"reward": 0.9423176050186157,
|
|
"reward_std": 0.1549239456653595,
|
|
"rewards/accuracy_reward_step": 0.1015625,
|
|
"rewards/final_brier_reward_step": 0.8026039600372314,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 48
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9549869894981384,
|
|
"aux_distill/mean_u": 0.40374540819858157,
|
|
"aux_distill/n_active_tok": 134.5,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.4641228070175439,
|
|
"calib/avg_num_step_conf": 4.203125,
|
|
"calib/ece": 0.24660474308300395,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.007905138339920948,
|
|
"calib/gap": -0.030000877192982445,
|
|
"calib/mean_conf": 0.31663636363636366,
|
|
"calib/mu_c": 0.2896,
|
|
"calib/mu_w": 0.31960087719298247,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.2322134387351779,
|
|
"calib/std_conf": 0.21880287431412032,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.30495145631067966,
|
|
"calib/step_q_c_n": 103.0,
|
|
"calib/step_q_gap": -0.04869808120216723,
|
|
"calib/step_q_w": 0.3536495375128469,
|
|
"calib/step_q_w_n": 973.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2319.0,
|
|
"completions/max_terminated_length": 2319.0,
|
|
"completions/mean_length": 218.51171875,
|
|
"completions/mean_terminated_length": 218.51171875,
|
|
"completions/min_length": 50.0,
|
|
"completions/min_terminated_length": 50.0,
|
|
"epoch": 0.05226666666666667,
|
|
"grad_norm": 0.01155038271099329,
|
|
"learning_rate": 4.194444444444445e-06,
|
|
"loss": 0.3168,
|
|
"num_tokens": 11227407.0,
|
|
"reward": 0.9383452534675598,
|
|
"reward_std": 0.1467917412519455,
|
|
"rewards/accuracy_reward_step": 0.09765625,
|
|
"rewards/final_brier_reward_step": 0.7985655069351196,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 49
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9325551148504019,
|
|
"aux_distill/mean_u": 0.39808962162575967,
|
|
"aux_distill/n_active_tok": 148.25,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.4599811676082862,
|
|
"calib/avg_num_step_conf": 4.63671875,
|
|
"calib/ece": 0.24267716535433073,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.029524482109227868,
|
|
"calib/mean_conf": 0.31354330708661415,
|
|
"calib/mu_c": 0.28611111111111115,
|
|
"calib/mu_w": 0.315635593220339,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.24267716535433073,
|
|
"calib/std_conf": 0.18163313183774915,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.34123287671232877,
|
|
"calib/step_q_c_n": 73.0,
|
|
"calib/step_q_gap": -0.00013390964314696774,
|
|
"calib/step_q_w": 0.34136678635547574,
|
|
"calib/step_q_w_n": 1114.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2543.0,
|
|
"completions/max_terminated_length": 2543.0,
|
|
"completions/mean_length": 244.6875,
|
|
"completions/mean_terminated_length": 245.64707946777344,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 54.0,
|
|
"epoch": 0.05333333333333334,
|
|
"grad_norm": 0.010162976570427418,
|
|
"learning_rate": 4.166666666666667e-06,
|
|
"loss": 0.3105,
|
|
"num_tokens": 11395407.0,
|
|
"reward": 0.9332138299942017,
|
|
"reward_std": 0.13436943292617798,
|
|
"rewards/accuracy_reward_step": 0.0703125,
|
|
"rewards/final_brier_reward_step": 0.8195527195930481,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"step": 50
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9404325764626265,
|
|
"aux_distill/mean_u": 0.35824322088937677,
|
|
"aux_distill/n_active_tok": 145.125,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5232451620290851,
|
|
"calib/avg_num_step_conf": 4.53515625,
|
|
"calib/ece": 0.2178271653543307,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.023622047244094488,
|
|
"calib/gap": 0.023285480361845856,
|
|
"calib/mean_conf": 0.3519366141732283,
|
|
"calib/mu_c": 0.37146341463414634,
|
|
"calib/mu_w": 0.3481779342723005,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.20417322834645665,
|
|
"calib/std_conf": 0.22717483458875004,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.38983695652173905,
|
|
"calib/step_q_c_n": 184.0,
|
|
"calib/step_q_gap": 0.004287314761247718,
|
|
"calib/step_q_w": 0.38554964176049134,
|
|
"calib/step_q_w_n": 977.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1844.0,
|
|
"completions/max_terminated_length": 1844.0,
|
|
"completions/mean_length": 227.8046875,
|
|
"completions/mean_terminated_length": 227.8046875,
|
|
"completions/min_length": 47.0,
|
|
"completions/min_terminated_length": 47.0,
|
|
"epoch": 0.0544,
|
|
"grad_norm": 0.012741345912218094,
|
|
"learning_rate": 4.138888888888889e-06,
|
|
"loss": 0.3269,
|
|
"num_tokens": 11563021.0,
|
|
"reward": 0.9533712863922119,
|
|
"reward_std": 0.18221133947372437,
|
|
"rewards/accuracy_reward_step": 0.16015625,
|
|
"rewards/final_brier_reward_step": 0.766117513179779,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 51
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9390203971415758,
|
|
"aux_distill/mean_u": 0.4210420142587068,
|
|
"aux_distill/n_active_tok": 148.375,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.547441545238902,
|
|
"calib/avg_num_step_conf": 4.63671875,
|
|
"calib/ece": 0.24308300395256915,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.003952569169960474,
|
|
"calib/gap": 0.03881565570992879,
|
|
"calib/mean_conf": 0.3178656126482214,
|
|
"calib/mu_c": 0.35269230769230764,
|
|
"calib/mu_w": 0.31387665198237885,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.22909090909090907,
|
|
"calib/std_conf": 0.21644675865289373,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.3586407766990291,
|
|
"calib/step_q_c_n": 103.0,
|
|
"calib/step_q_gap": -0.006827857987317776,
|
|
"calib/step_q_w": 0.3654686346863469,
|
|
"calib/step_q_w_n": 1084.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2905.0,
|
|
"completions/max_terminated_length": 2905.0,
|
|
"completions/mean_length": 235.1171875,
|
|
"completions/mean_terminated_length": 235.1171875,
|
|
"completions/min_length": 48.0,
|
|
"completions/min_terminated_length": 48.0,
|
|
"epoch": 0.055466666666666664,
|
|
"grad_norm": 0.012383176013827324,
|
|
"learning_rate": 4.111111111111111e-06,
|
|
"loss": 0.3683,
|
|
"num_tokens": 11731163.0,
|
|
"reward": 0.9395182132720947,
|
|
"reward_std": 0.15534138679504395,
|
|
"rewards/accuracy_reward_step": 0.1015625,
|
|
"rewards/final_brier_reward_step": 0.8009113073348999,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"step": 52
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9244867339730263,
|
|
"aux_distill/mean_u": 0.4068314368270426,
|
|
"aux_distill/n_active_tok": 151.625,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5781591074460345,
|
|
"calib/avg_num_step_conf": 4.73828125,
|
|
"calib/ece": 0.16415686274509805,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.051749939364540365,
|
|
"calib/mean_conf": 0.29780392156862745,
|
|
"calib/mu_c": 0.3418421052631579,
|
|
"calib/mu_w": 0.2900921658986175,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.15647058823529414,
|
|
"calib/std_conf": 0.20419913575587145,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.37377142857142853,
|
|
"calib/step_q_c_n": 175.0,
|
|
"calib/step_q_gap": 0.019372584640792723,
|
|
"calib/step_q_w": 0.3543988439306358,
|
|
"calib/step_q_w_n": 1038.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2570.0,
|
|
"completions/max_terminated_length": 2570.0,
|
|
"completions/mean_length": 237.59765625,
|
|
"completions/mean_terminated_length": 237.59765625,
|
|
"completions/min_length": 69.0,
|
|
"completions/min_terminated_length": 69.0,
|
|
"epoch": 0.05653333333333333,
|
|
"grad_norm": 0.011600131168961525,
|
|
"learning_rate": 4.083333333333334e-06,
|
|
"loss": 0.2895,
|
|
"num_tokens": 11897812.0,
|
|
"reward": 0.9779938459396362,
|
|
"reward_std": 0.1375225931406021,
|
|
"rewards/accuracy_reward_step": 0.1484375,
|
|
"rewards/final_brier_reward_step": 0.8153626918792725,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 53
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.940915510058403,
|
|
"aux_distill/mean_u": 0.3600278968701587,
|
|
"aux_distill/n_active_tok": 152.25,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5066588785046728,
|
|
"calib/avg_num_step_conf": 4.7578125,
|
|
"calib/ece": 0.20701181102362204,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.003937007874015748,
|
|
"calib/gap": -0.0011144859813084218,
|
|
"calib/mean_conf": 0.3096889763779528,
|
|
"calib/mu_c": 0.30875,
|
|
"calib/mu_w": 0.30986448598130845,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.17961023622047245,
|
|
"calib/std_conf": 0.19435782031969961,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"calib/step_q_c": 0.4051515151515151,
|
|
"calib/step_q_c_n": 165.0,
|
|
"calib/step_q_gap": 0.026610204610204524,
|
|
"calib/step_q_w": 0.37854131054131057,
|
|
"calib/step_q_w_n": 1053.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2306.0,
|
|
"completions/max_terminated_length": 2306.0,
|
|
"completions/mean_length": 236.3125,
|
|
"completions/mean_terminated_length": 236.3125,
|
|
"completions/min_length": 49.0,
|
|
"completions/min_terminated_length": 49.0,
|
|
"epoch": 0.0576,
|
|
"grad_norm": 0.01169079914689064,
|
|
"learning_rate": 4.055555555555556e-06,
|
|
"loss": 0.3243,
|
|
"num_tokens": 12064540.0,
|
|
"reward": 0.9626379609107971,
|
|
"reward_std": 0.158560112118721,
|
|
"rewards/accuracy_reward_step": 0.15625,
|
|
"rewards/final_brier_reward_step": 0.7885571122169495,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 54
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.90636813826859,
|
|
"aux_distill/mean_u": 0.3753203712973043,
|
|
"aux_distill/n_active_tok": 168.75,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.5286458333333333,
|
|
"calib/avg_num_step_conf": 5.28515625,
|
|
"calib/ece": 0.21897783464566928,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.003937007874015748,
|
|
"calib/gap": 0.015135720720720724,
|
|
"calib/mean_conf": 0.3255211417322835,
|
|
"calib/mu_c": 0.33875,
|
|
"calib/mu_w": 0.32361427927927927,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.2092573622047244,
|
|
"calib/std_conf": 0.22133494742193616,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.4003896103896104,
|
|
"calib/step_q_c_n": 154.0,
|
|
"calib/step_q_gap": 0.0166956987966162,
|
|
"calib/step_q_w": 0.3836939115929942,
|
|
"calib/step_q_w_n": 1199.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 847.0,
|
|
"completions/max_terminated_length": 847.0,
|
|
"completions/mean_length": 230.84375,
|
|
"completions/mean_terminated_length": 232.6614227294922,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 55.0,
|
|
"epoch": 0.058666666666666666,
|
|
"grad_norm": 0.011481177993118763,
|
|
"learning_rate": 4.027777777777779e-06,
|
|
"loss": 0.2571,
|
|
"num_tokens": 12231460.0,
|
|
"reward": 0.9448150396347046,
|
|
"reward_std": 0.16676904261112213,
|
|
"rewards/accuracy_reward_step": 0.125,
|
|
"rewards/final_brier_reward_step": 0.7880675792694092,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"step": 55
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9204953722655773,
|
|
"aux_distill/mean_u": 0.3976945579537004,
|
|
"aux_distill/n_active_tok": 161.0,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5634854771784232,
|
|
"calib/avg_num_step_conf": 5.0390625,
|
|
"calib/ece": 0.2430078125,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.01171875,
|
|
"calib/gap": 0.07690456431535264,
|
|
"calib/mean_conf": 0.30160156250000003,
|
|
"calib/mu_c": 0.37399999999999994,
|
|
"calib/mu_w": 0.2970954356846473,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.2430078125,
|
|
"calib/std_conf": 0.21080058852517133,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.40829787234042547,
|
|
"calib/step_q_c_n": 94.0,
|
|
"calib/step_q_gap": 0.02134293923005759,
|
|
"calib/step_q_w": 0.3869549331103679,
|
|
"calib/step_q_w_n": 1196.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 851.0,
|
|
"completions/max_terminated_length": 851.0,
|
|
"completions/mean_length": 223.953125,
|
|
"completions/mean_terminated_length": 224.83139038085938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 52.0,
|
|
"epoch": 0.05973333333333333,
|
|
"grad_norm": 0.012053816579282284,
|
|
"learning_rate": 4.000000000000001e-06,
|
|
"loss": 0.3112,
|
|
"num_tokens": 12395632.0,
|
|
"reward": 0.9471312761306763,
|
|
"reward_std": 0.11150771379470825,
|
|
"rewards/accuracy_reward_step": 0.05859375,
|
|
"rewards/final_brier_reward_step": 0.8434812426567078,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 56
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9139548428356647,
|
|
"aux_distill/mean_u": 0.37871803075938426,
|
|
"aux_distill/n_active_tok": 175.0,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.447172619047619,
|
|
"calib/avg_num_step_conf": 5.46875,
|
|
"calib/ece": 0.22818503937007875,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.023622047244094488,
|
|
"calib/gap": -0.04487946428571421,
|
|
"calib/mean_conf": 0.2985787401574803,
|
|
"calib/mu_c": 0.259,
|
|
"calib/mu_w": 0.3038794642857142,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.2043267716535433,
|
|
"calib/std_conf": 0.23301049218710934,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"calib/step_q_c": 0.3707453416149068,
|
|
"calib/step_q_c_n": 161.0,
|
|
"calib/step_q_gap": -0.0028575639541004727,
|
|
"calib/step_q_w": 0.37360290556900727,
|
|
"calib/step_q_w_n": 1239.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2270.0,
|
|
"completions/max_terminated_length": 2270.0,
|
|
"completions/mean_length": 255.6953125,
|
|
"completions/mean_terminated_length": 255.6953125,
|
|
"completions/min_length": 53.0,
|
|
"completions/min_terminated_length": 53.0,
|
|
"epoch": 0.0608,
|
|
"grad_norm": 0.01160177867859602,
|
|
"learning_rate": 3.972222222222223e-06,
|
|
"loss": 0.3035,
|
|
"num_tokens": 12567882.0,
|
|
"reward": 0.9475748538970947,
|
|
"reward_std": 0.14415797591209412,
|
|
"rewards/accuracy_reward_step": 0.1171875,
|
|
"rewards/final_brier_reward_step": 0.7896810173988342,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"step": 57
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9138444364070892,
|
|
"aux_distill/mean_u": 0.35042719995743726,
|
|
"aux_distill/n_active_tok": 160.75,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.46266721266721267,
|
|
"calib/avg_num_step_conf": 5.0234375,
|
|
"calib/ece": 0.2456862745098039,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.01568627450980392,
|
|
"calib/gap": -0.035843570843570816,
|
|
"calib/mean_conf": 0.3475686274509804,
|
|
"calib/mu_c": 0.3163636363636364,
|
|
"calib/mu_w": 0.3522072072072072,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.23192156862745097,
|
|
"calib/std_conf": 0.22922967803116806,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.39843537414965985,
|
|
"calib/step_q_c_n": 147.0,
|
|
"calib/step_q_gap": 0.009971809619370164,
|
|
"calib/step_q_w": 0.3884635645302897,
|
|
"calib/step_q_w_n": 1139.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2465.0,
|
|
"completions/max_terminated_length": 2465.0,
|
|
"completions/mean_length": 226.9453125,
|
|
"completions/mean_terminated_length": 226.9453125,
|
|
"completions/min_length": 45.0,
|
|
"completions/min_terminated_length": 45.0,
|
|
"epoch": 0.06186666666666667,
|
|
"grad_norm": 0.015932245180010796,
|
|
"learning_rate": 3.944444444444445e-06,
|
|
"loss": 0.3364,
|
|
"num_tokens": 12732300.0,
|
|
"reward": 0.9416613578796387,
|
|
"reward_std": 0.16059020161628723,
|
|
"rewards/accuracy_reward_step": 0.12890625,
|
|
"rewards/final_brier_reward_step": 0.7700413465499878,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"step": 58
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8896596431732178,
|
|
"aux_distill/mean_u": 0.3602237890788429,
|
|
"aux_distill/n_active_tok": 180.875,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.44578582606751616,
|
|
"calib/avg_num_step_conf": 5.65234375,
|
|
"calib/ece": 0.20599607843137255,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.01568627450980392,
|
|
"calib/gap": -0.04110529845741112,
|
|
"calib/mean_conf": 0.31885882352941175,
|
|
"calib/mu_c": 0.2845238095238095,
|
|
"calib/mu_w": 0.32562910798122063,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.18007450980392156,
|
|
"calib/std_conf": 0.2146131304358581,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.36808823529411766,
|
|
"calib/step_q_c_n": 204.0,
|
|
"calib/step_q_gap": -0.018032440490274915,
|
|
"calib/step_q_w": 0.3861206757843926,
|
|
"calib/step_q_w_n": 1243.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2643.0,
|
|
"completions/max_terminated_length": 2643.0,
|
|
"completions/mean_length": 250.51953125,
|
|
"completions/mean_terminated_length": 250.51953125,
|
|
"completions/min_length": 49.0,
|
|
"completions/min_terminated_length": 49.0,
|
|
"epoch": 0.06293333333333333,
|
|
"grad_norm": 0.01418408565223217,
|
|
"learning_rate": 3.916666666666667e-06,
|
|
"loss": 0.3289,
|
|
"num_tokens": 12902681.0,
|
|
"reward": 0.9583262205123901,
|
|
"reward_std": 0.15988890826702118,
|
|
"rewards/accuracy_reward_step": 0.1640625,
|
|
"rewards/final_brier_reward_step": 0.7682148814201355,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"step": 59
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.922422407194972,
|
|
"aux_distill/mean_u": 0.35424350929867404,
|
|
"aux_distill/n_active_tok": 170.125,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5448898265353961,
|
|
"calib/avg_num_step_conf": 5.578125,
|
|
"calib/ece": 0.2269411764705882,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.00392156862745098,
|
|
"calib/gap": 0.043080168776371386,
|
|
"calib/mean_conf": 0.27996078431372545,
|
|
"calib/mu_c": 0.32000000000000006,
|
|
"calib/mu_w": 0.2769198312236287,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.21815686274509802,
|
|
"calib/std_conf": 0.21641734923606573,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.3780232558139534,
|
|
"calib/step_q_c_n": 86.0,
|
|
"calib/step_q_gap": 0.022562003951062226,
|
|
"calib/step_q_w": 0.3554612518628912,
|
|
"calib/step_q_w_n": 1342.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1250.0,
|
|
"completions/max_terminated_length": 1250.0,
|
|
"completions/mean_length": 225.65625,
|
|
"completions/mean_terminated_length": 226.5411834716797,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 62.0,
|
|
"epoch": 0.064,
|
|
"grad_norm": 0.01299036294221878,
|
|
"learning_rate": 3.88888888888889e-06,
|
|
"loss": 0.2561,
|
|
"num_tokens": 13069305.0,
|
|
"reward": 0.9524890780448914,
|
|
"reward_std": 0.11521777510643005,
|
|
"rewards/accuracy_reward_step": 0.0703125,
|
|
"rewards/final_brier_reward_step": 0.8424781560897827,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 60
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.919845612719655,
|
|
"aux_distill/mean_u": 0.34344900348094065,
|
|
"aux_distill/n_active_tok": 170.75,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.4713050314465409,
|
|
"calib/avg_num_step_conf": 5.33984375,
|
|
"calib/ece": 0.1759685039370079,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.003937007874015748,
|
|
"calib/gap": -0.021732704402515735,
|
|
"calib/mean_conf": 0.2464724409448819,
|
|
"calib/mu_c": 0.22833333333333333,
|
|
"calib/mu_w": 0.25006603773584907,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.12854330708661418,
|
|
"calib/std_conf": 0.1897106718249609,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.3376363636363636,
|
|
"calib/step_q_c_n": 220.0,
|
|
"calib/step_q_gap": 0.013825552825552812,
|
|
"calib/step_q_w": 0.3238108108108108,
|
|
"calib/step_q_w_n": 1147.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 770.0,
|
|
"completions/max_terminated_length": 770.0,
|
|
"completions/mean_length": 221.8515625,
|
|
"completions/mean_terminated_length": 222.72158813476562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 49.0,
|
|
"epoch": 0.06506666666666666,
|
|
"grad_norm": 0.016961747780442238,
|
|
"learning_rate": 3.861111111111112e-06,
|
|
"loss": 0.2661,
|
|
"num_tokens": 13230163.0,
|
|
"reward": 0.9816569089889526,
|
|
"reward_std": 0.11320608109235764,
|
|
"rewards/accuracy_reward_step": 0.1640625,
|
|
"rewards/final_brier_reward_step": 0.8070638179779053,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 61
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8867218066006899,
|
|
"aux_distill/mean_u": 0.3593741841944759,
|
|
"aux_distill/n_active_tok": 173.125,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5004852013585638,
|
|
"calib/avg_num_step_conf": 5.4296875,
|
|
"calib/ece": 0.18423046875,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0078125,
|
|
"calib/gap": -0.011993692382338661,
|
|
"calib/mean_conf": 0.26183984375,
|
|
"calib/mu_c": 0.2511111111111111,
|
|
"calib/mu_w": 0.2631048034934498,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.17030078125,
|
|
"calib/std_conf": 0.19503744065236703,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.3344662162162162,
|
|
"calib/step_q_c_n": 148.0,
|
|
"calib/step_q_gap": -0.0010531074552813657,
|
|
"calib/step_q_w": 0.33551932367149756,
|
|
"calib/step_q_w_n": 1242.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 748.0,
|
|
"completions/max_terminated_length": 748.0,
|
|
"completions/mean_length": 231.8046875,
|
|
"completions/mean_terminated_length": 232.7137451171875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 66.0,
|
|
"epoch": 0.06613333333333334,
|
|
"grad_norm": 0.015427176840603352,
|
|
"learning_rate": 3.833333333333334e-06,
|
|
"loss": 0.2946,
|
|
"num_tokens": 13396585.0,
|
|
"reward": 0.9731844663619995,
|
|
"reward_std": 0.09732332825660706,
|
|
"rewards/accuracy_reward_step": 0.10546875,
|
|
"rewards/final_brier_reward_step": 0.8409003019332886,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 62
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9000698383897543,
|
|
"aux_distill/mean_u": 0.38631773311322914,
|
|
"aux_distill/n_active_tok": 186.5,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.4998644619137978,
|
|
"calib/avg_num_step_conf": 5.828125,
|
|
"calib/ece": 0.15207171314741033,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.00398406374501992,
|
|
"calib/gap": -0.013889943074003763,
|
|
"calib/mean_conf": 0.2455378486055777,
|
|
"calib/mu_c": 0.2335294117647059,
|
|
"calib/mu_w": 0.24741935483870967,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.13107569721115536,
|
|
"calib/std_conf": 0.20110454348946114,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.313854748603352,
|
|
"calib/step_q_c_n": 179.0,
|
|
"calib/step_q_gap": -0.011379828700532224,
|
|
"calib/step_q_w": 0.3252345773038842,
|
|
"calib/step_q_w_n": 1313.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2491.0,
|
|
"completions/max_terminated_length": 2491.0,
|
|
"completions/mean_length": 255.2421875,
|
|
"completions/mean_terminated_length": 257.251953125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 50.0,
|
|
"epoch": 0.0672,
|
|
"grad_norm": 0.012757709249854088,
|
|
"learning_rate": 3.8055555555555556e-06,
|
|
"loss": 0.2502,
|
|
"num_tokens": 13570567.0,
|
|
"reward": 0.9583083391189575,
|
|
"reward_std": 0.15712447464466095,
|
|
"rewards/accuracy_reward_step": 0.1328125,
|
|
"rewards/final_brier_reward_step": 0.8072417974472046,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"step": 63
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8718363661319017,
|
|
"aux_distill/mean_u": 0.34136569305581255,
|
|
"aux_distill/n_active_tok": 196.125,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.4279731070208311,
|
|
"calib/avg_num_step_conf": 6.12890625,
|
|
"calib/ece": 0.1686771653543307,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.003937007874015748,
|
|
"calib/gap": -0.04626562327785741,
|
|
"calib/mean_conf": 0.2149448818897638,
|
|
"calib/mu_c": 0.17651162790697672,
|
|
"calib/mu_w": 0.22277725118483413,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.10716535433070866,
|
|
"calib/std_conf": 0.18758081962484274,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.2654395161290322,
|
|
"calib/step_q_c_n": 248.0,
|
|
"calib/step_q_gap": -0.04765753156211089,
|
|
"calib/step_q_w": 0.3130970476911431,
|
|
"calib/step_q_w_n": 1321.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1838.0,
|
|
"completions/max_terminated_length": 1838.0,
|
|
"completions/mean_length": 246.79296875,
|
|
"completions/mean_terminated_length": 247.76080322265625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 50.0,
|
|
"epoch": 0.06826666666666667,
|
|
"grad_norm": 0.012563909403979778,
|
|
"learning_rate": 3.777777777777778e-06,
|
|
"loss": 0.256,
|
|
"num_tokens": 13737522.0,
|
|
"reward": 0.9775606989860535,
|
|
"reward_std": 0.10948432981967926,
|
|
"rewards/accuracy_reward_step": 0.16796875,
|
|
"rewards/final_brier_reward_step": 0.7988713979721069,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"step": 64
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8879748061299324,
|
|
"aux_distill/mean_u": 0.3467711585862075,
|
|
"aux_distill/n_active_tok": 175.125,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.4754679144385026,
|
|
"calib/avg_num_step_conf": 5.53515625,
|
|
"calib/ece": 0.1719724409448819,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.007874015748031496,
|
|
"calib/gap": -0.030123796791443863,
|
|
"calib/mean_conf": 0.2243267716535433,
|
|
"calib/mu_c": 0.19823529411764707,
|
|
"calib/mu_w": 0.22835909090909093,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.1312204724409449,
|
|
"calib/std_conf": 0.20970964064097472,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.30329139072847683,
|
|
"calib/step_q_c_n": 151.0,
|
|
"calib/step_q_gap": -0.01926469142002235,
|
|
"calib/step_q_w": 0.3225560821484992,
|
|
"calib/step_q_w_n": 1266.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 693.0,
|
|
"completions/max_terminated_length": 693.0,
|
|
"completions/mean_length": 222.6328125,
|
|
"completions/mean_terminated_length": 223.50588989257812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 50.0,
|
|
"epoch": 0.06933333333333333,
|
|
"grad_norm": 0.013984648510813713,
|
|
"learning_rate": 3.7500000000000005e-06,
|
|
"loss": 0.2588,
|
|
"num_tokens": 13899540.0,
|
|
"reward": 0.9684619903564453,
|
|
"reward_std": 0.11982069909572601,
|
|
"rewards/accuracy_reward_step": 0.1328125,
|
|
"rewards/final_brier_reward_step": 0.8158302307128906,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"step": 65
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8976942636072636,
|
|
"aux_distill/mean_u": 0.3792002791541046,
|
|
"aux_distill/n_active_tok": 193.0,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.3524753792919883,
|
|
"calib/avg_num_step_conf": 6.03125,
|
|
"calib/ece": 0.17583921568627453,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.08709728506787331,
|
|
"calib/mean_conf": 0.21051372549019606,
|
|
"calib/mu_c": 0.1350294117647059,
|
|
"calib/mu_w": 0.2221266968325792,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.12650980392156866,
|
|
"calib/std_conf": 0.18832027060679057,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.3032251308900524,
|
|
"calib/step_q_c_n": 191.0,
|
|
"calib/step_q_gap": 0.0042280872832526906,
|
|
"calib/step_q_w": 0.2989970436067997,
|
|
"calib/step_q_w_n": 1353.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1351.0,
|
|
"completions/max_terminated_length": 1351.0,
|
|
"completions/mean_length": 249.69140625,
|
|
"completions/mean_terminated_length": 249.69140625,
|
|
"completions/min_length": 51.0,
|
|
"completions/min_terminated_length": 51.0,
|
|
"epoch": 0.0704,
|
|
"grad_norm": 0.024497196078300476,
|
|
"learning_rate": 3.7222222222222225e-06,
|
|
"loss": 0.321,
|
|
"num_tokens": 14069813.0,
|
|
"reward": 0.9704102873802185,
|
|
"reward_std": 0.09235270321369171,
|
|
"rewards/accuracy_reward_step": 0.1328125,
|
|
"rewards/final_brier_reward_step": 0.815820574760437,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 66
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8928143437951803,
|
|
"aux_distill/mean_u": 0.3683333400300812,
|
|
"aux_distill/n_active_tok": 201.375,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5283459595959596,
|
|
"calib/avg_num_step_conf": 6.3515625,
|
|
"calib/ece": 0.11288671875000002,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.017947979797979796,
|
|
"calib/mean_conf": 0.18846484375,
|
|
"calib/mu_c": 0.2038888888888889,
|
|
"calib/mu_w": 0.1859409090909091,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.08036328125,
|
|
"calib/std_conf": 0.15787680229070414,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.31103092783505154,
|
|
"calib/step_q_c_n": 194.0,
|
|
"calib/step_q_gap": 0.002063050740079453,
|
|
"calib/step_q_w": 0.3089678770949721,
|
|
"calib/step_q_w_n": 1432.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 818.0,
|
|
"completions/max_terminated_length": 818.0,
|
|
"completions/mean_length": 259.2109375,
|
|
"completions/mean_terminated_length": 260.22747802734375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 57.0,
|
|
"epoch": 0.07146666666666666,
|
|
"grad_norm": 0.01422516256570816,
|
|
"learning_rate": 3.694444444444445e-06,
|
|
"loss": 0.2683,
|
|
"num_tokens": 14241179.0,
|
|
"reward": 0.9945505857467651,
|
|
"reward_std": 0.07644922286272049,
|
|
"rewards/accuracy_reward_step": 0.140625,
|
|
"rewards/final_brier_reward_step": 0.852382481098175,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 67
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8721083607524633,
|
|
"aux_distill/mean_u": 0.33766640953665167,
|
|
"aux_distill/n_active_tok": 196.25,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.47830636160714285,
|
|
"calib/avg_num_step_conf": 6.2578125,
|
|
"calib/ece": 0.14378906249999995,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.00390625,
|
|
"calib/gap": 0.0017410714285714113,
|
|
"calib/mean_conf": 0.1987890625,
|
|
"calib/mu_c": 0.2003125,
|
|
"calib/mu_w": 0.1985714285714286,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.10878906249999999,
|
|
"calib/std_conf": 0.15857427047087777,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.282723880597015,
|
|
"calib/step_q_c_n": 134.0,
|
|
"calib/step_q_gap": 0.010048131278213857,
|
|
"calib/step_q_w": 0.2726757493188011,
|
|
"calib/step_q_w_n": 1468.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 833.0,
|
|
"completions/max_terminated_length": 833.0,
|
|
"completions/mean_length": 244.5546875,
|
|
"completions/mean_terminated_length": 245.51373291015625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 45.0,
|
|
"epoch": 0.07253333333333334,
|
|
"grad_norm": 0.017560146749019623,
|
|
"learning_rate": 3.6666666666666666e-06,
|
|
"loss": 0.2459,
|
|
"num_tokens": 14407873.0,
|
|
"reward": 0.9854179620742798,
|
|
"reward_std": 0.08683042228221893,
|
|
"rewards/accuracy_reward_step": 0.125,
|
|
"rewards/final_brier_reward_step": 0.8536484241485596,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 68
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8708281461149454,
|
|
"aux_distill/mean_u": 0.369939081804995,
|
|
"aux_distill/n_active_tok": 212.5,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5502949852507375,
|
|
"calib/avg_num_step_conf": 6.65234375,
|
|
"calib/ece": 0.10876953124999995,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.056848082595870236,
|
|
"calib/mean_conf": 0.18248046875,
|
|
"calib/mu_c": 0.2326666666666667,
|
|
"calib/mu_w": 0.17581858407079645,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.08703124999999998,
|
|
"calib/std_conf": 0.14876641062763554,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2950264550264551,
|
|
"calib/step_q_c_n": 189.0,
|
|
"calib/step_q_gap": 0.010110338778106298,
|
|
"calib/step_q_w": 0.2849161162483488,
|
|
"calib/step_q_w_n": 1514.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1226.0,
|
|
"completions/max_terminated_length": 1226.0,
|
|
"completions/mean_length": 275.57421875,
|
|
"completions/mean_terminated_length": 276.6549072265625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 72.0,
|
|
"epoch": 0.0736,
|
|
"grad_norm": 0.022750258445739746,
|
|
"learning_rate": 3.638888888888889e-06,
|
|
"loss": 0.2769,
|
|
"num_tokens": 14582916.0,
|
|
"reward": 0.995856761932373,
|
|
"reward_std": 0.0744517594575882,
|
|
"rewards/accuracy_reward_step": 0.1171875,
|
|
"rewards/final_brier_reward_step": 0.8784323334693909,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 69
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8883331622928381,
|
|
"aux_distill/mean_u": 0.35891217924730157,
|
|
"aux_distill/n_active_tok": 208.875,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.5104230533415083,
|
|
"calib/avg_num_step_conf": 7.11328125,
|
|
"calib/ece": 0.11444881889763778,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.027016145513999584,
|
|
"calib/mean_conf": 0.1575984251968504,
|
|
"calib/mu_c": 0.1823809523809524,
|
|
"calib/mu_w": 0.1553648068669528,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.09468503937007872,
|
|
"calib/std_conf": 0.1397020879706054,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.27740740740740744,
|
|
"calib/step_q_c_n": 135.0,
|
|
"calib/step_q_gap": 0.03019922235402664,
|
|
"calib/step_q_w": 0.2472081850533808,
|
|
"calib/step_q_w_n": 1686.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 790.0,
|
|
"completions/max_terminated_length": 790.0,
|
|
"completions/mean_length": 264.296875,
|
|
"completions/mean_terminated_length": 266.3779602050781,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 75.0,
|
|
"epoch": 0.07466666666666667,
|
|
"grad_norm": 0.0257136020809412,
|
|
"learning_rate": 3.6111111111111115e-06,
|
|
"loss": 0.2185,
|
|
"num_tokens": 14757568.0,
|
|
"reward": 0.9734380841255188,
|
|
"reward_std": 0.09189367294311523,
|
|
"rewards/accuracy_reward_step": 0.08203125,
|
|
"rewards/final_brier_reward_step": 0.8843761682510376,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 70
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8784806150943041,
|
|
"aux_distill/mean_u": 0.3529545415400276,
|
|
"aux_distill/n_active_tok": 216.375,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.4949299855142444,
|
|
"calib/avg_num_step_conf": 6.98828125,
|
|
"calib/ece": 0.1296484375,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.005101400289715119,
|
|
"calib/mean_conf": 0.1714453125,
|
|
"calib/mu_c": 0.17578947368421052,
|
|
"calib/mu_w": 0.1706880733944954,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.076328125,
|
|
"calib/std_conf": 0.16355407212685763,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.3313185344827586,
|
|
"calib/step_q_c_n": 232.0,
|
|
"calib/step_q_gap": 0.03309554154762695,
|
|
"calib/step_q_w": 0.29822299293513166,
|
|
"calib/step_q_w_n": 1557.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1264.0,
|
|
"completions/max_terminated_length": 1264.0,
|
|
"completions/mean_length": 280.64453125,
|
|
"completions/mean_terminated_length": 281.7451171875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 90.0,
|
|
"epoch": 0.07573333333333333,
|
|
"grad_norm": 0.03355726599693298,
|
|
"learning_rate": 3.5833333333333335e-06,
|
|
"loss": 0.2439,
|
|
"num_tokens": 14933821.0,
|
|
"reward": 0.9902146458625793,
|
|
"reward_std": 0.09100230038166046,
|
|
"rewards/accuracy_reward_step": 0.1484375,
|
|
"rewards/final_brier_reward_step": 0.8398042321205139,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 71
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8864923529326916,
|
|
"aux_distill/mean_u": 0.3814361559322445,
|
|
"aux_distill/n_active_tok": 249.375,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5487861811391224,
|
|
"calib/avg_num_step_conf": 7.86328125,
|
|
"calib/ece": 0.09687890625,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.00390625,
|
|
"calib/gap": 0.007557889822595698,
|
|
"calib/mean_conf": 0.15241796875,
|
|
"calib/mu_c": 0.15944444444444447,
|
|
"calib/mu_w": 0.15188655462184877,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0894921875,
|
|
"calib/std_conf": 0.13825932774454686,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.21701754385964916,
|
|
"calib/step_q_c_n": 114.0,
|
|
"calib/step_q_gap": -0.0705435409218148,
|
|
"calib/step_q_w": 0.28756108478146397,
|
|
"calib/step_q_w_n": 1899.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 975.0,
|
|
"completions/max_terminated_length": 975.0,
|
|
"completions/mean_length": 289.875,
|
|
"completions/mean_terminated_length": 291.01177978515625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 72.0,
|
|
"epoch": 0.0768,
|
|
"grad_norm": 0.03782118856906891,
|
|
"learning_rate": 3.555555555555556e-06,
|
|
"loss": 0.289,
|
|
"num_tokens": 15112437.0,
|
|
"reward": 0.9824103116989136,
|
|
"reward_std": 0.07157760858535767,
|
|
"rewards/accuracy_reward_step": 0.0703125,
|
|
"rewards/final_brier_reward_step": 0.9023206830024719,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 72
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.881175821647048,
|
|
"aux_distill/mean_u": 0.3102595000996086,
|
|
"aux_distill/n_active_tok": 232.375,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5427479990298326,
|
|
"calib/avg_num_step_conf": 7.265625,
|
|
"calib/ece": 0.1055254901960784,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.014407955372301717,
|
|
"calib/mean_conf": 0.14905490196078433,
|
|
"calib/mu_c": 0.1613157894736842,
|
|
"calib/mu_w": 0.14690783410138247,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.05278039215686275,
|
|
"calib/std_conf": 0.15025799031146853,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.3429217391304348,
|
|
"calib/step_q_c_n": 230.0,
|
|
"calib/step_q_gap": 0.04303124833288874,
|
|
"calib/step_q_w": 0.29989049079754604,
|
|
"calib/step_q_w_n": 1630.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1199.0,
|
|
"completions/max_terminated_length": 1199.0,
|
|
"completions/mean_length": 292.578125,
|
|
"completions/mean_terminated_length": 293.7254943847656,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 82.0,
|
|
"epoch": 0.07786666666666667,
|
|
"grad_norm": 0.029115431010723114,
|
|
"learning_rate": 3.5277777777777784e-06,
|
|
"loss": 0.246,
|
|
"num_tokens": 15294369.0,
|
|
"reward": 0.9939548969268799,
|
|
"reward_std": 0.08326876163482666,
|
|
"rewards/accuracy_reward_step": 0.1484375,
|
|
"rewards/final_brier_reward_step": 0.8472848534584045,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 73
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8466088864952326,
|
|
"aux_distill/mean_u": 0.3597551678076145,
|
|
"aux_distill/n_active_tok": 258.125,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.48332210242587603,
|
|
"calib/avg_num_step_conf": 8.84765625,
|
|
"calib/ece": 0.11814960629921259,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0013522012578616738,
|
|
"calib/mean_conf": 0.15720472440944885,
|
|
"calib/mu_c": 0.15833333333333335,
|
|
"calib/mu_w": 0.15698113207547168,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.05499999999999999,
|
|
"calib/std_conf": 0.139694765038833,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.21719741100323625,
|
|
"calib/step_q_c_n": 309.0,
|
|
"calib/step_q_gap": -0.06678162785156944,
|
|
"calib/step_q_w": 0.2839790388548057,
|
|
"calib/step_q_w_n": 1956.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 1924.0,
|
|
"completions/max_terminated_length": 1924.0,
|
|
"completions/mean_length": 295.48828125,
|
|
"completions/mean_terminated_length": 297.8149719238281,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 76.0,
|
|
"epoch": 0.07893333333333333,
|
|
"grad_norm": 0.016858849674463272,
|
|
"learning_rate": 3.5e-06,
|
|
"loss": 0.2195,
|
|
"num_tokens": 15473942.0,
|
|
"reward": 0.992323637008667,
|
|
"reward_std": 0.08323856443166733,
|
|
"rewards/accuracy_reward_step": 0.1640625,
|
|
"rewards/final_brier_reward_step": 0.832303524017334,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"step": 74
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8464425075799227,
|
|
"aux_distill/mean_u": 0.3196149480640056,
|
|
"aux_distill/n_active_tok": 260.625,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.5824703344120821,
|
|
"calib/avg_num_step_conf": 9.08984375,
|
|
"calib/ece": 0.07495219123505976,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.05759708737864086,
|
|
"calib/mean_conf": 0.14472908366533863,
|
|
"calib/mu_c": 0.19200000000000006,
|
|
"calib/mu_w": 0.1344029126213592,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.02019920318725099,
|
|
"calib/std_conf": 0.13802181791973867,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.3805806451612903,
|
|
"calib/step_q_c_n": 310.0,
|
|
"calib/step_q_gap": 0.11913245477953521,
|
|
"calib/step_q_w": 0.2614481903817551,
|
|
"calib/step_q_w_n": 2017.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2202.0,
|
|
"completions/max_terminated_length": 2202.0,
|
|
"completions/mean_length": 295.87890625,
|
|
"completions/mean_terminated_length": 299.3873596191406,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 95.0,
|
|
"epoch": 0.08,
|
|
"grad_norm": 0.0117812380194664,
|
|
"learning_rate": 3.4722222222222224e-06,
|
|
"loss": 0.1928,
|
|
"num_tokens": 15654439.0,
|
|
"reward": 0.9965642094612122,
|
|
"reward_std": 0.11236327886581421,
|
|
"rewards/accuracy_reward_step": 0.1796875,
|
|
"rewards/final_brier_reward_step": 0.8329721689224243,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 75
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8963596131652594,
|
|
"aux_distill/mean_u": 0.37019422318096695,
|
|
"aux_distill/n_active_tok": 243.125,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.5180759803921569,
|
|
"calib/avg_num_step_conf": 8.1953125,
|
|
"calib/ece": 0.1313095238095238,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.014779411764705846,
|
|
"calib/mean_conf": 0.12511904761904763,
|
|
"calib/mu_c": 0.1370833333333333,
|
|
"calib/mu_w": 0.12230392156862746,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.03297619047619047,
|
|
"calib/std_conf": 0.1267409623679342,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.33612712418300655,
|
|
"calib/step_q_c_n": 306.0,
|
|
"calib/step_q_gap": 0.042306811683006595,
|
|
"calib/step_q_w": 0.29382031249999996,
|
|
"calib/step_q_w_n": 1792.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2717.0,
|
|
"completions/max_terminated_length": 2717.0,
|
|
"completions/mean_length": 293.77734375,
|
|
"completions/mean_terminated_length": 297.2608947753906,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 85.0,
|
|
"epoch": 0.08106666666666666,
|
|
"grad_norm": 0.010141227394342422,
|
|
"learning_rate": 3.444444444444445e-06,
|
|
"loss": 0.2049,
|
|
"num_tokens": 15832702.0,
|
|
"reward": 0.9905637502670288,
|
|
"reward_std": 0.10064421594142914,
|
|
"rewards/accuracy_reward_step": 0.1875,
|
|
"rewards/final_brier_reward_step": 0.8131588697433472,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 76
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.870359031483531,
|
|
"aux_distill/mean_u": 0.34247186531231555,
|
|
"aux_distill/n_active_tok": 248.375,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.43320664414414417,
|
|
"calib/avg_num_step_conf": 7.76171875,
|
|
"calib/ece": 0.12236614173228347,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.03174239864864864,
|
|
"calib/mean_conf": 0.14464960629921259,
|
|
"calib/mu_c": 0.11690625000000002,
|
|
"calib/mu_w": 0.14864864864864866,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.07051574803149607,
|
|
"calib/std_conf": 0.14479675974863274,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2990127659574468,
|
|
"calib/step_q_c_n": 235.0,
|
|
"calib/step_q_gap": -0.0011852934032837714,
|
|
"calib/step_q_w": 0.3001980593607306,
|
|
"calib/step_q_w_n": 1752.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2666.0,
|
|
"completions/max_terminated_length": 2666.0,
|
|
"completions/mean_length": 299.01953125,
|
|
"completions/mean_terminated_length": 299.01953125,
|
|
"completions/min_length": 32.0,
|
|
"completions/min_terminated_length": 32.0,
|
|
"epoch": 0.08213333333333334,
|
|
"grad_norm": 0.016655778512358665,
|
|
"learning_rate": 3.416666666666667e-06,
|
|
"loss": 0.269,
|
|
"num_tokens": 16013915.0,
|
|
"reward": 0.9746717214584351,
|
|
"reward_std": 0.10004068166017532,
|
|
"rewards/accuracy_reward_step": 0.125,
|
|
"rewards/final_brier_reward_step": 0.8438748121261597,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"step": 77
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8313043918460608,
|
|
"aux_distill/mean_u": 0.3717599451626029,
|
|
"aux_distill/n_active_tok": 313.375,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.37681159420289856,
|
|
"calib/avg_num_step_conf": 10.234375,
|
|
"calib/ece": 0.16553174603174603,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.03759420289855073,
|
|
"calib/mean_conf": 0.14176984126984127,
|
|
"calib/mu_c": 0.1108888888888889,
|
|
"calib/mu_w": 0.14848309178743962,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.06436507936507935,
|
|
"calib/std_conf": 0.13602822201914236,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.28372781065088754,
|
|
"calib/step_q_c_n": 338.0,
|
|
"calib/step_q_gap": -0.019563162179962623,
|
|
"calib/step_q_w": 0.30329097283085016,
|
|
"calib/step_q_w_n": 2282.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 3031.0,
|
|
"completions/max_terminated_length": 3031.0,
|
|
"completions/mean_length": 362.7734375,
|
|
"completions/mean_terminated_length": 365.6299133300781,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 89.0,
|
|
"epoch": 0.0832,
|
|
"grad_norm": 0.019840573891997337,
|
|
"learning_rate": 3.3888888888888893e-06,
|
|
"loss": 0.3363,
|
|
"num_tokens": 16214809.0,
|
|
"reward": 0.9848675727844238,
|
|
"reward_std": 0.09170843660831451,
|
|
"rewards/accuracy_reward_step": 0.17578125,
|
|
"rewards/final_brier_reward_step": 0.8095788955688477,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"step": 78
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8580750748515129,
|
|
"aux_distill/mean_u": 0.3492695963216024,
|
|
"aux_distill/n_active_tok": 288.25,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.5498194538478899,
|
|
"calib/avg_num_step_conf": 9.7421875,
|
|
"calib/ece": 0.09699604743083001,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.021831415030467177,
|
|
"calib/mean_conf": 0.1082213438735178,
|
|
"calib/mu_c": 0.12642857142857145,
|
|
"calib/mu_w": 0.10459715639810427,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.019604743083003952,
|
|
"calib/std_conf": 0.11020820405074769,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.4059787581699347,
|
|
"calib/step_q_c_n": 306.0,
|
|
"calib/step_q_gap": 0.10781157352642462,
|
|
"calib/step_q_w": 0.29816718464351005,
|
|
"calib/step_q_w_n": 2188.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 1478.0,
|
|
"completions/max_terminated_length": 1478.0,
|
|
"completions/mean_length": 323.7109375,
|
|
"completions/mean_terminated_length": 327.5494079589844,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 45.0,
|
|
"epoch": 0.08426666666666667,
|
|
"grad_norm": 0.012313942424952984,
|
|
"learning_rate": 3.3611111111111117e-06,
|
|
"loss": 0.1974,
|
|
"num_tokens": 16404055.0,
|
|
"reward": 0.9972343444824219,
|
|
"reward_std": 0.07284507155418396,
|
|
"rewards/accuracy_reward_step": 0.1640625,
|
|
"rewards/final_brier_reward_step": 0.8421249985694885,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"step": 79
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9100547656416893,
|
|
"aux_distill/mean_u": 0.3595352341716097,
|
|
"aux_distill/n_active_tok": 270.25,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.5754641909814323,
|
|
"calib/avg_num_step_conf": 8.84375,
|
|
"calib/ece": 0.14763636363636362,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.025727320954907165,
|
|
"calib/mean_conf": 0.10706719367588934,
|
|
"calib/mu_c": 0.12689655172413794,
|
|
"calib/mu_w": 0.10116923076923078,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.012727272727272724,
|
|
"calib/std_conf": 0.11064817757389835,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.36373684210526314,
|
|
"calib/step_q_c_n": 380.0,
|
|
"calib/step_q_gap": 0.03152336015197221,
|
|
"calib/step_q_w": 0.3322134819532909,
|
|
"calib/step_q_w_n": 1884.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2735.0,
|
|
"completions/max_terminated_length": 2735.0,
|
|
"completions/mean_length": 320.18359375,
|
|
"completions/mean_terminated_length": 322.7047119140625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 71.0,
|
|
"epoch": 0.08533333333333333,
|
|
"grad_norm": 0.01683088205754757,
|
|
"learning_rate": 3.3333333333333333e-06,
|
|
"loss": 0.3057,
|
|
"num_tokens": 16588182.0,
|
|
"reward": 1.0053168535232544,
|
|
"reward_std": 0.08265817165374756,
|
|
"rewards/accuracy_reward_step": 0.2265625,
|
|
"rewards/final_brier_reward_step": 0.7957901358604431,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"step": 80
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8782324083149433,
|
|
"aux_distill/mean_u": 0.3618072356264667,
|
|
"aux_distill/n_active_tok": 303.0,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5356892523364487,
|
|
"calib/avg_num_step_conf": 9.46875,
|
|
"calib/ece": 0.09224606299212595,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.005579439252336435,
|
|
"calib/mean_conf": 0.09654921259842521,
|
|
"calib/mu_c": 0.10124999999999999,
|
|
"calib/mu_w": 0.09567056074766356,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.015657480314960632,
|
|
"calib/std_conf": 0.10259747880362516,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.3253183520599251,
|
|
"calib/step_q_c_n": 267.0,
|
|
"calib/step_q_gap": 0.032672686784078975,
|
|
"calib/step_q_w": 0.2926456652758461,
|
|
"calib/step_q_w_n": 2157.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2849.0,
|
|
"completions/max_terminated_length": 2849.0,
|
|
"completions/mean_length": 353.26953125,
|
|
"completions/mean_terminated_length": 353.26953125,
|
|
"completions/min_length": 92.0,
|
|
"completions/min_terminated_length": 92.0,
|
|
"epoch": 0.0864,
|
|
"grad_norm": 0.01895328424870968,
|
|
"learning_rate": 3.3055555555555558e-06,
|
|
"loss": 0.3637,
|
|
"num_tokens": 16784867.0,
|
|
"reward": 0.9981613159179688,
|
|
"reward_std": 0.05369744449853897,
|
|
"rewards/accuracy_reward_step": 0.15625,
|
|
"rewards/final_brier_reward_step": 0.847885251045227,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 81
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8312947601079941,
|
|
"aux_distill/mean_u": 0.3230309421148612,
|
|
"aux_distill/n_active_tok": 321.375,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.5915977961432507,
|
|
"calib/avg_num_step_conf": 10.36328125,
|
|
"calib/ece": 0.07542687747035573,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.03115303030303028,
|
|
"calib/mean_conf": 0.09139525691699607,
|
|
"calib/mu_c": 0.11848484848484847,
|
|
"calib/mu_w": 0.08733181818181819,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.018193675889328062,
|
|
"calib/std_conf": 0.08681989158217782,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.30495833333333333,
|
|
"calib/step_q_c_n": 264.0,
|
|
"calib/step_q_gap": 0.015112540114413264,
|
|
"calib/step_q_w": 0.28984579321892007,
|
|
"calib/step_q_w_n": 2389.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 2439.0,
|
|
"completions/max_terminated_length": 2439.0,
|
|
"completions/mean_length": 374.703125,
|
|
"completions/mean_terminated_length": 377.6535339355469,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 124.0,
|
|
"epoch": 0.08746666666666666,
|
|
"grad_norm": 0.023921169340610504,
|
|
"learning_rate": 3.277777777777778e-06,
|
|
"loss": 0.2663,
|
|
"num_tokens": 16986343.0,
|
|
"reward": 0.9957023859024048,
|
|
"reward_std": 0.0568794310092926,
|
|
"rewards/accuracy_reward_step": 0.12890625,
|
|
"rewards/final_brier_reward_step": 0.8742173910140991,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"step": 82
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8398945815861225,
|
|
"aux_distill/mean_u": 0.32894001525501954,
|
|
"aux_distill/n_active_tok": 325.125,
|
|
"calib/answer_extract_rate": 0.98046875,
|
|
"calib/auroc": 0.5557356887298748,
|
|
"calib/avg_num_step_conf": 11.515625,
|
|
"calib/ece": 0.11826693227091632,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.014943537567084025,
|
|
"calib/mean_conf": 0.08715139442231076,
|
|
"calib/mu_c": 0.0995348837209302,
|
|
"calib/mu_w": 0.08459134615384617,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.01705179282868526,
|
|
"calib/std_conf": 0.09748061911802404,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.27858310626703,
|
|
"calib/step_q_c_n": 367.0,
|
|
"calib/step_q_gap": 0.006843857913678586,
|
|
"calib/step_q_w": 0.2717392483533514,
|
|
"calib/step_q_w_n": 2581.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 2542.0,
|
|
"completions/max_terminated_length": 2542.0,
|
|
"completions/mean_length": 363.55078125,
|
|
"completions/mean_terminated_length": 369.3214416503906,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 67.0,
|
|
"epoch": 0.08853333333333334,
|
|
"grad_norm": 0.012897885404527187,
|
|
"learning_rate": 3.2500000000000002e-06,
|
|
"loss": 0.269,
|
|
"num_tokens": 17186676.0,
|
|
"reward": 0.9851524233818054,
|
|
"reward_std": 0.09537991881370544,
|
|
"rewards/accuracy_reward_step": 0.16796875,
|
|
"rewards/final_brier_reward_step": 0.8257735967636108,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"step": 83
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8542652856558561,
|
|
"aux_distill/mean_u": 0.3600303893154187,
|
|
"aux_distill/n_active_tok": 357.25,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.45500782472613455,
|
|
"calib/avg_num_step_conf": 12.5,
|
|
"calib/ece": 0.09252610441767069,
|
|
"calib/final_conf_rate": 0.97265625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.017144757433489824,
|
|
"calib/mean_conf": 0.08827710843373494,
|
|
"calib/mu_c": 0.07361111111111111,
|
|
"calib/mu_w": 0.09075586854460094,
|
|
"calib/nonempty_final_conf_rate": 0.97265625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.018112449799196788,
|
|
"calib/std_conf": 0.08681147647554294,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.3660130718954248,
|
|
"calib/step_q_c_n": 306.0,
|
|
"calib/step_q_gap": 0.10061362476342756,
|
|
"calib/step_q_w": 0.26539944713199726,
|
|
"calib/step_q_w_n": 2894.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01953125,
|
|
"completions/max_length": 3023.0,
|
|
"completions/max_terminated_length": 3023.0,
|
|
"completions/mean_length": 395.32421875,
|
|
"completions/mean_terminated_length": 403.19921875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 88.0,
|
|
"epoch": 0.0896,
|
|
"grad_norm": 0.012995940633118153,
|
|
"learning_rate": 3.2222222222222227e-06,
|
|
"loss": 0.2734,
|
|
"num_tokens": 17393799.0,
|
|
"reward": 0.9755528569221497,
|
|
"reward_std": 0.09832397103309631,
|
|
"rewards/accuracy_reward_step": 0.140625,
|
|
"rewards/final_brier_reward_step": 0.8378244638442993,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"step": 84
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8209226354956627,
|
|
"aux_distill/mean_u": 0.3484133899235399,
|
|
"aux_distill/n_active_tok": 341.375,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.4757109557109557,
|
|
"calib/avg_num_step_conf": 11.7265625,
|
|
"calib/ece": 0.17984,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.01892773892773894,
|
|
"calib/mean_conf": 0.0864,
|
|
"calib/mu_c": 0.07163636363636364,
|
|
"calib/mu_w": 0.09056410256410258,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.02312,
|
|
"calib/std_conf": 0.10659756094770649,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.3235185185185185,
|
|
"calib/step_q_c_n": 432.0,
|
|
"calib/step_q_gap": 0.08762163135898537,
|
|
"calib/step_q_w": 0.2358968871595331,
|
|
"calib/step_q_w_n": 2570.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 2845.0,
|
|
"completions/max_terminated_length": 2845.0,
|
|
"completions/mean_length": 378.8984375,
|
|
"completions/mean_terminated_length": 384.9127197265625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 61.0,
|
|
"epoch": 0.09066666666666667,
|
|
"grad_norm": 0.020151106640696526,
|
|
"learning_rate": 3.1944444444444443e-06,
|
|
"loss": 0.2458,
|
|
"num_tokens": 17598621.0,
|
|
"reward": 0.9827597141265869,
|
|
"reward_std": 0.09446272999048233,
|
|
"rewards/accuracy_reward_step": 0.21484375,
|
|
"rewards/final_brier_reward_step": 0.7741132974624634,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"step": 85
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8645725585520267,
|
|
"aux_distill/mean_u": 0.3442900409885884,
|
|
"aux_distill/n_active_tok": 297.375,
|
|
"calib/answer_extract_rate": 0.96484375,
|
|
"calib/auroc": 0.43505874880914575,
|
|
"calib/avg_num_step_conf": 12.54296875,
|
|
"calib/ece": 0.1632258064516129,
|
|
"calib/final_conf_rate": 0.96875,
|
|
"calib/format_rate": 0.96484375,
|
|
"calib/frac_conf_gt_0.9": 0.004032258064516129,
|
|
"calib/gap": -0.022392293849899464,
|
|
"calib/mean_conf": 0.06346774193548388,
|
|
"calib/mu_c": 0.0453191489361702,
|
|
"calib/mu_w": 0.06771144278606966,
|
|
"calib/nonempty_final_conf_rate": 0.96875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.01858870967741936,
|
|
"calib/std_conf": 0.09685826321468737,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.29846846846846853,
|
|
"calib/step_q_c_n": 333.0,
|
|
"calib/step_q_gap": 0.06393997646012942,
|
|
"calib/step_q_w": 0.2345284920083391,
|
|
"calib/step_q_w_n": 2878.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.03125,
|
|
"completions/max_length": 1411.0,
|
|
"completions/max_terminated_length": 1411.0,
|
|
"completions/mean_length": 330.70703125,
|
|
"completions/mean_terminated_length": 341.375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 83.0,
|
|
"epoch": 0.09173333333333333,
|
|
"grad_norm": 0.026438122615218163,
|
|
"learning_rate": 3.1666666666666667e-06,
|
|
"loss": 0.2352,
|
|
"num_tokens": 17788794.0,
|
|
"reward": 0.966668963432312,
|
|
"reward_std": 0.10759761929512024,
|
|
"rewards/accuracy_reward_step": 0.18359375,
|
|
"rewards/final_brier_reward_step": 0.7849003672599792,
|
|
"rewards/format_reward_step": 0.96484375,
|
|
"step": 86
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8219904322177172,
|
|
"aux_distill/mean_u": 0.3128133960194839,
|
|
"aux_distill/n_active_tok": 360.375,
|
|
"calib/answer_extract_rate": 0.97265625,
|
|
"calib/auroc": 0.546790994623656,
|
|
"calib/avg_num_step_conf": 12.671875,
|
|
"calib/ece": 0.20407999999999998,
|
|
"calib/final_conf_rate": 0.9765625,
|
|
"calib/format_rate": 0.97265625,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.01063340053763441,
|
|
"calib/mean_conf": 0.059120000000000006,
|
|
"calib/mu_c": 0.06703125000000001,
|
|
"calib/mu_w": 0.056397849462365604,
|
|
"calib/nonempty_final_conf_rate": 0.9765625,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0036000000000000003,
|
|
"calib/std_conf": 0.06984,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2833517495395948,
|
|
"calib/step_q_c_n": 543.0,
|
|
"calib/step_q_gap": 0.020853453233189767,
|
|
"calib/step_q_w": 0.26249829630640503,
|
|
"calib/step_q_w_n": 2701.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01953125,
|
|
"completions/max_length": 2720.0,
|
|
"completions/max_terminated_length": 2720.0,
|
|
"completions/mean_length": 392.44921875,
|
|
"completions/mean_terminated_length": 400.2669372558594,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 79.0,
|
|
"epoch": 0.0928,
|
|
"grad_norm": 0.02165599912405014,
|
|
"learning_rate": 3.138888888888889e-06,
|
|
"loss": 0.3066,
|
|
"num_tokens": 17994757.0,
|
|
"reward": 0.9853259921073914,
|
|
"reward_std": 0.09768631309270859,
|
|
"rewards/accuracy_reward_step": 0.25,
|
|
"rewards/final_brier_reward_step": 0.7479957342147827,
|
|
"rewards/format_reward_step": 0.97265625,
|
|
"step": 87
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8673255424946547,
|
|
"aux_distill/mean_u": 0.3248154467273917,
|
|
"aux_distill/n_active_tok": 315.25,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.507420091324201,
|
|
"calib/avg_num_step_conf": 10.01171875,
|
|
"calib/ece": 0.11343529411764706,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.005678462709284626,
|
|
"calib/mean_conf": 0.056682352941176475,
|
|
"calib/mu_c": 0.05180555555555556,
|
|
"calib/mu_w": 0.05748401826484019,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.014470588235294117,
|
|
"calib/std_conf": 0.08278811297140205,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.28467619047619047,
|
|
"calib/step_q_c_n": 315.0,
|
|
"calib/step_q_gap": -0.005169005253346903,
|
|
"calib/step_q_w": 0.2898451957295374,
|
|
"calib/step_q_w_n": 2248.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1366.0,
|
|
"completions/max_terminated_length": 1366.0,
|
|
"completions/mean_length": 372.55078125,
|
|
"completions/mean_terminated_length": 374.01177978515625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 67.0,
|
|
"epoch": 0.09386666666666667,
|
|
"grad_norm": 0.011959103867411613,
|
|
"learning_rate": 3.1111111111111116e-06,
|
|
"loss": 0.28,
|
|
"num_tokens": 18199978.0,
|
|
"reward": 0.9983651638031006,
|
|
"reward_std": 0.03053853288292885,
|
|
"rewards/accuracy_reward_step": 0.140625,
|
|
"rewards/final_brier_reward_step": 0.860011637210846,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 88
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8707714267075062,
|
|
"aux_distill/mean_u": 0.35302710012356325,
|
|
"aux_distill/n_active_tok": 327.625,
|
|
"calib/answer_extract_rate": 0.9765625,
|
|
"calib/auroc": 0.6254841649578493,
|
|
"calib/avg_num_step_conf": 10.9140625,
|
|
"calib/ece": 0.13034661354581673,
|
|
"calib/final_conf_rate": 0.98046875,
|
|
"calib/format_rate": 0.9765625,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.018764411027568914,
|
|
"calib/mean_conf": 0.03961354581673306,
|
|
"calib/mu_c": 0.05523809523809523,
|
|
"calib/mu_w": 0.03647368421052632,
|
|
"calib/nonempty_final_conf_rate": 0.98046875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0013147410358565737,
|
|
"calib/std_conf": 0.047545805716298926,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2900466216216216,
|
|
"calib/step_q_c_n": 296.0,
|
|
"calib/step_q_gap": 0.029466957890636802,
|
|
"calib/step_q_w": 0.2605796637309848,
|
|
"calib/step_q_w_n": 2498.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 3048.0,
|
|
"completions/max_terminated_length": 3048.0,
|
|
"completions/mean_length": 390.97265625,
|
|
"completions/mean_terminated_length": 395.60870361328125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 93.0,
|
|
"epoch": 0.09493333333333333,
|
|
"grad_norm": 0.012375755235552788,
|
|
"learning_rate": 3.0833333333333336e-06,
|
|
"loss": 0.2737,
|
|
"num_tokens": 18408955.0,
|
|
"reward": 0.9838107824325562,
|
|
"reward_std": 0.06585811078548431,
|
|
"rewards/accuracy_reward_step": 0.1640625,
|
|
"rewards/final_brier_reward_step": 0.8269965648651123,
|
|
"rewards/format_reward_step": 0.9765625,
|
|
"step": 89
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8781138341873884,
|
|
"aux_distill/mean_u": 0.35365546245273183,
|
|
"aux_distill/n_active_tok": 309.0,
|
|
"calib/answer_extract_rate": 0.984375,
|
|
"calib/auroc": 0.5553408480944713,
|
|
"calib/avg_num_step_conf": 10.75390625,
|
|
"calib/ece": 0.15353174603174605,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0037584541062802013,
|
|
"calib/mean_conf": 0.03424603174603174,
|
|
"calib/mu_c": 0.037333333333333336,
|
|
"calib/mu_w": 0.033574879227053135,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.004603174603174601,
|
|
"calib/std_conf": 0.045292556329447065,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2954569732937686,
|
|
"calib/step_q_c_n": 337.0,
|
|
"calib/step_q_gap": 0.03972734261082156,
|
|
"calib/step_q_w": 0.255729630682947,
|
|
"calib/step_q_w_n": 2416.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 2454.0,
|
|
"completions/max_terminated_length": 2454.0,
|
|
"completions/mean_length": 347.81640625,
|
|
"completions/mean_terminated_length": 351.94073486328125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 79.0,
|
|
"epoch": 0.096,
|
|
"grad_norm": 0.01080415491014719,
|
|
"learning_rate": 3.055555555555556e-06,
|
|
"loss": 0.3039,
|
|
"num_tokens": 18601316.0,
|
|
"reward": 0.9893505573272705,
|
|
"reward_std": 0.05818493664264679,
|
|
"rewards/accuracy_reward_step": 0.17578125,
|
|
"rewards/final_brier_reward_step": 0.818544864654541,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"step": 90
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8643928486853838,
|
|
"aux_distill/mean_u": 0.3049086768321826,
|
|
"aux_distill/n_active_tok": 293.75,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5152314814814815,
|
|
"calib/avg_num_step_conf": 9.6328125,
|
|
"calib/ece": 0.17846456692913387,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.003268518518518518,
|
|
"calib/mean_conf": 0.03594488188976378,
|
|
"calib/mu_c": 0.03851851851851852,
|
|
"calib/mu_w": 0.035250000000000004,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0009055118110236221,
|
|
"calib/std_conf": 0.03876657523814441,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.28237416481069044,
|
|
"calib/step_q_c_n": 449.0,
|
|
"calib/step_q_gap": -0.01975587981003335,
|
|
"calib/step_q_w": 0.3021300446207238,
|
|
"calib/step_q_w_n": 2017.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2455.0,
|
|
"completions/max_terminated_length": 2455.0,
|
|
"completions/mean_length": 360.91015625,
|
|
"completions/mean_terminated_length": 362.32550048828125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 80.0,
|
|
"epoch": 0.09706666666666666,
|
|
"grad_norm": 0.010622053407132626,
|
|
"learning_rate": 3.0277777777777776e-06,
|
|
"loss": 0.3275,
|
|
"num_tokens": 18801421.0,
|
|
"reward": 0.9989259243011475,
|
|
"reward_std": 0.03695327788591385,
|
|
"rewards/accuracy_reward_step": 0.2109375,
|
|
"rewards/final_brier_reward_step": 0.7947269678115845,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 91
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8596785441040993,
|
|
"aux_distill/mean_u": 0.2708570878329426,
|
|
"aux_distill/n_active_tok": 268.625,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.5676567656765676,
|
|
"calib/avg_num_step_conf": 8.78125,
|
|
"calib/ece": 0.17711462450592885,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.006685109687439333,
|
|
"calib/mean_conf": 0.02446640316205534,
|
|
"calib/mu_c": 0.02980392156862745,
|
|
"calib/mu_w": 0.02311881188118812,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.028049431551867763,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.35421364985163206,
|
|
"calib/step_q_c_n": 337.0,
|
|
"calib/step_q_gap": 0.0886029748123856,
|
|
"calib/step_q_w": 0.26561067503924646,
|
|
"calib/step_q_w_n": 1911.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 1140.0,
|
|
"completions/max_terminated_length": 1140.0,
|
|
"completions/mean_length": 325.58984375,
|
|
"completions/mean_terminated_length": 329.4505920410156,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 91.0,
|
|
"epoch": 0.09813333333333334,
|
|
"grad_norm": 0.011056625284254551,
|
|
"learning_rate": 3e-06,
|
|
"loss": 0.2696,
|
|
"num_tokens": 18991492.0,
|
|
"reward": 0.9935341477394104,
|
|
"reward_std": 0.04476837068796158,
|
|
"rewards/accuracy_reward_step": 0.19921875,
|
|
"rewards/final_brier_reward_step": 0.7995683550834656,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"step": 92
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8805052265524864,
|
|
"aux_distill/mean_u": 0.2796066804729568,
|
|
"aux_distill/n_active_tok": 286.875,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5519877675840978,
|
|
"calib/avg_num_step_conf": 9.578125,
|
|
"calib/ece": 0.11880708661417326,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.00013302752293577885,
|
|
"calib/mean_conf": 0.031114173228346458,
|
|
"calib/mu_c": 0.031000000000000003,
|
|
"calib/mu_w": 0.031133027522935782,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.004094488188976378,
|
|
"calib/std_conf": 0.052249088790471354,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.269597510373444,
|
|
"calib/step_q_c_n": 241.0,
|
|
"calib/step_q_gap": -0.010523339920540609,
|
|
"calib/step_q_w": 0.2801208502939846,
|
|
"calib/step_q_w_n": 2211.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 1061.0,
|
|
"completions/max_terminated_length": 1061.0,
|
|
"completions/mean_length": 349.859375,
|
|
"completions/mean_terminated_length": 352.6141662597656,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 80.0,
|
|
"epoch": 0.0992,
|
|
"grad_norm": 0.013927971012890339,
|
|
"learning_rate": 2.9722222222222225e-06,
|
|
"loss": 0.275,
|
|
"num_tokens": 19186832.0,
|
|
"reward": 0.9947122931480408,
|
|
"reward_std": 0.030827995389699936,
|
|
"rewards/accuracy_reward_step": 0.140625,
|
|
"rewards/final_brier_reward_step": 0.8566120862960815,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 93
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8493856117129326,
|
|
"aux_distill/mean_u": 0.2487502605932423,
|
|
"aux_distill/n_active_tok": 252.625,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5254245283018868,
|
|
"calib/avg_num_step_conf": 8.86328125,
|
|
"calib/ece": 0.1831620553359684,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.009423584905660372,
|
|
"calib/mean_conf": 0.026324110671936764,
|
|
"calib/mu_c": 0.033773584905660375,
|
|
"calib/mu_w": 0.024350000000000004,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.03887150327041116,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.25506896551724134,
|
|
"calib/step_q_c_n": 406.0,
|
|
"calib/step_q_gap": 0.00925254039646839,
|
|
"calib/step_q_w": 0.24581642512077295,
|
|
"calib/step_q_w_n": 1863.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 1706.0,
|
|
"completions/max_terminated_length": 1706.0,
|
|
"completions/mean_length": 307.9765625,
|
|
"completions/mean_terminated_length": 310.4015808105469,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 68.0,
|
|
"epoch": 0.10026666666666667,
|
|
"grad_norm": 0.01232621818780899,
|
|
"learning_rate": 2.944444444444445e-06,
|
|
"loss": 0.2311,
|
|
"num_tokens": 19374354.0,
|
|
"reward": 0.9922507405281067,
|
|
"reward_std": 0.05277451127767563,
|
|
"rewards/accuracy_reward_step": 0.2109375,
|
|
"rewards/final_brier_reward_step": 0.7891890406608582,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"step": 94
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8817452136427164,
|
|
"aux_distill/mean_u": 0.30189744452687745,
|
|
"aux_distill/n_active_tok": 239.375,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5293927125506074,
|
|
"calib/avg_num_step_conf": 7.79296875,
|
|
"calib/ece": 0.23651372549019606,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0016769230769230793,
|
|
"calib/mean_conf": 0.02442745098039216,
|
|
"calib/mu_c": 0.025676923076923076,
|
|
"calib/mu_w": 0.023999999999999997,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.003019607843137255,
|
|
"calib/std_conf": 0.03759653758792873,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.3133485193621867,
|
|
"calib/step_q_c_n": 439.0,
|
|
"calib/step_q_gap": 0.05890436769123558,
|
|
"calib/step_q_w": 0.25444415167095114,
|
|
"calib/step_q_w_n": 1556.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1162.0,
|
|
"completions/max_terminated_length": 1162.0,
|
|
"completions/mean_length": 290.3671875,
|
|
"completions/mean_terminated_length": 291.5058898925781,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 80.0,
|
|
"epoch": 0.10133333333333333,
|
|
"grad_norm": 0.013984967023134232,
|
|
"learning_rate": 2.916666666666667e-06,
|
|
"loss": 0.2771,
|
|
"num_tokens": 19554816.0,
|
|
"reward": 1.0016120672225952,
|
|
"reward_std": 0.02413991466164589,
|
|
"rewards/accuracy_reward_step": 0.25390625,
|
|
"rewards/final_brier_reward_step": 0.75322425365448,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 95
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8744475580751896,
|
|
"aux_distill/mean_u": 0.3089908472147716,
|
|
"aux_distill/n_active_tok": 243.25,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.4525542701156896,
|
|
"calib/avg_num_step_conf": 7.66015625,
|
|
"calib/ece": 0.36756862745098035,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.0075685688288054005,
|
|
"calib/mean_conf": 0.02333333333333333,
|
|
"calib/mu_c": 0.018673469387755104,
|
|
"calib/mu_w": 0.026242038216560504,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0032941176470588232,
|
|
"calib/std_conf": 0.04555826550205304,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2549625187406297,
|
|
"calib/step_q_c_n": 667.0,
|
|
"calib/step_q_gap": -0.0016547919239762177,
|
|
"calib/step_q_w": 0.2566173106646059,
|
|
"calib/step_q_w_n": 1294.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1618.0,
|
|
"completions/max_terminated_length": 1618.0,
|
|
"completions/mean_length": 290.7109375,
|
|
"completions/mean_terminated_length": 291.8509826660156,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 78.0,
|
|
"epoch": 0.1024,
|
|
"grad_norm": 0.015029377304017544,
|
|
"learning_rate": 2.888888888888889e-06,
|
|
"loss": 0.2411,
|
|
"num_tokens": 19735054.0,
|
|
"reward": 0.9980310201644897,
|
|
"reward_std": 0.027630947530269623,
|
|
"rewards/accuracy_reward_step": 0.3828125,
|
|
"rewards/final_brier_reward_step": 0.6210620999336243,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 96
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9030767139047384,
|
|
"aux_distill/mean_u": 0.30071125929448483,
|
|
"aux_distill/n_active_tok": 209.25,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5594911937377691,
|
|
"calib/avg_num_step_conf": 6.85546875,
|
|
"calib/ece": 0.11738582677165352,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.003496412263535552,
|
|
"calib/mean_conf": 0.02584251968503937,
|
|
"calib/mu_c": 0.02885714285714286,
|
|
"calib/mu_w": 0.025360730593607307,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.002716535433070867,
|
|
"calib/std_conf": 0.03778230748651435,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.23632038834951458,
|
|
"calib/step_q_c_n": 206.0,
|
|
"calib/step_q_gap": -0.023218991895805008,
|
|
"calib/step_q_w": 0.2595393802453196,
|
|
"calib/step_q_w_n": 1549.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 1566.0,
|
|
"completions/max_terminated_length": 1566.0,
|
|
"completions/mean_length": 257.7578125,
|
|
"completions/mean_terminated_length": 259.78741455078125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 75.0,
|
|
"epoch": 0.10346666666666667,
|
|
"grad_norm": 0.013639519922435284,
|
|
"learning_rate": 2.861111111111111e-06,
|
|
"loss": 0.2865,
|
|
"num_tokens": 19906112.0,
|
|
"reward": 0.9950933456420898,
|
|
"reward_std": 0.03068731725215912,
|
|
"rewards/accuracy_reward_step": 0.13671875,
|
|
"rewards/final_brier_reward_step": 0.8612803816795349,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 97
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9290945921093225,
|
|
"aux_distill/mean_u": 0.3275131960751481,
|
|
"aux_distill/n_active_tok": 197.875,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.562069774718398,
|
|
"calib/avg_num_step_conf": 6.24609375,
|
|
"calib/ece": 0.2457421875,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.011461201501877342,
|
|
"calib/mean_conf": 0.023789062500000003,
|
|
"calib/mu_c": 0.032205882352941174,
|
|
"calib/mu_w": 0.020744680851063832,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.001953125,
|
|
"calib/std_conf": 0.04323015302275825,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.29350133333333334,
|
|
"calib/step_q_c_n": 375.0,
|
|
"calib/step_q_gap": 0.04622764052287581,
|
|
"calib/step_q_w": 0.24727369281045752,
|
|
"calib/step_q_w_n": 1224.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 725.0,
|
|
"completions/max_terminated_length": 725.0,
|
|
"completions/mean_length": 244.2578125,
|
|
"completions/mean_terminated_length": 245.2156982421875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 46.0,
|
|
"epoch": 0.10453333333333334,
|
|
"grad_norm": 0.022586099803447723,
|
|
"learning_rate": 2.8333333333333335e-06,
|
|
"loss": 0.298,
|
|
"num_tokens": 20074826.0,
|
|
"reward": 1.003431797027588,
|
|
"reward_std": 0.02640916034579277,
|
|
"rewards/accuracy_reward_step": 0.265625,
|
|
"rewards/final_brier_reward_step": 0.7451449632644653,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 98
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8971052411943674,
|
|
"aux_distill/mean_u": 0.2990826274790156,
|
|
"aux_distill/n_active_tok": 205.125,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5872300376461264,
|
|
"calib/avg_num_step_conf": 6.41015625,
|
|
"calib/ece": 0.17105882352941176,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0056597978997424275,
|
|
"calib/mean_conf": 0.025019607843137257,
|
|
"calib/mu_c": 0.029591836734693882,
|
|
"calib/mu_w": 0.023932038834951454,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.00196078431372549,
|
|
"calib/std_conf": 0.033616925123338894,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.3081704035874439,
|
|
"calib/step_q_c_n": 223.0,
|
|
"calib/step_q_gap": 0.025302843643861372,
|
|
"calib/step_q_w": 0.28286755994358254,
|
|
"calib/step_q_w_n": 1418.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 964.0,
|
|
"completions/max_terminated_length": 964.0,
|
|
"completions/mean_length": 245.54296875,
|
|
"completions/mean_terminated_length": 246.50588989257812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 47.0,
|
|
"epoch": 0.1056,
|
|
"grad_norm": 0.015454606153070927,
|
|
"learning_rate": 2.805555555555556e-06,
|
|
"loss": 0.2625,
|
|
"num_tokens": 20243485.0,
|
|
"reward": 1.0008832216262817,
|
|
"reward_std": 0.021229008212685585,
|
|
"rewards/accuracy_reward_step": 0.19140625,
|
|
"rewards/final_brier_reward_step": 0.8142663836479187,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 99
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9302643742412329,
|
|
"aux_distill/mean_u": 0.34172670215847983,
|
|
"aux_distill/n_active_tok": 188.375,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5127572898799313,
|
|
"calib/avg_num_step_conf": 5.9296875,
|
|
"calib/ece": 0.15816406249999998,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.00390625,
|
|
"calib/gap": 0.0029030874785591805,
|
|
"calib/mean_conf": 0.026914062500000002,
|
|
"calib/mu_c": 0.02931818181818182,
|
|
"calib/mu_w": 0.02641509433962264,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0066015625,
|
|
"calib/std_conf": 0.07529790079906673,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2816595744680851,
|
|
"calib/step_q_c_n": 235.0,
|
|
"calib/step_q_gap": 0.029535646175021968,
|
|
"calib/step_q_w": 0.2521239282930631,
|
|
"calib/step_q_w_n": 1283.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 829.0,
|
|
"completions/max_terminated_length": 829.0,
|
|
"completions/mean_length": 240.265625,
|
|
"completions/mean_terminated_length": 241.20785522460938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 71.0,
|
|
"epoch": 0.10666666666666667,
|
|
"grad_norm": 0.025083284825086594,
|
|
"learning_rate": 2.7777777777777783e-06,
|
|
"loss": 0.2703,
|
|
"num_tokens": 20412401.0,
|
|
"reward": 1.0018420219421387,
|
|
"reward_std": 0.017068054527044296,
|
|
"rewards/accuracy_reward_step": 0.171875,
|
|
"rewards/final_brier_reward_step": 0.8318089842796326,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 100
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9050206709653139,
|
|
"aux_distill/mean_u": 0.29593671982755226,
|
|
"aux_distill/n_active_tok": 216.5,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5083482409063804,
|
|
"calib/avg_num_step_conf": 6.78515625,
|
|
"calib/ece": 0.1396456692913386,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.0015348837209302295,
|
|
"calib/mean_conf": 0.021299212598425196,
|
|
"calib/mu_c": 0.020000000000000004,
|
|
"calib/mu_w": 0.021534883720930233,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0037007874015748026,
|
|
"calib/std_conf": 0.02894953930810734,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.26361607142857146,
|
|
"calib/step_q_c_n": 224.0,
|
|
"calib/step_q_gap": -0.05943660537248602,
|
|
"calib/step_q_w": 0.3230526768010575,
|
|
"calib/step_q_w_n": 1513.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2364.0,
|
|
"completions/max_terminated_length": 2364.0,
|
|
"completions/mean_length": 281.3125,
|
|
"completions/mean_terminated_length": 282.41571044921875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 70.0,
|
|
"epoch": 0.10773333333333333,
|
|
"grad_norm": 0.0935661792755127,
|
|
"learning_rate": 2.7500000000000004e-06,
|
|
"loss": 0.3368,
|
|
"num_tokens": 20591409.0,
|
|
"reward": 0.9945935010910034,
|
|
"reward_std": 0.02838076651096344,
|
|
"rewards/accuracy_reward_step": 0.15234375,
|
|
"rewards/final_brier_reward_step": 0.8446558117866516,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 101
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8885838855057955,
|
|
"aux_distill/mean_u": 0.3037922940254807,
|
|
"aux_distill/n_active_tok": 198.0,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5216058085436346,
|
|
"calib/avg_num_step_conf": 6.1875,
|
|
"calib/ece": 0.29317647058823526,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.00392156862745098,
|
|
"calib/gap": 0.022142957845763426,
|
|
"calib/mean_conf": 0.0283921568627451,
|
|
"calib/mu_c": 0.04341463414634146,
|
|
"calib/mu_w": 0.021271676300578034,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.07588021479482467,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.30633569739952715,
|
|
"calib/step_q_c_n": 423.0,
|
|
"calib/step_q_gap": 0.017407187494273058,
|
|
"calib/step_q_w": 0.2889285099052541,
|
|
"calib/step_q_w_n": 1161.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2648.0,
|
|
"completions/max_terminated_length": 2648.0,
|
|
"completions/mean_length": 238.81640625,
|
|
"completions/mean_terminated_length": 238.81640625,
|
|
"completions/min_length": 43.0,
|
|
"completions/min_terminated_length": 43.0,
|
|
"epoch": 0.1088,
|
|
"grad_norm": 0.07671157270669937,
|
|
"learning_rate": 2.7222222222222224e-06,
|
|
"loss": 0.2947,
|
|
"num_tokens": 20759242.0,
|
|
"reward": 1.0067307949066162,
|
|
"reward_std": 0.03035442717373371,
|
|
"rewards/accuracy_reward_step": 0.3203125,
|
|
"rewards/final_brier_reward_step": 0.697055459022522,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 102
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9347370229661465,
|
|
"aux_distill/mean_u": 0.31828543972856727,
|
|
"aux_distill/n_active_tok": 190.875,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5022981781714858,
|
|
"calib/avg_num_step_conf": 5.96484375,
|
|
"calib/ece": 0.2288235294117647,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.001407320742102619,
|
|
"calib/mean_conf": 0.026549019607843144,
|
|
"calib/mu_c": 0.02548387096774194,
|
|
"calib/mu_w": 0.02689119170984456,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.00611764705882353,
|
|
"calib/std_conf": 0.054863936412243276,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2631372549019608,
|
|
"calib/step_q_c_n": 306.0,
|
|
"calib/step_q_gap": -0.03180623404152816,
|
|
"calib/step_q_w": 0.294943488943489,
|
|
"calib/step_q_w_n": 1221.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2700.0,
|
|
"completions/max_terminated_length": 2700.0,
|
|
"completions/mean_length": 257.29296875,
|
|
"completions/mean_terminated_length": 257.29296875,
|
|
"completions/min_length": 73.0,
|
|
"completions/min_terminated_length": 73.0,
|
|
"epoch": 0.10986666666666667,
|
|
"grad_norm": 0.03052721731364727,
|
|
"learning_rate": 2.6944444444444444e-06,
|
|
"loss": 0.3177,
|
|
"num_tokens": 20929661.0,
|
|
"reward": 1.000415325164795,
|
|
"reward_std": 0.025061478838324547,
|
|
"rewards/accuracy_reward_step": 0.2421875,
|
|
"rewards/final_brier_reward_step": 0.762549638748169,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 103
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8932774048298597,
|
|
"aux_distill/mean_u": 0.2898657778103306,
|
|
"aux_distill/n_active_tok": 189.5,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.4949517867271696,
|
|
"calib/avg_num_step_conf": 5.9375,
|
|
"calib/ece": 0.1467109375,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.003585025524673848,
|
|
"calib/mean_conf": 0.0181328125,
|
|
"calib/mu_c": 0.015121951219512198,
|
|
"calib/mu_w": 0.018706976744186046,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0023437500000000003,
|
|
"calib/std_conf": 0.03397560501065204,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.30661643835616437,
|
|
"calib/step_q_c_n": 219.0,
|
|
"calib/step_q_gap": 0.023472702768155163,
|
|
"calib/step_q_w": 0.2831437355880092,
|
|
"calib/step_q_w_n": 1301.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 814.0,
|
|
"completions/max_terminated_length": 814.0,
|
|
"completions/mean_length": 246.51171875,
|
|
"completions/mean_terminated_length": 247.4784393310547,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 46.0,
|
|
"epoch": 0.11093333333333333,
|
|
"grad_norm": 0.03477464243769646,
|
|
"learning_rate": 2.666666666666667e-06,
|
|
"loss": 0.2265,
|
|
"num_tokens": 21099448.0,
|
|
"reward": 1.0016803741455078,
|
|
"reward_std": 0.006197728216648102,
|
|
"rewards/accuracy_reward_step": 0.16015625,
|
|
"rewards/final_brier_reward_step": 0.8432043790817261,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 104
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8812897838652134,
|
|
"aux_distill/mean_u": 0.27382185952685056,
|
|
"aux_distill/n_active_tok": 196.625,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5686187518485655,
|
|
"calib/avg_num_step_conf": 6.1640625,
|
|
"calib/ece": 0.180328125,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.0007855664004732289,
|
|
"calib/mean_conf": 0.019125000000000003,
|
|
"calib/mu_c": 0.01848979591836735,
|
|
"calib/mu_w": 0.01927536231884058,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0040234375,
|
|
"calib/std_conf": 0.03281696398206269,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2969296296296296,
|
|
"calib/step_q_c_n": 270.0,
|
|
"calib/step_q_gap": -0.0015341318382602886,
|
|
"calib/step_q_w": 0.2984637614678899,
|
|
"calib/step_q_w_n": 1308.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 746.0,
|
|
"completions/max_terminated_length": 746.0,
|
|
"completions/mean_length": 252.25,
|
|
"completions/mean_terminated_length": 253.23922729492188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 72.0,
|
|
"epoch": 0.112,
|
|
"grad_norm": 0.0723215714097023,
|
|
"learning_rate": 2.6388888888888893e-06,
|
|
"loss": 0.2398,
|
|
"num_tokens": 21269784.0,
|
|
"reward": 1.0028176307678223,
|
|
"reward_std": 0.007789917290210724,
|
|
"rewards/accuracy_reward_step": 0.19140625,
|
|
"rewards/final_brier_reward_step": 0.8142291307449341,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 105
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9138540420681238,
|
|
"aux_distill/mean_u": 0.3225658485289486,
|
|
"aux_distill/n_active_tok": 176.375,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.4124025974025974,
|
|
"calib/avg_num_step_conf": 5.81640625,
|
|
"calib/ece": 0.12572549019607845,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.007142857142857145,
|
|
"calib/mean_conf": 0.017019607843137254,
|
|
"calib/mu_c": 0.010857142857142857,
|
|
"calib/mu_w": 0.018000000000000002,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.002745098039215686,
|
|
"calib/std_conf": 0.03509845546910577,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.3506372549019608,
|
|
"calib/step_q_c_n": 204.0,
|
|
"calib/step_q_gap": 0.06313639887083239,
|
|
"calib/step_q_w": 0.2875008560311284,
|
|
"calib/step_q_w_n": 1285.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 858.0,
|
|
"completions/max_terminated_length": 858.0,
|
|
"completions/mean_length": 233.23828125,
|
|
"completions/mean_terminated_length": 234.1529541015625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 74.0,
|
|
"epoch": 0.11306666666666666,
|
|
"grad_norm": 0.07096699625253677,
|
|
"learning_rate": 2.6111111111111113e-06,
|
|
"loss": 0.2551,
|
|
"num_tokens": 21434077.0,
|
|
"reward": 0.996820330619812,
|
|
"reward_std": 0.015735477209091187,
|
|
"rewards/accuracy_reward_step": 0.13671875,
|
|
"rewards/final_brier_reward_step": 0.8608281016349792,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 106
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9093293808400631,
|
|
"aux_distill/mean_u": 0.30919132201128474,
|
|
"aux_distill/n_active_tok": 197.25,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.4912209302325581,
|
|
"calib/avg_num_step_conf": 6.3984375,
|
|
"calib/ece": 0.14180392156862745,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.012360465116279072,
|
|
"calib/mean_conf": 0.016078431372549017,
|
|
"calib/mu_c": 0.026500000000000003,
|
|
"calib/mu_w": 0.01413953488372093,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0005098039215686275,
|
|
"calib/std_conf": 0.04103936089460272,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.3354362139917696,
|
|
"calib/step_q_c_n": 243.0,
|
|
"calib/step_q_gap": 0.06793012080180544,
|
|
"calib/step_q_w": 0.26750609318996416,
|
|
"calib/step_q_w_n": 1395.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1903.0,
|
|
"completions/max_terminated_length": 1903.0,
|
|
"completions/mean_length": 252.84765625,
|
|
"completions/mean_terminated_length": 253.8392333984375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 62.0,
|
|
"epoch": 0.11413333333333334,
|
|
"grad_norm": 0.02068367414176464,
|
|
"learning_rate": 2.5833333333333337e-06,
|
|
"loss": 0.3011,
|
|
"num_tokens": 21603422.0,
|
|
"reward": 0.9953607320785522,
|
|
"reward_std": 0.030102472752332687,
|
|
"rewards/accuracy_reward_step": 0.15625,
|
|
"rewards/final_brier_reward_step": 0.8422839641571045,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 107
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8714952226728201,
|
|
"aux_distill/mean_u": 0.2846358781803798,
|
|
"aux_distill/n_active_tok": 194.5,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.4913869913869914,
|
|
"calib/avg_num_step_conf": 6.109375,
|
|
"calib/ece": 0.27914453125000005,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.004782150282150286,
|
|
"calib/mean_conf": 0.017183593750000004,
|
|
"calib/mu_c": 0.013783783783783784,
|
|
"calib/mu_w": 0.01856593406593407,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0036328125,
|
|
"calib/std_conf": 0.034780021378443125,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.29395225464190977,
|
|
"calib/step_q_c_n": 377.0,
|
|
"calib/step_q_gap": 0.020776180505431285,
|
|
"calib/step_q_w": 0.2731760741364785,
|
|
"calib/step_q_w_n": 1187.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 893.0,
|
|
"completions/max_terminated_length": 893.0,
|
|
"completions/mean_length": 248.33203125,
|
|
"completions/mean_terminated_length": 249.30589294433594,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 77.0,
|
|
"epoch": 0.1152,
|
|
"grad_norm": 0.03424735739827156,
|
|
"learning_rate": 2.5555555555555557e-06,
|
|
"loss": 0.2994,
|
|
"num_tokens": 21770227.0,
|
|
"reward": 1.0032318830490112,
|
|
"reward_std": 0.008335249498486519,
|
|
"rewards/accuracy_reward_step": 0.2890625,
|
|
"rewards/final_brier_reward_step": 0.7174013257026672,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 108
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8937185276299715,
|
|
"aux_distill/mean_u": 0.27320062215980184,
|
|
"aux_distill/n_active_tok": 207.0,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.4570330167345093,
|
|
"calib/avg_num_step_conf": 6.5546875,
|
|
"calib/ece": 0.204938671875,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.001197602894617818,
|
|
"calib/mean_conf": 0.011467578124999999,
|
|
"calib/mu_c": 0.010527272727272729,
|
|
"calib/mu_w": 0.011724875621890547,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.00078125,
|
|
"calib/std_conf": 0.018586361021176834,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.264029304029304,
|
|
"calib/step_q_c_n": 273.0,
|
|
"calib/step_q_gap": 0.001200834278414331,
|
|
"calib/step_q_w": 0.2628284697508897,
|
|
"calib/step_q_w_n": 1405.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 882.0,
|
|
"completions/max_terminated_length": 882.0,
|
|
"completions/mean_length": 262.53125,
|
|
"completions/mean_terminated_length": 263.560791015625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 77.0,
|
|
"epoch": 0.11626666666666667,
|
|
"grad_norm": 0.0347769632935524,
|
|
"learning_rate": 2.5277777777777778e-06,
|
|
"loss": 0.2498,
|
|
"num_tokens": 21942035.0,
|
|
"reward": 1.0020232200622559,
|
|
"reward_std": 0.0043099867179989815,
|
|
"rewards/accuracy_reward_step": 0.21484375,
|
|
"rewards/final_brier_reward_step": 0.7892027497291565,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 109
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9149785321205854,
|
|
"aux_distill/mean_u": 0.2944213236797313,
|
|
"aux_distill/n_active_tok": 189.625,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5183720930232557,
|
|
"calib/avg_num_step_conf": 5.92578125,
|
|
"calib/ece": 0.14976470588235294,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.0006220930232558129,
|
|
"calib/mean_conf": 0.00827450980392157,
|
|
"calib/mu_c": 0.007750000000000002,
|
|
"calib/mu_w": 0.008372093023255815,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.000588235294117647,
|
|
"calib/std_conf": 0.01403647689355264,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.22549107142857144,
|
|
"calib/step_q_c_n": 224.0,
|
|
"calib/step_q_gap": -0.031654326869959115,
|
|
"calib/step_q_w": 0.25714539829853056,
|
|
"calib/step_q_w_n": 1293.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2691.0,
|
|
"completions/max_terminated_length": 2691.0,
|
|
"completions/mean_length": 269.8828125,
|
|
"completions/mean_terminated_length": 269.8828125,
|
|
"completions/min_length": 47.0,
|
|
"completions/min_terminated_length": 47.0,
|
|
"epoch": 0.11733333333333333,
|
|
"grad_norm": 0.024901842698454857,
|
|
"learning_rate": 2.5e-06,
|
|
"loss": 0.3063,
|
|
"num_tokens": 22116045.0,
|
|
"reward": 0.9971724152565002,
|
|
"reward_std": 0.013311240822076797,
|
|
"rewards/accuracy_reward_step": 0.15625,
|
|
"rewards/final_brier_reward_step": 0.84200119972229,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 110
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9114784337580204,
|
|
"aux_distill/mean_u": 0.3028130204678725,
|
|
"aux_distill/n_active_tok": 196.0,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.514842796092796,
|
|
"calib/avg_num_step_conf": 6.34765625,
|
|
"calib/ece": 0.27483070866141734,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0008017399267399271,
|
|
"calib/mean_conf": 0.008633858267716536,
|
|
"calib/mu_c": 0.009208333333333334,
|
|
"calib/mu_w": 0.008406593406593407,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.011517694162225249,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2838389830508475,
|
|
"calib/step_q_c_n": 354.0,
|
|
"calib/step_q_gap": 0.03348414434117003,
|
|
"calib/step_q_w": 0.25035483870967745,
|
|
"calib/step_q_w_n": 1271.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2737.0,
|
|
"completions/max_terminated_length": 2737.0,
|
|
"completions/mean_length": 255.3125,
|
|
"completions/mean_terminated_length": 256.3137512207031,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 69.0,
|
|
"epoch": 0.1184,
|
|
"grad_norm": 0.022217007353901863,
|
|
"learning_rate": 2.4722222222222226e-06,
|
|
"loss": 0.3231,
|
|
"num_tokens": 22288813.0,
|
|
"reward": 0.994674563407898,
|
|
"reward_std": 0.026389483362436295,
|
|
"rewards/accuracy_reward_step": 0.28125,
|
|
"rewards/final_brier_reward_step": 0.7159115672111511,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 111
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9168477077037096,
|
|
"aux_distill/mean_u": 0.3094974264635611,
|
|
"aux_distill/n_active_tok": 191.125,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5288595594386096,
|
|
"calib/avg_num_step_conf": 6.0625,
|
|
"calib/ece": 0.2000078125,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0006604703039315915,
|
|
"calib/mean_conf": 0.0070234375,
|
|
"calib/mu_c": 0.007547169811320755,
|
|
"calib/mu_w": 0.006886699507389163,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.010927649023627807,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2449549180327869,
|
|
"calib/step_q_c_n": 244.0,
|
|
"calib/step_q_gap": 0.023272196320248656,
|
|
"calib/step_q_w": 0.22168272171253825,
|
|
"calib/step_q_w_n": 1308.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 859.0,
|
|
"completions/max_terminated_length": 859.0,
|
|
"completions/mean_length": 260.26171875,
|
|
"completions/mean_terminated_length": 261.2823791503906,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 50.0,
|
|
"epoch": 0.11946666666666667,
|
|
"grad_norm": 0.04051944240927696,
|
|
"learning_rate": 2.4444444444444447e-06,
|
|
"loss": 0.2796,
|
|
"num_tokens": 22463360.0,
|
|
"reward": 1.0014780759811401,
|
|
"reward_std": 0.0030970366206020117,
|
|
"rewards/accuracy_reward_step": 0.20703125,
|
|
"rewards/final_brier_reward_step": 0.7959250211715698,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 112
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9045503884553909,
|
|
"aux_distill/mean_u": 0.27518384129040346,
|
|
"aux_distill/n_active_tok": 184.625,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5271070869662419,
|
|
"calib/avg_num_step_conf": 5.7734375,
|
|
"calib/ece": 0.1560156862745098,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0032783366867873923,
|
|
"calib/mean_conf": 0.008690196078431373,
|
|
"calib/mu_c": 0.01142857142857143,
|
|
"calib/mu_w": 0.008150234741784038,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.014898154312609919,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.23245714285714286,
|
|
"calib/step_q_c_n": 210.0,
|
|
"calib/step_q_gap": -0.044223456511942316,
|
|
"calib/step_q_w": 0.2766805993690852,
|
|
"calib/step_q_w_n": 1268.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1069.0,
|
|
"completions/max_terminated_length": 1069.0,
|
|
"completions/mean_length": 238.265625,
|
|
"completions/mean_terminated_length": 239.20001220703125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 50.0,
|
|
"epoch": 0.12053333333333334,
|
|
"grad_norm": 0.049823589622974396,
|
|
"learning_rate": 2.4166666666666667e-06,
|
|
"loss": 0.274,
|
|
"num_tokens": 22629556.0,
|
|
"reward": 0.9978206157684326,
|
|
"reward_std": 0.015029273927211761,
|
|
"rewards/accuracy_reward_step": 0.1640625,
|
|
"rewards/final_brier_reward_step": 0.8354849815368652,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 113
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8853135239332914,
|
|
"aux_distill/mean_u": 0.2820834696545772,
|
|
"aux_distill/n_active_tok": 185.625,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5163684811572136,
|
|
"calib/avg_num_step_conf": 5.93359375,
|
|
"calib/ece": 0.2709765625,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0013239436619718308,
|
|
"calib/mean_conf": 0.0063671875,
|
|
"calib/mu_c": 0.007323943661971831,
|
|
"calib/mu_w": 0.006,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.009168876612750535,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.254423076923077,
|
|
"calib/step_q_c_n": 364.0,
|
|
"calib/step_q_gap": -0.014064368964368856,
|
|
"calib/step_q_w": 0.26848744588744583,
|
|
"calib/step_q_w_n": 1155.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1002.0,
|
|
"completions/max_terminated_length": 1002.0,
|
|
"completions/mean_length": 248.54296875,
|
|
"completions/mean_terminated_length": 249.5176544189453,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 84.0,
|
|
"epoch": 0.1216,
|
|
"grad_norm": 0.024916112422943115,
|
|
"learning_rate": 2.388888888888889e-06,
|
|
"loss": 0.2992,
|
|
"num_tokens": 22798207.0,
|
|
"reward": 1.0019688606262207,
|
|
"reward_std": 0.004232214763760567,
|
|
"rewards/accuracy_reward_step": 0.27734375,
|
|
"rewards/final_brier_reward_step": 0.7265941500663757,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 114
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8834676668047905,
|
|
"aux_distill/mean_u": 0.2663964808695613,
|
|
"aux_distill/n_active_tok": 208.875,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5605055747789311,
|
|
"calib/avg_num_step_conf": 6.52734375,
|
|
"calib/ece": 0.19196078431372549,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0031862745098039224,
|
|
"calib/mean_conf": 0.00803921568627451,
|
|
"calib/mu_c": 0.010588235294117648,
|
|
"calib/mu_w": 0.007401960784313726,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.014527810766229086,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.352641975308642,
|
|
"calib/step_q_c_n": 324.0,
|
|
"calib/step_q_gap": 0.05986988919134434,
|
|
"calib/step_q_w": 0.29277208611729766,
|
|
"calib/step_q_w_n": 1347.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1174.0,
|
|
"completions/max_terminated_length": 1174.0,
|
|
"completions/mean_length": 273.64453125,
|
|
"completions/mean_terminated_length": 274.7176513671875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 76.0,
|
|
"epoch": 0.12266666666666666,
|
|
"grad_norm": 0.019408583641052246,
|
|
"learning_rate": 2.361111111111111e-06,
|
|
"loss": 0.2465,
|
|
"num_tokens": 22973524.0,
|
|
"reward": 0.9980658292770386,
|
|
"reward_std": 0.015270305797457695,
|
|
"rewards/accuracy_reward_step": 0.19921875,
|
|
"rewards/final_brier_reward_step": 0.8008191585540771,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 115
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8767251344397664,
|
|
"aux_distill/mean_u": 0.2608851315583497,
|
|
"aux_distill/n_active_tok": 212.625,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5329034391534392,
|
|
"calib/avg_num_step_conf": 6.64453125,
|
|
"calib/ece": 0.24080000000000001,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.00045734126984127,
|
|
"calib/mean_conf": 0.0062588235294117655,
|
|
"calib/mu_c": 0.006603174603174604,
|
|
"calib/mu_w": 0.006145833333333334,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.010048298587237041,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.27004804804804805,
|
|
"calib/step_q_c_n": 333.0,
|
|
"calib/step_q_gap": 0.026137229334597778,
|
|
"calib/step_q_w": 0.24391081871345027,
|
|
"calib/step_q_w_n": 1368.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2592.0,
|
|
"completions/max_terminated_length": 2592.0,
|
|
"completions/mean_length": 286.75,
|
|
"completions/mean_terminated_length": 286.75,
|
|
"completions/min_length": 69.0,
|
|
"completions/min_terminated_length": 69.0,
|
|
"epoch": 0.12373333333333333,
|
|
"grad_norm": 0.03141540288925171,
|
|
"learning_rate": 2.3333333333333336e-06,
|
|
"loss": 0.2802,
|
|
"num_tokens": 23151452.0,
|
|
"reward": 0.9976489543914795,
|
|
"reward_std": 0.014305486343801022,
|
|
"rewards/accuracy_reward_step": 0.24609375,
|
|
"rewards/final_brier_reward_step": 0.753110408782959,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 116
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8591549377888441,
|
|
"aux_distill/mean_u": 0.24090663719646957,
|
|
"aux_distill/n_active_tok": 207.375,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.551553449183781,
|
|
"calib/avg_num_step_conf": 6.515625,
|
|
"calib/ece": 0.16746093749999996,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.013901000526592946,
|
|
"calib/mean_conf": 0.0083203125,
|
|
"calib/mu_c": 0.01977777777777778,
|
|
"calib/mu_w": 0.005876777251184834,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.03791323020928635,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.30927450980392157,
|
|
"calib/step_q_c_n": 306.0,
|
|
"calib/step_q_gap": 0.040909752094670515,
|
|
"calib/step_q_w": 0.26836475770925106,
|
|
"calib/step_q_w_n": 1362.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 925.0,
|
|
"completions/max_terminated_length": 925.0,
|
|
"completions/mean_length": 280.37890625,
|
|
"completions/mean_terminated_length": 281.47845458984375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 61.0,
|
|
"epoch": 0.1248,
|
|
"grad_norm": 0.012358613312244415,
|
|
"learning_rate": 2.305555555555556e-06,
|
|
"loss": 0.2917,
|
|
"num_tokens": 23329829.0,
|
|
"reward": 1.002723217010498,
|
|
"reward_std": 0.006827778648585081,
|
|
"rewards/accuracy_reward_step": 0.17578125,
|
|
"rewards/final_brier_reward_step": 0.8296651840209961,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 117
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8510748287662864,
|
|
"aux_distill/mean_u": 0.23291316663587935,
|
|
"aux_distill/n_active_tok": 196.0,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5326105886450714,
|
|
"calib/avg_num_step_conf": 6.125,
|
|
"calib/ece": 0.22282421875,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.0020231626610936956,
|
|
"calib/mean_conf": 0.008582031249999999,
|
|
"calib/mu_c": 0.007017241379310346,
|
|
"calib/mu_w": 0.009040404040404041,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.002421875,
|
|
"calib/std_conf": 0.03934672979898104,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.22057758620689655,
|
|
"calib/step_q_c_n": 348.0,
|
|
"calib/step_q_gap": -0.04127733182589033,
|
|
"calib/step_q_w": 0.2618549180327869,
|
|
"calib/step_q_w_n": 1220.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1122.0,
|
|
"completions/max_terminated_length": 1122.0,
|
|
"completions/mean_length": 262.4453125,
|
|
"completions/mean_terminated_length": 263.4745178222656,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 71.0,
|
|
"epoch": 0.12586666666666665,
|
|
"grad_norm": 0.012375210411846638,
|
|
"learning_rate": 2.277777777777778e-06,
|
|
"loss": 0.2421,
|
|
"num_tokens": 23501023.0,
|
|
"reward": 1.0007789134979248,
|
|
"reward_std": 0.005302540026605129,
|
|
"rewards/accuracy_reward_step": 0.2265625,
|
|
"rewards/final_brier_reward_step": 0.7749953269958496,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 118
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9049360640347004,
|
|
"aux_distill/mean_u": 0.28474286811428084,
|
|
"aux_distill/n_active_tok": 187.75,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.4892958892958894,
|
|
"calib/avg_num_step_conf": 5.8671875,
|
|
"calib/ece": 0.2501653543307087,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.0032437932437932426,
|
|
"calib/mean_conf": 0.00825984251968504,
|
|
"calib/mu_c": 0.005846153846153846,
|
|
"calib/mu_w": 0.009089947089947089,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0012598425196850393,
|
|
"calib/std_conf": 0.022346664779041493,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.25038108882521487,
|
|
"calib/step_q_c_n": 349.0,
|
|
"calib/step_q_gap": -0.061822727306615144,
|
|
"calib/step_q_w": 0.31220381613183,
|
|
"calib/step_q_w_n": 1153.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 949.0,
|
|
"completions/max_terminated_length": 949.0,
|
|
"completions/mean_length": 262.94921875,
|
|
"completions/mean_terminated_length": 263.98040771484375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 66.0,
|
|
"epoch": 0.12693333333333334,
|
|
"grad_norm": 0.01229011919349432,
|
|
"learning_rate": 2.25e-06,
|
|
"loss": 0.2588,
|
|
"num_tokens": 23673402.0,
|
|
"reward": 0.9933902621269226,
|
|
"reward_std": 0.025129135698080063,
|
|
"rewards/accuracy_reward_step": 0.25390625,
|
|
"rewards/final_brier_reward_step": 0.74068683385849,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 119
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8911927230656147,
|
|
"aux_distill/mean_u": 0.27544143435489915,
|
|
"aux_distill/n_active_tok": 195.125,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.4726834724253635,
|
|
"calib/avg_num_step_conf": 6.1171875,
|
|
"calib/ece": 0.22398828125,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.0013732255011614887,
|
|
"calib/mean_conf": 0.006480468749999999,
|
|
"calib/mu_c": 0.005423728813559323,
|
|
"calib/mu_w": 0.006796954314720812,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.008836603195248354,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2671947194719472,
|
|
"calib/step_q_c_n": 303.0,
|
|
"calib/step_q_gap": 0.0029127717284792576,
|
|
"calib/step_q_w": 0.2642819477434679,
|
|
"calib/step_q_w_n": 1263.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1046.0,
|
|
"completions/max_terminated_length": 1046.0,
|
|
"completions/mean_length": 260.39453125,
|
|
"completions/mean_terminated_length": 261.41571044921875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 77.0,
|
|
"epoch": 0.128,
|
|
"grad_norm": 0.01112359669059515,
|
|
"learning_rate": 2.222222222222222e-06,
|
|
"loss": 0.2705,
|
|
"num_tokens": 23846751.0,
|
|
"reward": 0.9972838759422302,
|
|
"reward_std": 0.013594256713986397,
|
|
"rewards/accuracy_reward_step": 0.23046875,
|
|
"rewards/final_brier_reward_step": 0.7680052518844604,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 120
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9054578468203545,
|
|
"aux_distill/mean_u": 0.27849697061773127,
|
|
"aux_distill/n_active_tok": 196.25,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.523109243697479,
|
|
"calib/avg_num_step_conf": 6.1328125,
|
|
"calib/ece": 0.18948616600790513,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0020778311324529813,
|
|
"calib/mean_conf": 0.005059288537549407,
|
|
"calib/mu_c": 0.0067346938775510205,
|
|
"calib/mu_w": 0.004656862745098039,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.00043478260869565214,
|
|
"calib/std_conf": 0.010122590607178607,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.1908032128514056,
|
|
"calib/step_q_c_n": 249.0,
|
|
"calib/step_q_gap": -0.04857680228863981,
|
|
"calib/step_q_w": 0.23938001514004542,
|
|
"calib/step_q_w_n": 1321.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2785.0,
|
|
"completions/max_terminated_length": 2785.0,
|
|
"completions/mean_length": 278.1640625,
|
|
"completions/mean_terminated_length": 279.2549133300781,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 49.0,
|
|
"epoch": 0.12906666666666666,
|
|
"grad_norm": 0.01199932862073183,
|
|
"learning_rate": 2.1944444444444445e-06,
|
|
"loss": 0.2978,
|
|
"num_tokens": 24023017.0,
|
|
"reward": 0.9895070195198059,
|
|
"reward_std": 0.03511889651417732,
|
|
"rewards/accuracy_reward_step": 0.19140625,
|
|
"rewards/final_brier_reward_step": 0.7993265390396118,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"step": 121
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9337432067841291,
|
|
"aux_distill/mean_u": 0.34765335554757254,
|
|
"aux_distill/n_active_tok": 192.5,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.46111111111111114,
|
|
"calib/avg_num_step_conf": 6.015625,
|
|
"calib/ece": 0.17176470588235296,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.001126984126984127,
|
|
"calib/mean_conf": 0.004705882352941177,
|
|
"calib/mu_c": 0.003777777777777778,
|
|
"calib/mu_w": 0.004904761904761905,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.007071883343592063,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.32420078740157476,
|
|
"calib/step_q_c_n": 254.0,
|
|
"calib/step_q_gap": 0.05633997869239904,
|
|
"calib/step_q_w": 0.2678608087091757,
|
|
"calib/step_q_w_n": 1286.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1603.0,
|
|
"completions/max_terminated_length": 1603.0,
|
|
"completions/mean_length": 260.8125,
|
|
"completions/mean_terminated_length": 261.8352966308594,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 73.0,
|
|
"epoch": 0.13013333333333332,
|
|
"grad_norm": 0.013110945001244545,
|
|
"learning_rate": 2.166666666666667e-06,
|
|
"loss": 0.2786,
|
|
"num_tokens": 24197129.0,
|
|
"reward": 0.9967218637466431,
|
|
"reward_std": 0.012659726664423943,
|
|
"rewards/accuracy_reward_step": 0.17578125,
|
|
"rewards/final_brier_reward_step": 0.8215687274932861,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 122
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9081991314888,
|
|
"aux_distill/mean_u": 0.3049377110123672,
|
|
"aux_distill/n_active_tok": 191.0,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.46956362302896953,
|
|
"calib/avg_num_step_conf": 5.99609375,
|
|
"calib/ece": 0.20609375,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.00218921892189219,
|
|
"calib/mean_conf": 0.006171875,
|
|
"calib/mu_c": 0.0044444444444444444,
|
|
"calib/mu_w": 0.006633663366336634,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0006640625,
|
|
"calib/std_conf": 0.013841801146685175,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.250663082437276,
|
|
"calib/step_q_c_n": 279.0,
|
|
"calib/step_q_gap": -0.02002242711686414,
|
|
"calib/step_q_w": 0.2706855095541401,
|
|
"calib/step_q_w_n": 1256.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 824.0,
|
|
"completions/max_terminated_length": 824.0,
|
|
"completions/mean_length": 273.9453125,
|
|
"completions/mean_terminated_length": 275.0196228027344,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 74.0,
|
|
"epoch": 0.1312,
|
|
"grad_norm": 0.012809602543711662,
|
|
"learning_rate": 2.138888888888889e-06,
|
|
"loss": 0.2877,
|
|
"num_tokens": 24372547.0,
|
|
"reward": 1.0008225440979004,
|
|
"reward_std": 0.002213716506958008,
|
|
"rewards/accuracy_reward_step": 0.2109375,
|
|
"rewards/final_brier_reward_step": 0.7907078266143799,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 123
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9068530723452568,
|
|
"aux_distill/mean_u": 0.24844131723727766,
|
|
"aux_distill/n_active_tok": 165.125,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5376205520452277,
|
|
"calib/avg_num_step_conf": 5.203125,
|
|
"calib/ece": 0.23624609375,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0010989358164283343,
|
|
"calib/mean_conf": 0.00594140625,
|
|
"calib/mu_c": 0.006774193548387097,
|
|
"calib/mu_w": 0.005675257731958763,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.007696417869532613,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.3619039039039039,
|
|
"calib/step_q_c_n": 333.0,
|
|
"calib/step_q_gap": 0.111948948948949,
|
|
"calib/step_q_w": 0.24995495495495493,
|
|
"calib/step_q_w_n": 999.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 611.0,
|
|
"completions/max_terminated_length": 611.0,
|
|
"completions/mean_length": 237.73046875,
|
|
"completions/mean_terminated_length": 238.6627655029297,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 71.0,
|
|
"epoch": 0.13226666666666667,
|
|
"grad_norm": 0.011809157207608223,
|
|
"learning_rate": 2.1111111111111114e-06,
|
|
"loss": 0.2786,
|
|
"num_tokens": 24540222.0,
|
|
"reward": 1.0015933513641357,
|
|
"reward_std": 0.0029463740065693855,
|
|
"rewards/accuracy_reward_step": 0.2421875,
|
|
"rewards/final_brier_reward_step": 0.7609992027282715,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 124
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8929617684334517,
|
|
"aux_distill/mean_u": 0.2558508365630142,
|
|
"aux_distill/n_active_tok": 174.375,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.48128957973517555,
|
|
"calib/avg_num_step_conf": 5.44921875,
|
|
"calib/ece": 0.24052421875000002,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.0013656879677605071,
|
|
"calib/mean_conf": 0.006585156250000001,
|
|
"calib/mu_c": 0.005555555555555556,
|
|
"calib/mu_w": 0.006921243523316063,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0005078125,
|
|
"calib/std_conf": 0.012401295231268625,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.32620000000000005,
|
|
"calib/step_q_c_n": 300.0,
|
|
"calib/step_q_gap": 0.02711415525114158,
|
|
"calib/step_q_w": 0.29908584474885846,
|
|
"calib/step_q_w_n": 1095.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 905.0,
|
|
"completions/max_terminated_length": 905.0,
|
|
"completions/mean_length": 250.09765625,
|
|
"completions/mean_terminated_length": 251.0784454345703,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 67.0,
|
|
"epoch": 0.13333333333333333,
|
|
"grad_norm": 0.012777113355696201,
|
|
"learning_rate": 2.0833333333333334e-06,
|
|
"loss": 0.2809,
|
|
"num_tokens": 24709055.0,
|
|
"reward": 1.0012686252593994,
|
|
"reward_std": 0.0029744510538876057,
|
|
"rewards/accuracy_reward_step": 0.24609375,
|
|
"rewards/final_brier_reward_step": 0.7564435005187988,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 125
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8970666322857141,
|
|
"aux_distill/mean_u": 0.2859182237069479,
|
|
"aux_distill/n_active_tok": 183.0,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5622351206928322,
|
|
"calib/avg_num_step_conf": 5.71875,
|
|
"calib/ece": 0.2055921568627451,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.00180154781647319,
|
|
"calib/mean_conf": 0.006172549019607843,
|
|
"calib/mu_c": 0.0075925925925925935,
|
|
"calib/mu_w": 0.0057910447761194035,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.008019683589459628,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.3376099585062241,
|
|
"calib/step_q_c_n": 241.0,
|
|
"calib/step_q_gap": 0.06509564943018159,
|
|
"calib/step_q_w": 0.2725143090760425,
|
|
"calib/step_q_w_n": 1223.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 678.0,
|
|
"completions/max_terminated_length": 678.0,
|
|
"completions/mean_length": 254.7890625,
|
|
"completions/mean_terminated_length": 255.7882537841797,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 43.0,
|
|
"epoch": 0.1344,
|
|
"grad_norm": 0.010087775066494942,
|
|
"learning_rate": 2.0555555555555555e-06,
|
|
"loss": 0.281,
|
|
"num_tokens": 24879745.0,
|
|
"reward": 0.993738055229187,
|
|
"reward_std": 0.02483932301402092,
|
|
"rewards/accuracy_reward_step": 0.2109375,
|
|
"rewards/final_brier_reward_step": 0.784351110458374,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 126
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8760164938867092,
|
|
"aux_distill/mean_u": 0.25388843472722616,
|
|
"aux_distill/n_active_tok": 181.875,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5213604693802714,
|
|
"calib/avg_num_step_conf": 5.7421875,
|
|
"calib/ece": 0.20528125,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0008111477814448107,
|
|
"calib/mean_conf": 0.00565625,
|
|
"calib/mu_c": 0.006296296296296296,
|
|
"calib/mu_w": 0.005485148514851486,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.008934166773544134,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2709774436090226,
|
|
"calib/step_q_c_n": 266.0,
|
|
"calib/step_q_gap": 0.003449370519321593,
|
|
"calib/step_q_w": 0.267528073089701,
|
|
"calib/step_q_w_n": 1204.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1207.0,
|
|
"completions/max_terminated_length": 1207.0,
|
|
"completions/mean_length": 248.9765625,
|
|
"completions/mean_terminated_length": 249.9529571533203,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 78.0,
|
|
"epoch": 0.13546666666666668,
|
|
"grad_norm": 0.010908103547990322,
|
|
"learning_rate": 2.027777777777778e-06,
|
|
"loss": 0.2468,
|
|
"num_tokens": 25047155.0,
|
|
"reward": 0.9973659515380859,
|
|
"reward_std": 0.013409133069217205,
|
|
"rewards/accuracy_reward_step": 0.2109375,
|
|
"rewards/final_brier_reward_step": 0.7877006530761719,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 127
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9018064886331558,
|
|
"aux_distill/mean_u": 0.31848810168209557,
|
|
"aux_distill/n_active_tok": 188.875,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.518262987012987,
|
|
"calib/avg_num_step_conf": 6.19140625,
|
|
"calib/ece": 0.21394566929133857,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0018440836940836923,
|
|
"calib/mean_conf": 0.006526771653543308,
|
|
"calib/mu_c": 0.007964285714285714,
|
|
"calib/mu_w": 0.006120202020202021,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.010045008544764575,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.22672142857142857,
|
|
"calib/step_q_c_n": 280.0,
|
|
"calib/step_q_gap": -0.05722033388067871,
|
|
"calib/step_q_w": 0.2839417624521073,
|
|
"calib/step_q_w_n": 1305.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2661.0,
|
|
"completions/max_terminated_length": 2661.0,
|
|
"completions/mean_length": 287.5,
|
|
"completions/mean_terminated_length": 288.6274719238281,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 75.0,
|
|
"epoch": 0.13653333333333334,
|
|
"grad_norm": 0.010613925755023956,
|
|
"learning_rate": 2.0000000000000003e-06,
|
|
"loss": 0.3066,
|
|
"num_tokens": 25227419.0,
|
|
"reward": 0.9938584566116333,
|
|
"reward_std": 0.025512343272566795,
|
|
"rewards/accuracy_reward_step": 0.21875,
|
|
"rewards/final_brier_reward_step": 0.7767794728279114,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 128
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8973717913031578,
|
|
"aux_distill/mean_u": 0.28847371241550984,
|
|
"aux_distill/n_active_tok": 185.75,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5291068580542264,
|
|
"calib/avg_num_step_conf": 5.80859375,
|
|
"calib/ece": 0.2196,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.001554492291334397,
|
|
"calib/mean_conf": 0.0084,
|
|
"calib/mu_c": 0.007192982456140352,
|
|
"calib/mu_w": 0.008747474747474749,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.002235294117647059,
|
|
"calib/std_conf": 0.027026290758386782,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.31616983050847464,
|
|
"calib/step_q_c_n": 295.0,
|
|
"calib/step_q_gap": -0.009838558753270332,
|
|
"calib/step_q_w": 0.32600838926174497,
|
|
"calib/step_q_w_n": 1192.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1148.0,
|
|
"completions/max_terminated_length": 1148.0,
|
|
"completions/mean_length": 250.01953125,
|
|
"completions/mean_terminated_length": 251.00001525878906,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 51.0,
|
|
"epoch": 0.1376,
|
|
"grad_norm": 0.011224478483200073,
|
|
"learning_rate": 1.9722222222222224e-06,
|
|
"loss": 0.2615,
|
|
"num_tokens": 25393808.0,
|
|
"reward": 0.9972963333129883,
|
|
"reward_std": 0.014956073835492134,
|
|
"rewards/accuracy_reward_step": 0.22265625,
|
|
"rewards/final_brier_reward_step": 0.7758427858352661,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 129
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8825326859951019,
|
|
"aux_distill/mean_u": 0.2890509668456145,
|
|
"aux_distill/n_active_tok": 200.0,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.4762126077404091,
|
|
"calib/avg_num_step_conf": 6.25,
|
|
"calib/ece": 0.2328235294117647,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.00021379077235085184,
|
|
"calib/mean_conf": 0.006392156862745099,
|
|
"calib/mu_c": 0.006229508196721313,
|
|
"calib/mu_w": 0.006443298969072165,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.009220774565254115,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2656804733727811,
|
|
"calib/step_q_c_n": 338.0,
|
|
"calib/step_q_gap": -0.05083117480471494,
|
|
"calib/step_q_w": 0.31651164817749605,
|
|
"calib/step_q_w_n": 1262.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2230.0,
|
|
"completions/max_terminated_length": 2230.0,
|
|
"completions/mean_length": 263.3515625,
|
|
"completions/mean_terminated_length": 263.3515625,
|
|
"completions/min_length": 49.0,
|
|
"completions/min_terminated_length": 49.0,
|
|
"epoch": 0.13866666666666666,
|
|
"grad_norm": 0.010412861593067646,
|
|
"learning_rate": 1.944444444444445e-06,
|
|
"loss": 0.3018,
|
|
"num_tokens": 25566514.0,
|
|
"reward": 0.9975153803825378,
|
|
"reward_std": 0.014174779877066612,
|
|
"rewards/accuracy_reward_step": 0.23828125,
|
|
"rewards/final_brier_reward_step": 0.7606558203697205,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 130
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8887091083452106,
|
|
"aux_distill/mean_u": 0.24683482745846644,
|
|
"aux_distill/n_active_tok": 180.875,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.48694612024131473,
|
|
"calib/avg_num_step_conf": 5.984375,
|
|
"calib/ece": 0.17593411764705885,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.00349727480757229,
|
|
"calib/mean_conf": 0.008301176470588237,
|
|
"calib/mu_c": 0.005434782608695652,
|
|
"calib/mu_w": 0.008932057416267942,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0019215686274509803,
|
|
"calib/std_conf": 0.03141364195022127,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.3232857142857143,
|
|
"calib/step_q_c_n": 210.0,
|
|
"calib/step_q_gap": 0.020082764210071324,
|
|
"calib/step_q_w": 0.30320295007564296,
|
|
"calib/step_q_w_n": 1322.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 932.0,
|
|
"completions/max_terminated_length": 932.0,
|
|
"completions/mean_length": 244.96484375,
|
|
"completions/mean_terminated_length": 245.92550659179688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 43.0,
|
|
"epoch": 0.13973333333333332,
|
|
"grad_norm": 0.01468158233910799,
|
|
"learning_rate": 1.916666666666667e-06,
|
|
"loss": 0.2812,
|
|
"num_tokens": 25735433.0,
|
|
"reward": 0.9965444803237915,
|
|
"reward_std": 0.014411761425435543,
|
|
"rewards/accuracy_reward_step": 0.1796875,
|
|
"rewards/final_brier_reward_step": 0.817307710647583,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 131
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8533799685537815,
|
|
"aux_distill/mean_u": 0.3045404417038223,
|
|
"aux_distill/n_active_tok": 186.5,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.4730164158686731,
|
|
"calib/avg_num_step_conf": 5.8359375,
|
|
"calib/ece": 0.32980234375,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.001534309165526676,
|
|
"calib/mean_conf": 0.00613515625,
|
|
"calib/mu_c": 0.005116279069767442,
|
|
"calib/mu_w": 0.006650588235294118,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.010051524214669432,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.32058595641646487,
|
|
"calib/step_q_c_n": 413.0,
|
|
"calib/step_q_gap": 0.0213463634469922,
|
|
"calib/step_q_w": 0.29923959296947267,
|
|
"calib/step_q_w_n": 1081.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 680.0,
|
|
"completions/max_terminated_length": 680.0,
|
|
"completions/mean_length": 254.4765625,
|
|
"completions/mean_terminated_length": 255.47451782226562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 68.0,
|
|
"epoch": 0.1408,
|
|
"grad_norm": 0.01189375203102827,
|
|
"learning_rate": 1.888888888888889e-06,
|
|
"loss": 0.2757,
|
|
"num_tokens": 25906171.0,
|
|
"reward": 1.0016493797302246,
|
|
"reward_std": 0.0029917373321950436,
|
|
"rewards/accuracy_reward_step": 0.3359375,
|
|
"rewards/final_brier_reward_step": 0.667361319065094,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 132
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9298758413642645,
|
|
"aux_distill/mean_u": 0.31848106660529757,
|
|
"aux_distill/n_active_tok": 199.25,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.49122422114962705,
|
|
"calib/avg_num_step_conf": 6.2265625,
|
|
"calib/ece": 0.1622392156862745,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.0025775559455901726,
|
|
"calib/mean_conf": 0.00795686274509804,
|
|
"calib/mu_c": 0.005813953488372093,
|
|
"calib/mu_w": 0.008391509433962266,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0007843137254901962,
|
|
"calib/std_conf": 0.015985351810660065,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.35344493392070486,
|
|
"calib/step_q_c_n": 227.0,
|
|
"calib/step_q_gap": 0.05997748695654975,
|
|
"calib/step_q_w": 0.2934674469641551,
|
|
"calib/step_q_w_n": 1367.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2977.0,
|
|
"completions/max_terminated_length": 2977.0,
|
|
"completions/mean_length": 289.73046875,
|
|
"completions/mean_terminated_length": 289.73046875,
|
|
"completions/min_length": 49.0,
|
|
"completions/min_terminated_length": 49.0,
|
|
"epoch": 0.14186666666666667,
|
|
"grad_norm": 0.01119232177734375,
|
|
"learning_rate": 1.8611111111111113e-06,
|
|
"loss": 0.3108,
|
|
"num_tokens": 26086686.0,
|
|
"reward": 0.9969115257263184,
|
|
"reward_std": 0.013512922450900078,
|
|
"rewards/accuracy_reward_step": 0.16796875,
|
|
"rewards/final_brier_reward_step": 0.8297605514526367,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 133
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9139052350074053,
|
|
"aux_distill/mean_u": 0.29659186276340005,
|
|
"aux_distill/n_active_tok": 192.375,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.45802995631370913,
|
|
"calib/avg_num_step_conf": 6.12890625,
|
|
"calib/ece": 0.17641755366098244,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.004203463236600387,
|
|
"calib/mean_conf": 0.00844519143705679,
|
|
"calib/mu_c": 0.005,
|
|
"calib/mu_w": 0.009203463236600387,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0022352941176470584,
|
|
"calib/std_conf": 0.03632257550847606,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.24870546875,
|
|
"calib/step_q_c_n": 256.0,
|
|
"calib/step_q_gap": -0.05996551373286363,
|
|
"calib/step_q_w": 0.3086709824828636,
|
|
"calib/step_q_w_n": 1313.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 859.0,
|
|
"completions/max_terminated_length": 859.0,
|
|
"completions/mean_length": 277.6015625,
|
|
"completions/mean_terminated_length": 278.6902160644531,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 45.0,
|
|
"epoch": 0.14293333333333333,
|
|
"grad_norm": 0.01119636744260788,
|
|
"learning_rate": 1.8333333333333333e-06,
|
|
"loss": 0.2704,
|
|
"num_tokens": 26266704.0,
|
|
"reward": 0.9962995648384094,
|
|
"reward_std": 0.014586620032787323,
|
|
"rewards/accuracy_reward_step": 0.1796875,
|
|
"rewards/final_brier_reward_step": 0.8168178796768188,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 134
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9191968068480492,
|
|
"aux_distill/mean_u": 0.2777864922801156,
|
|
"aux_distill/n_active_tok": 197.0,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.4424755355725998,
|
|
"calib/avg_num_step_conf": 6.20703125,
|
|
"calib/ece": 0.220390625,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.00390625,
|
|
"calib/gap": -0.0060897469805166185,
|
|
"calib/mean_conf": 0.009453125,
|
|
"calib/mu_c": 0.004719298245614035,
|
|
"calib/mu_w": 0.010809045226130654,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.00359375,
|
|
"calib/std_conf": 0.05792726573241979,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2797987804878049,
|
|
"calib/step_q_c_n": 328.0,
|
|
"calib/step_q_gap": -0.027209149726310833,
|
|
"calib/step_q_w": 0.3070079302141157,
|
|
"calib/step_q_w_n": 1261.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1090.0,
|
|
"completions/max_terminated_length": 1090.0,
|
|
"completions/mean_length": 285.00390625,
|
|
"completions/mean_terminated_length": 286.12158203125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 60.0,
|
|
"epoch": 0.144,
|
|
"grad_norm": 0.011457856744527817,
|
|
"learning_rate": 1.8055555555555557e-06,
|
|
"loss": 0.2915,
|
|
"num_tokens": 26445545.0,
|
|
"reward": 0.9993282556533813,
|
|
"reward_std": 0.007239358965307474,
|
|
"rewards/accuracy_reward_step": 0.22265625,
|
|
"rewards/final_brier_reward_step": 0.7760003805160522,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 135
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8874705582857132,
|
|
"aux_distill/mean_u": 0.2744993750846104,
|
|
"aux_distill/n_active_tok": 193.625,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.57156432748538,
|
|
"calib/avg_num_step_conf": 6.08984375,
|
|
"calib/ece": 0.29486328125,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.00390625,
|
|
"calib/gap": -0.0036798245614035096,
|
|
"calib/mean_conf": 0.009824218750000002,
|
|
"calib/mu_c": 0.007236842105263159,
|
|
"calib/mu_w": 0.010916666666666668,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.00390625,
|
|
"calib/std_conf": 0.06271242306913478,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2921197916666667,
|
|
"calib/step_q_c_n": 384.0,
|
|
"calib/step_q_gap": 0.0010414937943262759,
|
|
"calib/step_q_w": 0.2910782978723404,
|
|
"calib/step_q_w_n": 1175.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 831.0,
|
|
"completions/max_terminated_length": 831.0,
|
|
"completions/mean_length": 266.30078125,
|
|
"completions/mean_terminated_length": 267.3451232910156,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 48.0,
|
|
"epoch": 0.14506666666666668,
|
|
"grad_norm": 0.011349204927682877,
|
|
"learning_rate": 1.777777777777778e-06,
|
|
"loss": 0.2561,
|
|
"num_tokens": 26622206.0,
|
|
"reward": 1.000133752822876,
|
|
"reward_std": 0.008982696570456028,
|
|
"rewards/accuracy_reward_step": 0.296875,
|
|
"rewards/final_brier_reward_step": 0.703392505645752,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 136
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8992571644484997,
|
|
"aux_distill/mean_u": 0.2754937155461978,
|
|
"aux_distill/n_active_tok": 185.0,
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.51244918699187,
|
|
"calib/avg_num_step_conf": 5.8125,
|
|
"calib/ece": 0.18371541501976282,
|
|
"calib/final_conf_rate": 0.98828125,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.00029878048780487745,
|
|
"calib/mean_conf": 0.006007905138339921,
|
|
"calib/mu_c": 0.0062499999999999995,
|
|
"calib/mu_w": 0.005951219512195122,
|
|
"calib/nonempty_final_conf_rate": 0.98828125,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.007186940337990243,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.3092271062271062,
|
|
"calib/step_q_c_n": 273.0,
|
|
"calib/step_q_gap": 0.046130809930809924,
|
|
"calib/step_q_w": 0.2630962962962963,
|
|
"calib/step_q_w_n": 1215.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 1103.0,
|
|
"completions/max_terminated_length": 1103.0,
|
|
"completions/mean_length": 262.55078125,
|
|
"completions/mean_terminated_length": 265.6640319824219,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 84.0,
|
|
"epoch": 0.14613333333333334,
|
|
"grad_norm": 0.0104745551943779,
|
|
"learning_rate": 1.75e-06,
|
|
"loss": 0.2451,
|
|
"num_tokens": 26796403.0,
|
|
"reward": 0.9894097447395325,
|
|
"reward_std": 0.027691062539815903,
|
|
"rewards/accuracy_reward_step": 0.1875,
|
|
"rewards/final_brier_reward_step": 0.8030382990837097,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"step": 137
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8681779894977808,
|
|
"aux_distill/mean_u": 0.2406305521502327,
|
|
"aux_distill/n_active_tok": 180.25,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.4716071428571429,
|
|
"calib/avg_num_step_conf": 5.671875,
|
|
"calib/ece": 0.21349609375,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.0005535714285714293,
|
|
"calib/mean_conf": 0.00525390625,
|
|
"calib/mu_c": 0.004821428571428572,
|
|
"calib/mu_w": 0.005375000000000001,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.007384129966774077,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2772347266881029,
|
|
"calib/step_q_c_n": 311.0,
|
|
"calib/step_q_gap": -0.034996736940293205,
|
|
"calib/step_q_w": 0.3122314636283961,
|
|
"calib/step_q_w_n": 1141.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 754.0,
|
|
"completions/max_terminated_length": 754.0,
|
|
"completions/mean_length": 244.9453125,
|
|
"completions/mean_terminated_length": 245.90589904785156,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 51.0,
|
|
"epoch": 0.1472,
|
|
"grad_norm": 0.011168470606207848,
|
|
"learning_rate": 1.7222222222222224e-06,
|
|
"loss": 0.2743,
|
|
"num_tokens": 26963445.0,
|
|
"reward": 1.0010135173797607,
|
|
"reward_std": 0.0024566391948610544,
|
|
"rewards/accuracy_reward_step": 0.21875,
|
|
"rewards/final_brier_reward_step": 0.7832772731781006,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 138
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.865342266857624,
|
|
"aux_distill/mean_u": 0.27075061252326454,
|
|
"aux_distill/n_active_tok": 177.0,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5913461538461537,
|
|
"calib/avg_num_step_conf": 5.56640625,
|
|
"calib/ece": 0.19644921875,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0024822775263951746,
|
|
"calib/mean_conf": 0.00667578125,
|
|
"calib/mu_c": 0.008653846153846154,
|
|
"calib/mu_w": 0.00617156862745098,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.007693364085505666,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2794509803921568,
|
|
"calib/step_q_c_n": 255.0,
|
|
"calib/step_q_gap": -0.01616978883861242,
|
|
"calib/step_q_w": 0.29562076923076924,
|
|
"calib/step_q_w_n": 1170.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 752.0,
|
|
"completions/max_terminated_length": 752.0,
|
|
"completions/mean_length": 251.046875,
|
|
"completions/mean_terminated_length": 252.03138732910156,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 75.0,
|
|
"epoch": 0.14826666666666666,
|
|
"grad_norm": 0.014333439990878105,
|
|
"learning_rate": 1.6944444444444446e-06,
|
|
"loss": 0.2661,
|
|
"num_tokens": 27130809.0,
|
|
"reward": 1.0017058849334717,
|
|
"reward_std": 0.0031183804385364056,
|
|
"rewards/accuracy_reward_step": 0.203125,
|
|
"rewards/final_brier_reward_step": 0.8002868294715881,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 139
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8883232790976763,
|
|
"aux_distill/mean_u": 0.24898915879042913,
|
|
"aux_distill/n_active_tok": 178.5,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5332845052083333,
|
|
"calib/avg_num_step_conf": 5.62109375,
|
|
"calib/ece": 0.2438671875,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0009895833333333336,
|
|
"calib/mean_conf": 0.006132812499999999,
|
|
"calib/mu_c": 0.006875000000000001,
|
|
"calib/mu_w": 0.005885416666666667,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.007145662029500398,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2609285714285714,
|
|
"calib/step_q_c_n": 294.0,
|
|
"calib/step_q_gap": -0.03759893948845916,
|
|
"calib/step_q_w": 0.29852751091703056,
|
|
"calib/step_q_w_n": 1145.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 794.0,
|
|
"completions/max_terminated_length": 794.0,
|
|
"completions/mean_length": 249.2421875,
|
|
"completions/mean_terminated_length": 250.21961975097656,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 92.0,
|
|
"epoch": 0.14933333333333335,
|
|
"grad_norm": 0.010594126768410206,
|
|
"learning_rate": 1.6666666666666667e-06,
|
|
"loss": 0.2774,
|
|
"num_tokens": 27299631.0,
|
|
"reward": 1.0016744136810303,
|
|
"reward_std": 0.0027817150112241507,
|
|
"rewards/accuracy_reward_step": 0.25,
|
|
"rewards/final_brier_reward_step": 0.7533488273620605,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 140
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.868485214188695,
|
|
"aux_distill/mean_u": 0.2833877031353884,
|
|
"aux_distill/n_active_tok": 171.75,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5427070197562001,
|
|
"calib/avg_num_step_conf": 5.48828125,
|
|
"calib/ece": 0.2321875,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0008238755779739378,
|
|
"calib/mean_conf": 0.006093749999999999,
|
|
"calib/mu_c": 0.006721311475409836,
|
|
"calib/mu_w": 0.0058974358974358985,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.007366984521328926,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.26901041666666664,
|
|
"calib/step_q_c_n": 288.0,
|
|
"calib/step_q_gap": 0.005483290435690791,
|
|
"calib/step_q_w": 0.26352712623097585,
|
|
"calib/step_q_w_n": 1117.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1212.0,
|
|
"completions/max_terminated_length": 1212.0,
|
|
"completions/mean_length": 269.90625,
|
|
"completions/mean_terminated_length": 270.9647216796875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 78.0,
|
|
"epoch": 0.1504,
|
|
"grad_norm": 0.011970674619078636,
|
|
"learning_rate": 1.638888888888889e-06,
|
|
"loss": 0.2512,
|
|
"num_tokens": 27475823.0,
|
|
"reward": 1.0015559196472168,
|
|
"reward_std": 0.0027788393199443817,
|
|
"rewards/accuracy_reward_step": 0.23828125,
|
|
"rewards/final_brier_reward_step": 0.764830470085144,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 141
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8942771572619677,
|
|
"aux_distill/mean_u": 0.28075573797004366,
|
|
"aux_distill/n_active_tok": 180.75,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.510369532428356,
|
|
"calib/avg_num_step_conf": 5.7109375,
|
|
"calib/ece": 0.1976796875,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0004064856711915544,
|
|
"calib/mean_conf": 0.0054453125000000005,
|
|
"calib/mu_c": 0.00576923076923077,
|
|
"calib/mu_w": 0.005362745098039216,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.006972948391989127,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.30242448979591835,
|
|
"calib/step_q_c_n": 245.0,
|
|
"calib/step_q_gap": 0.044435171800848494,
|
|
"calib/step_q_w": 0.25798931799506986,
|
|
"calib/step_q_w_n": 1217.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 991.0,
|
|
"completions/max_terminated_length": 991.0,
|
|
"completions/mean_length": 260.453125,
|
|
"completions/mean_terminated_length": 261.4745178222656,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 56.0,
|
|
"epoch": 0.15146666666666667,
|
|
"grad_norm": 0.010074146091938019,
|
|
"learning_rate": 1.6111111111111113e-06,
|
|
"loss": 0.2714,
|
|
"num_tokens": 27647659.0,
|
|
"reward": 1.0011327266693115,
|
|
"reward_std": 0.002334078773856163,
|
|
"rewards/accuracy_reward_step": 0.203125,
|
|
"rewards/final_brier_reward_step": 0.799140453338623,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 142
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8822210561484098,
|
|
"aux_distill/mean_u": 0.2640907637767747,
|
|
"aux_distill/n_active_tok": 184.625,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5635157855408182,
|
|
"calib/avg_num_step_conf": 5.76953125,
|
|
"calib/ece": 0.20162509803921566,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.001717093218755841,
|
|
"calib/mean_conf": 0.00739450980392157,
|
|
"calib/mu_c": 0.008754716981132078,
|
|
"calib/mu_w": 0.007037623762376237,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.000588235294117647,
|
|
"calib/std_conf": 0.01207094588520839,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2553433333333333,
|
|
"calib/step_q_c_n": 300.0,
|
|
"calib/step_q_gap": 0.014744352874539762,
|
|
"calib/step_q_w": 0.24059898045879355,
|
|
"calib/step_q_w_n": 1177.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2901.0,
|
|
"completions/max_terminated_length": 2901.0,
|
|
"completions/mean_length": 270.42578125,
|
|
"completions/mean_terminated_length": 270.42578125,
|
|
"completions/min_length": 61.0,
|
|
"completions/min_terminated_length": 61.0,
|
|
"epoch": 0.15253333333333333,
|
|
"grad_norm": 0.011427761986851692,
|
|
"learning_rate": 1.5833333333333333e-06,
|
|
"loss": 0.3089,
|
|
"num_tokens": 27824224.0,
|
|
"reward": 0.9939003586769104,
|
|
"reward_std": 0.025681499391794205,
|
|
"rewards/accuracy_reward_step": 0.20703125,
|
|
"rewards/final_brier_reward_step": 0.7885819673538208,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 143
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8531425483524799,
|
|
"aux_distill/mean_u": 0.2394903155267863,
|
|
"aux_distill/n_active_tok": 172.0,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5109019886363637,
|
|
"calib/avg_num_step_conf": 5.46484375,
|
|
"calib/ece": 0.309479296875,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.004416477272727273,
|
|
"calib/mean_conf": 0.009348828125,
|
|
"calib/mu_c": 0.0063125,
|
|
"calib/mu_w": 0.010728977272727274,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0031640625,
|
|
"calib/std_conf": 0.03690465648805999,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2530695443645084,
|
|
"calib/step_q_c_n": 417.0,
|
|
"calib/step_q_gap": -0.025420272336102556,
|
|
"calib/step_q_w": 0.27848981670061096,
|
|
"calib/step_q_w_n": 982.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 949.0,
|
|
"completions/max_terminated_length": 949.0,
|
|
"completions/mean_length": 237.46484375,
|
|
"completions/mean_terminated_length": 238.39608764648438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 64.0,
|
|
"epoch": 0.1536,
|
|
"grad_norm": 0.014708510600030422,
|
|
"learning_rate": 1.5555555555555558e-06,
|
|
"loss": 0.2486,
|
|
"num_tokens": 27989143.0,
|
|
"reward": 1.0012478828430176,
|
|
"reward_std": 0.004646037705242634,
|
|
"rewards/accuracy_reward_step": 0.3125,
|
|
"rewards/final_brier_reward_step": 0.6899959444999695,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 144
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8793023275211453,
|
|
"aux_distill/mean_u": 0.2573759158547438,
|
|
"aux_distill/n_active_tok": 170.25,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.4719298245614035,
|
|
"calib/avg_num_step_conf": 5.41796875,
|
|
"calib/ece": 0.2891796875,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.0012134502923976604,
|
|
"calib/mean_conf": 0.0076953125,
|
|
"calib/mu_c": 0.006842105263157895,
|
|
"calib/mu_w": 0.008055555555555555,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.0076903537322637995,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.27508333333333335,
|
|
"calib/step_q_c_n": 360.0,
|
|
"calib/step_q_gap": -0.0038816131126256925,
|
|
"calib/step_q_w": 0.27896494644595904,
|
|
"calib/step_q_w_n": 1027.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 787.0,
|
|
"completions/max_terminated_length": 787.0,
|
|
"completions/mean_length": 235.2734375,
|
|
"completions/mean_terminated_length": 236.1960906982422,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 55.0,
|
|
"epoch": 0.15466666666666667,
|
|
"grad_norm": 0.01416355837136507,
|
|
"learning_rate": 1.527777777777778e-06,
|
|
"loss": 0.2677,
|
|
"num_tokens": 28152077.0,
|
|
"reward": 0.9980659484863281,
|
|
"reward_std": 0.014034947380423546,
|
|
"rewards/accuracy_reward_step": 0.296875,
|
|
"rewards/final_brier_reward_step": 0.7031632661819458,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 145
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9093922805041075,
|
|
"aux_distill/mean_u": 0.2503467252894827,
|
|
"aux_distill/n_active_tok": 187.625,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5753644939965694,
|
|
"calib/avg_num_step_conf": 5.9765625,
|
|
"calib/ece": 0.165296875,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0015523156089193857,
|
|
"calib/mean_conf": 0.006578125,
|
|
"calib/mu_c": 0.007863636363636366,
|
|
"calib/mu_w": 0.00631132075471698,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.007265188158910614,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.25840707964601767,
|
|
"calib/step_q_c_n": 226.0,
|
|
"calib/step_q_gap": -0.02287896329876765,
|
|
"calib/step_q_w": 0.2812860429447853,
|
|
"calib/step_q_w_n": 1304.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1409.0,
|
|
"completions/max_terminated_length": 1409.0,
|
|
"completions/mean_length": 266.078125,
|
|
"completions/mean_terminated_length": 267.12158203125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 48.0,
|
|
"epoch": 0.15573333333333333,
|
|
"grad_norm": 0.01321055181324482,
|
|
"learning_rate": 1.5e-06,
|
|
"loss": 0.2996,
|
|
"num_tokens": 28327409.0,
|
|
"reward": 0.9973971843719482,
|
|
"reward_std": 0.01355915330350399,
|
|
"rewards/accuracy_reward_step": 0.171875,
|
|
"rewards/final_brier_reward_step": 0.8268258571624756,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 146
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8763753715902567,
|
|
"aux_distill/mean_u": 0.23976348102847864,
|
|
"aux_distill/n_active_tok": 160.375,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5509268292682927,
|
|
"calib/avg_num_step_conf": 5.01171875,
|
|
"calib/ece": 0.18992549019607843,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0015760975609756091,
|
|
"calib/mean_conf": 0.0061529411764705885,
|
|
"calib/mu_c": 0.0074199999999999995,
|
|
"calib/mu_w": 0.00584390243902439,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.006925099360543684,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.22452479338842976,
|
|
"calib/step_q_c_n": 242.0,
|
|
"calib/step_q_gap": -0.025266753201387743,
|
|
"calib/step_q_w": 0.2497915465898175,
|
|
"calib/step_q_w_n": 1041.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1806.0,
|
|
"completions/max_terminated_length": 1806.0,
|
|
"completions/mean_length": 244.484375,
|
|
"completions/mean_terminated_length": 244.484375,
|
|
"completions/min_length": 56.0,
|
|
"completions/min_terminated_length": 56.0,
|
|
"epoch": 0.1568,
|
|
"grad_norm": 0.012709351256489754,
|
|
"learning_rate": 1.4722222222222225e-06,
|
|
"loss": 0.285,
|
|
"num_tokens": 28493677.0,
|
|
"reward": 0.9955471158027649,
|
|
"reward_std": 0.018829353153705597,
|
|
"rewards/accuracy_reward_step": 0.1953125,
|
|
"rewards/final_brier_reward_step": 0.8035942316055298,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 147
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8877765387296677,
|
|
"aux_distill/mean_u": 0.2964384086564609,
|
|
"aux_distill/n_active_tok": 163.0,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.4963146609488074,
|
|
"calib/avg_num_step_conf": 5.09375,
|
|
"calib/ece": 0.35081960784313726,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0003430045564191903,
|
|
"calib/mean_conf": 0.006043137254901961,
|
|
"calib/mu_c": 0.006263736263736264,
|
|
"calib/mu_w": 0.005920731707317074,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.007692248578721788,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2919807228915663,
|
|
"calib/step_q_c_n": 415.0,
|
|
"calib/step_q_gap": 0.07896497486007026,
|
|
"calib/step_q_w": 0.21301574803149603,
|
|
"calib/step_q_w_n": 889.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 973.0,
|
|
"completions/max_terminated_length": 973.0,
|
|
"completions/mean_length": 232.33984375,
|
|
"completions/mean_terminated_length": 233.25099182128906,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 62.0,
|
|
"epoch": 0.15786666666666666,
|
|
"grad_norm": 0.012826824560761452,
|
|
"learning_rate": 1.4444444444444445e-06,
|
|
"loss": 0.2879,
|
|
"num_tokens": 28658268.0,
|
|
"reward": 0.9982725977897644,
|
|
"reward_std": 0.014345245435833931,
|
|
"rewards/accuracy_reward_step": 0.35546875,
|
|
"rewards/final_brier_reward_step": 0.6449828147888184,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 148
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8741916511207819,
|
|
"aux_distill/mean_u": 0.23303331402852917,
|
|
"aux_distill/n_active_tok": 177.125,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5589198036006546,
|
|
"calib/avg_num_step_conf": 5.81640625,
|
|
"calib/ece": 0.17889019607843135,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.0005831628477905061,
|
|
"calib/mean_conf": 0.008560784313725492,
|
|
"calib/mu_c": 0.008085106382978725,
|
|
"calib/mu_w": 0.008668269230769231,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0015686274509803923,
|
|
"calib/std_conf": 0.02595023955030445,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.277008547008547,
|
|
"calib/step_q_c_n": 234.0,
|
|
"calib/step_q_gap": 0.023315319916913546,
|
|
"calib/step_q_w": 0.25369322709163344,
|
|
"calib/step_q_w_n": 1255.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1185.0,
|
|
"completions/max_terminated_length": 1185.0,
|
|
"completions/mean_length": 260.0,
|
|
"completions/mean_terminated_length": 261.0196228027344,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 55.0,
|
|
"epoch": 0.15893333333333334,
|
|
"grad_norm": 0.014396457001566887,
|
|
"learning_rate": 1.4166666666666667e-06,
|
|
"loss": 0.2461,
|
|
"num_tokens": 28829284.0,
|
|
"reward": 0.9936124086380005,
|
|
"reward_std": 0.024772923439741135,
|
|
"rewards/accuracy_reward_step": 0.18359375,
|
|
"rewards/final_brier_reward_step": 0.8114436864852905,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 149
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9308585114777088,
|
|
"aux_distill/mean_u": 0.2825607385398316,
|
|
"aux_distill/n_active_tok": 161.75,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5184814814814814,
|
|
"calib/avg_num_step_conf": 5.40625,
|
|
"calib/ece": 0.28629019607843137,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0006411111111111105,
|
|
"calib/mean_conf": 0.007827450980392158,
|
|
"calib/mu_c": 0.00828,
|
|
"calib/mu_w": 0.007638888888888889,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.007624112379872847,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.26277070063694263,
|
|
"calib/step_q_c_n": 314.0,
|
|
"calib/step_q_gap": 0.009795934281802476,
|
|
"calib/step_q_w": 0.25297476635514016,
|
|
"calib/step_q_w_n": 1070.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 843.0,
|
|
"completions/max_terminated_length": 843.0,
|
|
"completions/mean_length": 222.9921875,
|
|
"completions/mean_terminated_length": 223.86668395996094,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 82.0,
|
|
"epoch": 0.16,
|
|
"grad_norm": 0.013206169940531254,
|
|
"learning_rate": 1.3888888888888892e-06,
|
|
"loss": 0.2793,
|
|
"num_tokens": 28991330.0,
|
|
"reward": 0.998460054397583,
|
|
"reward_std": 0.014980091713368893,
|
|
"rewards/accuracy_reward_step": 0.29296875,
|
|
"rewards/final_brier_reward_step": 0.707857608795166,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 150
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9185364572331309,
|
|
"aux_distill/mean_u": 0.27937537430025083,
|
|
"aux_distill/n_active_tok": 170.125,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5205164992826399,
|
|
"calib/avg_num_step_conf": 5.3359375,
|
|
"calib/ece": 0.19266015625,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0011143950263032042,
|
|
"calib/mean_conf": 0.006558593749999999,
|
|
"calib/mu_c": 0.007450980392156863,
|
|
"calib/mu_w": 0.006336585365853659,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.007692610367908993,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.23504098360655734,
|
|
"calib/step_q_c_n": 244.0,
|
|
"calib/step_q_gap": -0.01838593261447652,
|
|
"calib/step_q_w": 0.25342691622103386,
|
|
"calib/step_q_w_n": 1122.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 757.0,
|
|
"completions/max_terminated_length": 757.0,
|
|
"completions/mean_length": 243.59765625,
|
|
"completions/mean_terminated_length": 244.55296325683594,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 70.0,
|
|
"epoch": 0.16106666666666666,
|
|
"grad_norm": 0.012316438369452953,
|
|
"learning_rate": 1.3611111111111112e-06,
|
|
"loss": 0.279,
|
|
"num_tokens": 29160715.0,
|
|
"reward": 1.001433253288269,
|
|
"reward_std": 0.0031007230281829834,
|
|
"rewards/accuracy_reward_step": 0.19921875,
|
|
"rewards/final_brier_reward_step": 0.8036477565765381,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 151
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9062818996608257,
|
|
"aux_distill/mean_u": 0.23632444722897225,
|
|
"aux_distill/n_active_tok": 161.125,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.4878723832528181,
|
|
"calib/avg_num_step_conf": 5.03515625,
|
|
"calib/ece": 0.17980392156862746,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.00012077294685990288,
|
|
"calib/mean_conf": 0.008431372549019609,
|
|
"calib/mu_c": 0.008333333333333333,
|
|
"calib/mu_w": 0.008454106280193236,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.008441853950176418,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2364607843137255,
|
|
"calib/step_q_c_n": 204.0,
|
|
"calib/step_q_gap": -0.02490880093973072,
|
|
"calib/step_q_w": 0.2613695852534562,
|
|
"calib/step_q_w_n": 1085.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1983.0,
|
|
"completions/max_terminated_length": 1983.0,
|
|
"completions/mean_length": 248.84375,
|
|
"completions/mean_terminated_length": 248.84375,
|
|
"completions/min_length": 68.0,
|
|
"completions/min_terminated_length": 68.0,
|
|
"epoch": 0.16213333333333332,
|
|
"grad_norm": 0.011517630890011787,
|
|
"learning_rate": 1.3333333333333334e-06,
|
|
"loss": 0.3101,
|
|
"num_tokens": 29329811.0,
|
|
"reward": 0.9975853562355042,
|
|
"reward_std": 0.014188062399625778,
|
|
"rewards/accuracy_reward_step": 0.1875,
|
|
"rewards/final_brier_reward_step": 0.8115769624710083,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 152
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.94620705768466,
|
|
"aux_distill/mean_u": 0.3316092887910609,
|
|
"aux_distill/n_active_tok": 176.0,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.474765312761409,
|
|
"calib/avg_num_step_conf": 5.55859375,
|
|
"calib/ece": 0.19921875,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.0010484245747746056,
|
|
"calib/mean_conf": 0.0078125,
|
|
"calib/mu_c": 0.006981132075471699,
|
|
"calib/mu_w": 0.008029556650246305,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.008518133231524382,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2650373134328358,
|
|
"calib/step_q_c_n": 268.0,
|
|
"calib/step_q_gap": -0.020681301285778886,
|
|
"calib/step_q_w": 0.2857186147186147,
|
|
"calib/step_q_w_n": 1155.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 688.0,
|
|
"completions/max_terminated_length": 688.0,
|
|
"completions/mean_length": 260.65625,
|
|
"completions/mean_terminated_length": 261.6784362792969,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 43.0,
|
|
"epoch": 0.1632,
|
|
"grad_norm": 0.013415186665952206,
|
|
"learning_rate": 1.3055555555555556e-06,
|
|
"loss": 0.3176,
|
|
"num_tokens": 29503859.0,
|
|
"reward": 1.0013785362243652,
|
|
"reward_std": 0.0025912360288202763,
|
|
"rewards/accuracy_reward_step": 0.20703125,
|
|
"rewards/final_brier_reward_step": 0.7957258224487305,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 153
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8725715447217226,
|
|
"aux_distill/mean_u": 0.2504315987939681,
|
|
"aux_distill/n_active_tok": 153.125,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5310153316645807,
|
|
"calib/avg_num_step_conf": 4.80078125,
|
|
"calib/ece": 0.25696484375,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0012237171464330454,
|
|
"calib/mean_conf": 0.008660156249999999,
|
|
"calib/mu_c": 0.009558823529411767,
|
|
"calib/mu_w": 0.008335106382978722,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.00777853456478699,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.3326111111111111,
|
|
"calib/step_q_c_n": 306.0,
|
|
"calib/step_q_gap": 0.02233375466474058,
|
|
"calib/step_q_w": 0.3102773564463705,
|
|
"calib/step_q_w_n": 923.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 621.0,
|
|
"completions/max_terminated_length": 621.0,
|
|
"completions/mean_length": 225.49609375,
|
|
"completions/mean_terminated_length": 226.38040161132812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 68.0,
|
|
"epoch": 0.16426666666666667,
|
|
"grad_norm": 0.015053332783281803,
|
|
"learning_rate": 1.2777777777777779e-06,
|
|
"loss": 0.2345,
|
|
"num_tokens": 29666026.0,
|
|
"reward": 1.0024712085723877,
|
|
"reward_std": 0.004337704740464687,
|
|
"rewards/accuracy_reward_step": 0.265625,
|
|
"rewards/final_brier_reward_step": 0.7393175959587097,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 154
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8818569201976061,
|
|
"aux_distill/mean_u": 0.25869740968154836,
|
|
"aux_distill/n_active_tok": 157.25,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5340544871794871,
|
|
"calib/avg_num_step_conf": 4.93359375,
|
|
"calib/ece": 0.1779375,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 2.5641025641027354e-05,
|
|
"calib/mean_conf": 0.009562500000000002,
|
|
"calib/mu_c": 0.009583333333333334,
|
|
"calib/mu_w": 0.009557692307692307,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.010310984737162595,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.24661754385964912,
|
|
"calib/step_q_c_n": 285.0,
|
|
"calib/step_q_gap": -0.07756446023033042,
|
|
"calib/step_q_w": 0.32418200408997955,
|
|
"calib/step_q_w_n": 978.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 755.0,
|
|
"completions/max_terminated_length": 755.0,
|
|
"completions/mean_length": 229.77734375,
|
|
"completions/mean_terminated_length": 230.67845153808594,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 53.0,
|
|
"epoch": 0.16533333333333333,
|
|
"grad_norm": 0.015045061707496643,
|
|
"learning_rate": 1.25e-06,
|
|
"loss": 0.2287,
|
|
"num_tokens": 29832065.0,
|
|
"reward": 1.0016980171203613,
|
|
"reward_std": 0.0030909671913832426,
|
|
"rewards/accuracy_reward_step": 0.1875,
|
|
"rewards/final_brier_reward_step": 0.8158959746360779,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 155
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8972544763237238,
|
|
"aux_distill/mean_u": 0.2473313923428918,
|
|
"aux_distill/n_active_tok": 164.25,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.46288080449571134,
|
|
"calib/avg_num_step_conf": 5.1484375,
|
|
"calib/ece": 0.18242578125,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.0012630385487528342,
|
|
"calib/mean_conf": 0.00898046875,
|
|
"calib/mu_c": 0.007959183673469388,
|
|
"calib/mu_w": 0.009222222222222222,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.008474894529153353,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2990153846153846,
|
|
"calib/step_q_c_n": 260.0,
|
|
"calib/step_q_gap": -0.009392932964955636,
|
|
"calib/step_q_w": 0.30840831758034026,
|
|
"calib/step_q_w_n": 1058.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 549.0,
|
|
"completions/max_terminated_length": 549.0,
|
|
"completions/mean_length": 237.28125,
|
|
"completions/mean_terminated_length": 238.21177673339844,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 66.0,
|
|
"epoch": 0.1664,
|
|
"grad_norm": 0.015800490975379944,
|
|
"learning_rate": 1.2222222222222223e-06,
|
|
"loss": 0.2672,
|
|
"num_tokens": 29997569.0,
|
|
"reward": 1.0014472007751465,
|
|
"reward_std": 0.0030206539668142796,
|
|
"rewards/accuracy_reward_step": 0.19140625,
|
|
"rewards/final_brier_reward_step": 0.811488151550293,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 156
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8981580398976803,
|
|
"aux_distill/mean_u": 0.2928962613172708,
|
|
"aux_distill/n_active_tok": 163.75,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5688922500720254,
|
|
"calib/avg_num_step_conf": 5.14453125,
|
|
"calib/ece": 0.2927734375,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.008310285220397583,
|
|
"calib/mean_conf": 0.011914062500000001,
|
|
"calib/mu_c": 0.017692307692307695,
|
|
"calib/mu_w": 0.009382022471910112,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.031459430060732096,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.3103519061583578,
|
|
"calib/step_q_c_n": 341.0,
|
|
"calib/step_q_gap": 0.019046578289505345,
|
|
"calib/step_q_w": 0.29130532786885244,
|
|
"calib/step_q_w_n": 976.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 599.0,
|
|
"completions/max_terminated_length": 599.0,
|
|
"completions/mean_length": 230.11328125,
|
|
"completions/mean_terminated_length": 231.0157012939453,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 78.0,
|
|
"epoch": 0.16746666666666668,
|
|
"grad_norm": 0.013263057917356491,
|
|
"learning_rate": 1.1944444444444446e-06,
|
|
"loss": 0.2412,
|
|
"num_tokens": 30160206.0,
|
|
"reward": 1.0048247575759888,
|
|
"reward_std": 0.008723842911422253,
|
|
"rewards/accuracy_reward_step": 0.3046875,
|
|
"rewards/final_brier_reward_step": 0.7049621343612671,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 157
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8862754721194506,
|
|
"aux_distill/mean_u": 0.25307615114231347,
|
|
"aux_distill/n_active_tok": 149.125,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.4954807542465327,
|
|
"calib/avg_num_step_conf": 4.66796875,
|
|
"calib/ece": 0.26209490196078433,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.003050116877045348,
|
|
"calib/mean_conf": 0.011630588235294118,
|
|
"calib/mu_c": 0.009405797101449277,
|
|
"calib/mu_w": 0.012455913978494625,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0015686274509803923,
|
|
"calib/std_conf": 0.026572080394969538,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.29868025078369903,
|
|
"calib/step_q_c_n": 319.0,
|
|
"calib/step_q_gap": -0.0011962332345658022,
|
|
"calib/step_q_w": 0.29987648401826483,
|
|
"calib/step_q_w_n": 876.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 948.0,
|
|
"completions/max_terminated_length": 948.0,
|
|
"completions/mean_length": 226.74609375,
|
|
"completions/mean_terminated_length": 227.63531494140625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 43.0,
|
|
"epoch": 0.16853333333333334,
|
|
"grad_norm": 0.015819691121578217,
|
|
"learning_rate": 1.1666666666666668e-06,
|
|
"loss": 0.25,
|
|
"num_tokens": 30323493.0,
|
|
"reward": 0.9982098340988159,
|
|
"reward_std": 0.014394954778254032,
|
|
"rewards/accuracy_reward_step": 0.26953125,
|
|
"rewards/final_brier_reward_step": 0.7307947874069214,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 158
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8714629802852869,
|
|
"aux_distill/mean_u": 0.24025526118135004,
|
|
"aux_distill/n_active_tok": 165.25,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.4885566188197767,
|
|
"calib/avg_num_step_conf": 5.1796875,
|
|
"calib/ece": 0.2496484375,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.0006395534290271118,
|
|
"calib/mean_conf": 0.0094140625,
|
|
"calib/mu_c": 0.008939393939393941,
|
|
"calib/mu_w": 0.009578947368421053,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.000625,
|
|
"calib/std_conf": 0.012749403995720495,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.26255775577557755,
|
|
"calib/step_q_c_n": 303.0,
|
|
"calib/step_q_gap": -0.029500113237130143,
|
|
"calib/step_q_w": 0.2920578690127077,
|
|
"calib/step_q_w_n": 1023.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 756.0,
|
|
"completions/max_terminated_length": 756.0,
|
|
"completions/mean_length": 231.953125,
|
|
"completions/mean_terminated_length": 232.86276245117188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 76.0,
|
|
"epoch": 0.1696,
|
|
"grad_norm": 0.01259444747120142,
|
|
"learning_rate": 1.138888888888889e-06,
|
|
"loss": 0.2727,
|
|
"num_tokens": 30487657.0,
|
|
"reward": 1.0021791458129883,
|
|
"reward_std": 0.003811280243098736,
|
|
"rewards/accuracy_reward_step": 0.2578125,
|
|
"rewards/final_brier_reward_step": 0.746545672416687,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 159
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8943109344691038,
|
|
"aux_distill/mean_u": 0.2432022044371476,
|
|
"aux_distill/n_active_tok": 161.5,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.46281108597285064,
|
|
"calib/avg_num_step_conf": 5.046875,
|
|
"calib/ece": 0.19409375,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.0016802413273001495,
|
|
"calib/mean_conf": 0.00903125,
|
|
"calib/mu_c": 0.007692307692307693,
|
|
"calib/mu_w": 0.009372549019607842,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.008483382193294134,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2612128514056225,
|
|
"calib/step_q_c_n": 249.0,
|
|
"calib/step_q_gap": -0.01567305463464591,
|
|
"calib/step_q_w": 0.2768859060402684,
|
|
"calib/step_q_w_n": 1043.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 761.0,
|
|
"completions/max_terminated_length": 761.0,
|
|
"completions/mean_length": 225.2890625,
|
|
"completions/mean_terminated_length": 226.1725616455078,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 60.0,
|
|
"epoch": 0.17066666666666666,
|
|
"grad_norm": 0.01451224647462368,
|
|
"learning_rate": 1.111111111111111e-06,
|
|
"loss": 0.2765,
|
|
"num_tokens": 30650171.0,
|
|
"reward": 1.0014857053756714,
|
|
"reward_std": 0.0025799009017646313,
|
|
"rewards/accuracy_reward_step": 0.203125,
|
|
"rewards/final_brier_reward_step": 0.7998464703559875,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 160
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8607595209032297,
|
|
"aux_distill/mean_u": 0.2243861432103818,
|
|
"aux_distill/n_active_tok": 153.875,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5905117721697448,
|
|
"calib/avg_num_step_conf": 4.80859375,
|
|
"calib/ece": 0.31149019607843137,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0022212040039475533,
|
|
"calib/mean_conf": 0.01007843137254902,
|
|
"calib/mu_c": 0.011585365853658536,
|
|
"calib/mu_w": 0.009364161849710983,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.009579068556083783,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2861194444444444,
|
|
"calib/step_q_c_n": 360.0,
|
|
"calib/step_q_gap": -0.02870030297231796,
|
|
"calib/step_q_w": 0.3148197474167624,
|
|
"calib/step_q_w_n": 871.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2208.0,
|
|
"completions/max_terminated_length": 2208.0,
|
|
"completions/mean_length": 234.79296875,
|
|
"completions/mean_terminated_length": 234.79296875,
|
|
"completions/min_length": 52.0,
|
|
"completions/min_terminated_length": 52.0,
|
|
"epoch": 0.17173333333333332,
|
|
"grad_norm": 0.014872077852487564,
|
|
"learning_rate": 1.0833333333333335e-06,
|
|
"loss": 0.3085,
|
|
"num_tokens": 30814198.0,
|
|
"reward": 0.999708354473114,
|
|
"reward_std": 0.015861758962273598,
|
|
"rewards/accuracy_reward_step": 0.3203125,
|
|
"rewards/final_brier_reward_step": 0.6830105781555176,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 161
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8608557693660259,
|
|
"aux_distill/mean_u": 0.23922221231065072,
|
|
"aux_distill/n_active_tok": 161.875,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5466666666666667,
|
|
"calib/avg_num_step_conf": 5.05859375,
|
|
"calib/ece": 0.34247960784313725,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.003579595959595958,
|
|
"calib/mean_conf": 0.010461568627450981,
|
|
"calib/mu_c": 0.012777777777777777,
|
|
"calib/mu_w": 0.009198181818181819,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.020830856013262875,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2778433179723502,
|
|
"calib/step_q_c_n": 434.0,
|
|
"calib/step_q_gap": 0.022501854557716028,
|
|
"calib/step_q_w": 0.25534146341463415,
|
|
"calib/step_q_w_n": 861.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2932.0,
|
|
"completions/max_terminated_length": 2932.0,
|
|
"completions/mean_length": 246.33984375,
|
|
"completions/mean_terminated_length": 246.33984375,
|
|
"completions/min_length": 79.0,
|
|
"completions/min_terminated_length": 79.0,
|
|
"epoch": 0.1728,
|
|
"grad_norm": 0.014128386043012142,
|
|
"learning_rate": 1.0555555555555557e-06,
|
|
"loss": 0.316,
|
|
"num_tokens": 30981405.0,
|
|
"reward": 1.0003153085708618,
|
|
"reward_std": 0.018365781754255295,
|
|
"rewards/accuracy_reward_step": 0.3515625,
|
|
"rewards/final_brier_reward_step": 0.6529743671417236,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 162
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.907149713486433,
|
|
"aux_distill/mean_u": 0.2809183020412603,
|
|
"aux_distill/n_active_tok": 158.625,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5661311414160284,
|
|
"calib/avg_num_step_conf": 4.95703125,
|
|
"calib/ece": 0.1992392156862745,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.001162245469830004,
|
|
"calib/mean_conf": 0.009701960784313726,
|
|
"calib/mu_c": 0.010622641509433964,
|
|
"calib/mu_w": 0.00946039603960396,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0005490196078431374,
|
|
"calib/std_conf": 0.01125590895269695,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2518829268292683,
|
|
"calib/step_q_c_n": 205.0,
|
|
"calib/step_q_gap": 0.016614129836787117,
|
|
"calib/step_q_w": 0.2352687969924812,
|
|
"calib/step_q_w_n": 1064.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2564.0,
|
|
"completions/max_terminated_length": 2564.0,
|
|
"completions/mean_length": 241.34765625,
|
|
"completions/mean_terminated_length": 241.34765625,
|
|
"completions/min_length": 60.0,
|
|
"completions/min_terminated_length": 60.0,
|
|
"epoch": 0.17386666666666667,
|
|
"grad_norm": 0.013407212682068348,
|
|
"learning_rate": 1.0277777777777777e-06,
|
|
"loss": 0.3076,
|
|
"num_tokens": 31148022.0,
|
|
"reward": 0.9981828927993774,
|
|
"reward_std": 0.014493662863969803,
|
|
"rewards/accuracy_reward_step": 0.20703125,
|
|
"rewards/final_brier_reward_step": 0.7932409644126892,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 163
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9057147167623043,
|
|
"aux_distill/mean_u": 0.23854662639106458,
|
|
"aux_distill/n_active_tok": 158.875,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5615351629502573,
|
|
"calib/avg_num_step_conf": 4.99609375,
|
|
"calib/ece": 0.16271093749999999,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0023816466552315646,
|
|
"calib/mean_conf": 0.0091640625,
|
|
"calib/mu_c": 0.011136363636363639,
|
|
"calib/mu_w": 0.008754716981132074,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.009547805166429285,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2523084577114428,
|
|
"calib/step_q_c_n": 201.0,
|
|
"calib/step_q_gap": 0.00016652821237039817,
|
|
"calib/step_q_w": 0.2521419294990724,
|
|
"calib/step_q_w_n": 1078.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 733.0,
|
|
"completions/max_terminated_length": 733.0,
|
|
"completions/mean_length": 238.85546875,
|
|
"completions/mean_terminated_length": 239.79217529296875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 75.0,
|
|
"epoch": 0.17493333333333333,
|
|
"grad_norm": 0.016263170167803764,
|
|
"learning_rate": 1.0000000000000002e-06,
|
|
"loss": 0.2661,
|
|
"num_tokens": 31315305.0,
|
|
"reward": 1.001826524734497,
|
|
"reward_std": 0.0030358254443854094,
|
|
"rewards/accuracy_reward_step": 0.171875,
|
|
"rewards/final_brier_reward_step": 0.8317779898643494,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 164
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9077084865421057,
|
|
"aux_distill/mean_u": 0.28237916739274693,
|
|
"aux_distill/n_active_tok": 160.375,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.4865963011395479,
|
|
"calib/avg_num_step_conf": 5.01171875,
|
|
"calib/ece": 0.19998823529411766,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.00015028955725761208,
|
|
"calib/mean_conf": 0.007854901960784314,
|
|
"calib/mu_c": 0.007735849056603774,
|
|
"calib/mu_w": 0.007886138613861386,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.0073392929542851525,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.20630434782608698,
|
|
"calib/step_q_c_n": 253.0,
|
|
"calib/step_q_gap": -0.04420050654284502,
|
|
"calib/step_q_w": 0.250504854368932,
|
|
"calib/step_q_w_n": 1030.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 749.0,
|
|
"completions/max_terminated_length": 749.0,
|
|
"completions/mean_length": 240.2578125,
|
|
"completions/mean_terminated_length": 241.20001220703125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 77.0,
|
|
"epoch": 0.176,
|
|
"grad_norm": 0.015873249620199203,
|
|
"learning_rate": 9.722222222222224e-07,
|
|
"loss": 0.2549,
|
|
"num_tokens": 31482387.0,
|
|
"reward": 0.9976377487182617,
|
|
"reward_std": 0.0134980957955122,
|
|
"rewards/accuracy_reward_step": 0.20703125,
|
|
"rewards/final_brier_reward_step": 0.7921505570411682,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 165
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9185799043625593,
|
|
"aux_distill/mean_u": 0.30033257046494144,
|
|
"aux_distill/n_active_tok": 190.25,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5301592161665646,
|
|
"calib/avg_num_step_conf": 5.9453125,
|
|
"calib/ece": 0.26993333333333336,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.001300750153092469,
|
|
"calib/mean_conf": 0.008498039215686273,
|
|
"calib/mu_c": 0.009436619718309862,
|
|
"calib/mu_w": 0.008135869565217392,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.007924583002478301,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.27285714285714285,
|
|
"calib/step_q_c_n": 392.0,
|
|
"calib/step_q_gap": -0.01204905183312266,
|
|
"calib/step_q_w": 0.2849061946902655,
|
|
"calib/step_q_w_n": 1130.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1515.0,
|
|
"completions/max_terminated_length": 1515.0,
|
|
"completions/mean_length": 284.56640625,
|
|
"completions/mean_terminated_length": 285.682373046875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 90.0,
|
|
"epoch": 0.17706666666666668,
|
|
"grad_norm": 0.015122036449611187,
|
|
"learning_rate": 9.444444444444445e-07,
|
|
"loss": 0.2623,
|
|
"num_tokens": 31661420.0,
|
|
"reward": 0.998643696308136,
|
|
"reward_std": 0.014669304713606834,
|
|
"rewards/accuracy_reward_step": 0.27734375,
|
|
"rewards/final_brier_reward_step": 0.723849892616272,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 166
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9328858200460672,
|
|
"aux_distill/mean_u": 0.30078049686394437,
|
|
"aux_distill/n_active_tok": 165.875,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.48206477732793523,
|
|
"calib/avg_num_step_conf": 5.20703125,
|
|
"calib/ece": 0.24657254901960782,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.0004421052631578947,
|
|
"calib/mean_conf": 0.008329411764705882,
|
|
"calib/mu_c": 0.008,
|
|
"calib/mu_w": 0.008442105263157895,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.008261493339766052,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2694563953488372,
|
|
"calib/step_q_c_n": 344.0,
|
|
"calib/step_q_gap": 0.02055042972699697,
|
|
"calib/step_q_w": 0.24890596562184022,
|
|
"calib/step_q_w_n": 989.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 858.0,
|
|
"completions/max_terminated_length": 858.0,
|
|
"completions/mean_length": 245.75390625,
|
|
"completions/mean_terminated_length": 246.71766662597656,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 70.0,
|
|
"epoch": 0.17813333333333334,
|
|
"grad_norm": 0.01606198400259018,
|
|
"learning_rate": 9.166666666666666e-07,
|
|
"loss": 0.2581,
|
|
"num_tokens": 31829941.0,
|
|
"reward": 0.9980564117431641,
|
|
"reward_std": 0.014442476443946362,
|
|
"rewards/accuracy_reward_step": 0.25390625,
|
|
"rewards/final_brier_reward_step": 0.7461129426956177,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 167
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9054408930242062,
|
|
"aux_distill/mean_u": 0.28933193706955174,
|
|
"aux_distill/n_active_tok": 154.75,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5035913806863528,
|
|
"calib/avg_num_step_conf": 4.85546875,
|
|
"calib/ece": 0.29549218749999995,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.00390625,
|
|
"calib/gap": -0.005436987593412176,
|
|
"calib/mean_conf": 0.0126328125,
|
|
"calib/mu_c": 0.00883116883116883,
|
|
"calib/mu_w": 0.014268156424581006,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.003671875,
|
|
"calib/std_conf": 0.05860738955831289,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.22799168975069253,
|
|
"calib/step_q_c_n": 361.0,
|
|
"calib/step_q_gap": -0.03408654154182447,
|
|
"calib/step_q_w": 0.262078231292517,
|
|
"calib/step_q_w_n": 882.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 633.0,
|
|
"completions/max_terminated_length": 633.0,
|
|
"completions/mean_length": 235.59375,
|
|
"completions/mean_terminated_length": 236.5176544189453,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 55.0,
|
|
"epoch": 0.1792,
|
|
"grad_norm": 0.016619674861431122,
|
|
"learning_rate": 8.88888888888889e-07,
|
|
"loss": 0.2645,
|
|
"num_tokens": 31994925.0,
|
|
"reward": 1.000859022140503,
|
|
"reward_std": 0.008521142415702343,
|
|
"rewards/accuracy_reward_step": 0.30078125,
|
|
"rewards/final_brier_reward_step": 0.7009367942810059,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 168
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.887091588228941,
|
|
"aux_distill/mean_u": 0.2387504252351404,
|
|
"aux_distill/n_active_tok": 168.25,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5559084071536019,
|
|
"calib/avg_num_step_conf": 5.2578125,
|
|
"calib/ece": 0.2358832889534415,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.0013826844096682719,
|
|
"calib/mean_conf": 0.011175534575970305,
|
|
"calib/mu_c": 0.010129032258064517,
|
|
"calib/mu_w": 0.01151171666773279,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.00196078431372549,
|
|
"calib/std_conf": 0.03209480971062626,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2120066889632107,
|
|
"calib/step_q_c_n": 299.0,
|
|
"calib/step_q_gap": -0.029857868111700853,
|
|
"calib/step_q_w": 0.24186455707491156,
|
|
"calib/step_q_w_n": 1047.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2568.0,
|
|
"completions/max_terminated_length": 2568.0,
|
|
"completions/mean_length": 266.71875,
|
|
"completions/mean_terminated_length": 266.71875,
|
|
"completions/min_length": 63.0,
|
|
"completions/min_terminated_length": 63.0,
|
|
"epoch": 0.18026666666666666,
|
|
"grad_norm": 0.013696101494133472,
|
|
"learning_rate": 8.611111111111112e-07,
|
|
"loss": 0.2917,
|
|
"num_tokens": 32167389.0,
|
|
"reward": 0.9940655827522278,
|
|
"reward_std": 0.019284099340438843,
|
|
"rewards/accuracy_reward_step": 0.2421875,
|
|
"rewards/final_brier_reward_step": 0.7537561655044556,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 169
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8750657048076391,
|
|
"aux_distill/mean_u": 0.25825254583750035,
|
|
"aux_distill/n_active_tok": 182.125,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5602652825836216,
|
|
"calib/avg_num_step_conf": 5.69140625,
|
|
"calib/ece": 0.18969411764705885,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.00228921568627451,
|
|
"calib/mean_conf": 0.010305882352941177,
|
|
"calib/mu_c": 0.012137254901960784,
|
|
"calib/mu_w": 0.009848039215686274,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.009186123630711062,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.3257341772151899,
|
|
"calib/step_q_c_n": 316.0,
|
|
"calib/step_q_gap": 0.0502609081529638,
|
|
"calib/step_q_w": 0.2754732690622261,
|
|
"calib/step_q_w_n": 1141.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1676.0,
|
|
"completions/max_terminated_length": 1676.0,
|
|
"completions/mean_length": 275.77734375,
|
|
"completions/mean_terminated_length": 275.77734375,
|
|
"completions/min_length": 51.0,
|
|
"completions/min_terminated_length": 51.0,
|
|
"epoch": 0.18133333333333335,
|
|
"grad_norm": 0.012967259623110294,
|
|
"learning_rate": 8.333333333333333e-07,
|
|
"loss": 0.2707,
|
|
"num_tokens": 32342140.0,
|
|
"reward": 0.9984167814254761,
|
|
"reward_std": 0.014727575704455376,
|
|
"rewards/accuracy_reward_step": 0.19921875,
|
|
"rewards/final_brier_reward_step": 0.8015210628509521,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 170
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8613785710185766,
|
|
"aux_distill/mean_u": 0.24564890563767686,
|
|
"aux_distill/n_active_tok": 150.875,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5454712137880455,
|
|
"calib/avg_num_step_conf": 4.72265625,
|
|
"calib/ece": 0.1968359375,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.00390625,
|
|
"calib/gap": 0.020383204987165386,
|
|
"calib/mean_conf": 0.014101562500000001,
|
|
"calib/mu_c": 0.030185185185185186,
|
|
"calib/mu_w": 0.009801980198019802,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.062456027011478996,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2682475247524752,
|
|
"calib/step_q_c_n": 202.0,
|
|
"calib/step_q_gap": 0.008543651862703616,
|
|
"calib/step_q_w": 0.2597038728897716,
|
|
"calib/step_q_w_n": 1007.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 700.0,
|
|
"completions/max_terminated_length": 700.0,
|
|
"completions/mean_length": 236.4765625,
|
|
"completions/mean_terminated_length": 237.4039306640625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 50.0,
|
|
"epoch": 0.1824,
|
|
"grad_norm": 0.014070946723222733,
|
|
"learning_rate": 8.055555555555557e-07,
|
|
"loss": 0.2691,
|
|
"num_tokens": 32509574.0,
|
|
"reward": 1.0043174028396606,
|
|
"reward_std": 0.009339498355984688,
|
|
"rewards/accuracy_reward_step": 0.2109375,
|
|
"rewards/final_brier_reward_step": 0.7976972460746765,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 171
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8902777750045061,
|
|
"aux_distill/mean_u": 0.22226979449947362,
|
|
"aux_distill/n_active_tok": 158.625,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.46706539074960124,
|
|
"calib/avg_num_step_conf": 5.00390625,
|
|
"calib/ece": 0.2478359375,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.0007850079744816576,
|
|
"calib/mean_conf": 0.009976562500000001,
|
|
"calib/mu_c": 0.009393939393939395,
|
|
"calib/mu_w": 0.010178947368421053,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.009167678314796705,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2509805194805195,
|
|
"calib/step_q_c_n": 308.0,
|
|
"calib/step_q_gap": -0.022046201999439352,
|
|
"calib/step_q_w": 0.2730267214799589,
|
|
"calib/step_q_w_n": 973.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 786.0,
|
|
"completions/max_terminated_length": 786.0,
|
|
"completions/mean_length": 238.98828125,
|
|
"completions/mean_terminated_length": 239.92550659179688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 62.0,
|
|
"epoch": 0.18346666666666667,
|
|
"grad_norm": 0.016252703964710236,
|
|
"learning_rate": 7.777777777777779e-07,
|
|
"loss": 0.2919,
|
|
"num_tokens": 32674107.0,
|
|
"reward": 1.0023300647735596,
|
|
"reward_std": 0.003432949772104621,
|
|
"rewards/accuracy_reward_step": 0.2578125,
|
|
"rewards/final_brier_reward_step": 0.7468476295471191,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 172
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8446943629533052,
|
|
"aux_distill/mean_u": 0.2223728193424118,
|
|
"aux_distill/n_active_tok": 162.5,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5111847199518197,
|
|
"calib/avg_num_step_conf": 5.09375,
|
|
"calib/ece": 0.21993359375,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.001661275058074509,
|
|
"calib/mean_conf": 0.01053515625,
|
|
"calib/mu_c": 0.011813559322033899,
|
|
"calib/mu_w": 0.01015228426395939,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.009616815431216612,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2718935483870968,
|
|
"calib/step_q_c_n": 310.0,
|
|
"calib/step_q_gap": -0.0031336145907704394,
|
|
"calib/step_q_w": 0.2750271629778672,
|
|
"calib/step_q_w_n": 994.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 622.0,
|
|
"completions/max_terminated_length": 622.0,
|
|
"completions/mean_length": 242.828125,
|
|
"completions/mean_terminated_length": 243.78041076660156,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 51.0,
|
|
"epoch": 0.18453333333333333,
|
|
"grad_norm": 0.014125216752290726,
|
|
"learning_rate": 7.5e-07,
|
|
"loss": 0.2574,
|
|
"num_tokens": 32839431.0,
|
|
"reward": 1.0026209354400635,
|
|
"reward_std": 0.004656236618757248,
|
|
"rewards/accuracy_reward_step": 0.23046875,
|
|
"rewards/final_brier_reward_step": 0.774773120880127,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 173
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8671461902558804,
|
|
"aux_distill/mean_u": 0.2609641669285706,
|
|
"aux_distill/n_active_tok": 158.875,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.48767991239048814,
|
|
"calib/avg_num_step_conf": 5.0,
|
|
"calib/ece": 0.2551796875,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.0008066332916145218,
|
|
"calib/mean_conf": 0.0104453125,
|
|
"calib/mu_c": 0.009852941176470587,
|
|
"calib/mu_w": 0.010659574468085109,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.009069875097119241,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2377955271565495,
|
|
"calib/step_q_c_n": 313.0,
|
|
"calib/step_q_gap": -0.07030788545978972,
|
|
"calib/step_q_w": 0.3081034126163392,
|
|
"calib/step_q_w_n": 967.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 928.0,
|
|
"completions/max_terminated_length": 928.0,
|
|
"completions/mean_length": 246.46875,
|
|
"completions/mean_terminated_length": 247.435302734375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 45.0,
|
|
"epoch": 0.1856,
|
|
"grad_norm": 0.015688760206103325,
|
|
"learning_rate": 7.222222222222222e-07,
|
|
"loss": 0.2571,
|
|
"num_tokens": 33006759.0,
|
|
"reward": 1.0025215148925781,
|
|
"reward_std": 0.00413602776825428,
|
|
"rewards/accuracy_reward_step": 0.265625,
|
|
"rewards/final_brier_reward_step": 0.7394180297851562,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 174
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8709164746105671,
|
|
"aux_distill/mean_u": 0.245912652958161,
|
|
"aux_distill/n_active_tok": 157.0,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5352510808114399,
|
|
"calib/avg_num_step_conf": 5.01171875,
|
|
"calib/ece": 0.232416015625,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0007272198204190238,
|
|
"calib/mean_conf": 0.009771484375,
|
|
"calib/mu_c": 0.010322580645161292,
|
|
"calib/mu_w": 0.009595360824742268,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.007668391351621986,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.27035856573705186,
|
|
"calib/step_q_c_n": 251.0,
|
|
"calib/step_q_gap": -0.006758682324963683,
|
|
"calib/step_q_w": 0.27711724806201554,
|
|
"calib/step_q_w_n": 1032.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 944.0,
|
|
"completions/max_terminated_length": 944.0,
|
|
"completions/mean_length": 239.45703125,
|
|
"completions/mean_terminated_length": 240.39608764648438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 73.0,
|
|
"epoch": 0.18666666666666668,
|
|
"grad_norm": 0.013851930387318134,
|
|
"learning_rate": 6.944444444444446e-07,
|
|
"loss": 0.2803,
|
|
"num_tokens": 33173884.0,
|
|
"reward": 1.00242280960083,
|
|
"reward_std": 0.0037492546252906322,
|
|
"rewards/accuracy_reward_step": 0.2421875,
|
|
"rewards/final_brier_reward_step": 0.7626582384109497,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 175
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9305590968579054,
|
|
"aux_distill/mean_u": 0.29542864199826796,
|
|
"aux_distill/n_active_tok": 173.375,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5979081472161056,
|
|
"calib/avg_num_step_conf": 5.421875,
|
|
"calib/ece": 0.25588235294117645,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0033422459893048123,
|
|
"calib/mean_conf": 0.010784313725490196,
|
|
"calib/mu_c": 0.013235294117647059,
|
|
"calib/mu_w": 0.009893048128342246,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.008953723566511192,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.22322033898305083,
|
|
"calib/step_q_c_n": 295.0,
|
|
"calib/step_q_gap": -0.039495123048056247,
|
|
"calib/step_q_w": 0.26271546203110707,
|
|
"calib/step_q_w_n": 1093.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1004.0,
|
|
"completions/max_terminated_length": 1004.0,
|
|
"completions/mean_length": 256.49609375,
|
|
"completions/mean_terminated_length": 257.5019836425781,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 53.0,
|
|
"epoch": 0.18773333333333334,
|
|
"grad_norm": 0.014219074510037899,
|
|
"learning_rate": 6.666666666666667e-07,
|
|
"loss": 0.2601,
|
|
"num_tokens": 33343611.0,
|
|
"reward": 0.9995114803314209,
|
|
"reward_std": 0.015234305523335934,
|
|
"rewards/accuracy_reward_step": 0.265625,
|
|
"rewards/final_brier_reward_step": 0.7373043298721313,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 176
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8567609488964081,
|
|
"aux_distill/mean_u": 0.2489543489347476,
|
|
"aux_distill/n_active_tok": 152.5,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.47199518196678997,
|
|
"calib/avg_num_step_conf": 4.84765625,
|
|
"calib/ece": 0.2191328125,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0004444635636238498,
|
|
"calib/mean_conf": 0.0113359375,
|
|
"calib/mu_c": 0.011677966101694916,
|
|
"calib/mu_w": 0.011233502538071066,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.015259872492786227,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.17995348837209302,
|
|
"calib/step_q_c_n": 258.0,
|
|
"calib/step_q_gap": -0.09957255435425488,
|
|
"calib/step_q_w": 0.2795260427263479,
|
|
"calib/step_q_w_n": 983.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 915.0,
|
|
"completions/max_terminated_length": 915.0,
|
|
"completions/mean_length": 231.8515625,
|
|
"completions/mean_terminated_length": 232.76080322265625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 75.0,
|
|
"epoch": 0.1888,
|
|
"grad_norm": 0.014605239033699036,
|
|
"learning_rate": 6.388888888888889e-07,
|
|
"loss": 0.2796,
|
|
"num_tokens": 33506797.0,
|
|
"reward": 1.002510666847229,
|
|
"reward_std": 0.004579597152769566,
|
|
"rewards/accuracy_reward_step": 0.23046875,
|
|
"rewards/final_brier_reward_step": 0.7745527029037476,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 177
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9091615509241819,
|
|
"aux_distill/mean_u": 0.29264410281950765,
|
|
"aux_distill/n_active_tok": 164.75,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5167112299465241,
|
|
"calib/avg_num_step_conf": 5.1484375,
|
|
"calib/ece": 0.2567058823529412,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.00045454545454545747,
|
|
"calib/mean_conf": 0.00996078431372549,
|
|
"calib/mu_c": 0.010294117647058825,
|
|
"calib/mu_w": 0.009839572192513368,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.008092506299557722,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.25118611987381706,
|
|
"calib/step_q_c_n": 317.0,
|
|
"calib/step_q_gap": -7.961439191722652e-05,
|
|
"calib/step_q_w": 0.2512657342657343,
|
|
"calib/step_q_w_n": 1001.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2546.0,
|
|
"completions/max_terminated_length": 2546.0,
|
|
"completions/mean_length": 255.21875,
|
|
"completions/mean_terminated_length": 255.21875,
|
|
"completions/min_length": 43.0,
|
|
"completions/min_terminated_length": 43.0,
|
|
"epoch": 0.18986666666666666,
|
|
"grad_norm": 0.013889545574784279,
|
|
"learning_rate": 6.111111111111112e-07,
|
|
"loss": 0.3144,
|
|
"num_tokens": 33678205.0,
|
|
"reward": 0.9987460374832153,
|
|
"reward_std": 0.015478258952498436,
|
|
"rewards/accuracy_reward_step": 0.265625,
|
|
"rewards/final_brier_reward_step": 0.7357734441757202,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 178
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.889463035389781,
|
|
"aux_distill/mean_u": 0.25818936434507933,
|
|
"aux_distill/n_active_tok": 160.0,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5686725944744363,
|
|
"calib/avg_num_step_conf": 5.0234375,
|
|
"calib/ece": 0.25311886563527475,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.002126533770932748,
|
|
"calib/mean_conf": 0.00962623240394095,
|
|
"calib/mu_c": 0.01119402985074627,
|
|
"calib/mu_w": 0.009067496079813522,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.008535101580992532,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.26551572327044026,
|
|
"calib/step_q_c_n": 318.0,
|
|
"calib/step_q_gap": -0.020559035122322822,
|
|
"calib/step_q_w": 0.2860747583927631,
|
|
"calib/step_q_w_n": 968.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 997.0,
|
|
"completions/max_terminated_length": 997.0,
|
|
"completions/mean_length": 252.7890625,
|
|
"completions/mean_terminated_length": 253.78041076660156,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 65.0,
|
|
"epoch": 0.19093333333333334,
|
|
"grad_norm": 0.012719987891614437,
|
|
"learning_rate": 5.833333333333334e-07,
|
|
"loss": 0.2694,
|
|
"num_tokens": 33849183.0,
|
|
"reward": 0.9989409446716309,
|
|
"reward_std": 0.014448285102844238,
|
|
"rewards/accuracy_reward_step": 0.26171875,
|
|
"rewards/final_brier_reward_step": 0.7400695085525513,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 179
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9014210309833288,
|
|
"aux_distill/mean_u": 0.2916708409763812,
|
|
"aux_distill/n_active_tok": 158.75,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.4855459970549485,
|
|
"calib/avg_num_step_conf": 4.97265625,
|
|
"calib/ece": 0.25894921875,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.0011935983879717872,
|
|
"calib/mean_conf": 0.010582031249999999,
|
|
"calib/mu_c": 0.009710144927536232,
|
|
"calib/mu_w": 0.01090374331550802,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.008816328366957723,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.26728086419753083,
|
|
"calib/step_q_c_n": 324.0,
|
|
"calib/step_q_gap": 0.022341559666445493,
|
|
"calib/step_q_w": 0.24493930453108534,
|
|
"calib/step_q_w_n": 949.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 776.0,
|
|
"completions/max_terminated_length": 776.0,
|
|
"completions/mean_length": 250.1953125,
|
|
"completions/mean_terminated_length": 251.17648315429688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 53.0,
|
|
"epoch": 0.192,
|
|
"grad_norm": 0.015904264524579048,
|
|
"learning_rate": 5.555555555555555e-07,
|
|
"loss": 0.3061,
|
|
"num_tokens": 34017089.0,
|
|
"reward": 1.002522349357605,
|
|
"reward_std": 0.003945849370211363,
|
|
"rewards/accuracy_reward_step": 0.26953125,
|
|
"rewards/final_brier_reward_step": 0.73551344871521,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 180
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8790353294461966,
|
|
"aux_distill/mean_u": 0.21719419774970758,
|
|
"aux_distill/n_active_tok": 149.0,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.4607611160406192,
|
|
"calib/avg_num_step_conf": 4.65625,
|
|
"calib/ece": 0.18908554687499998,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.00390625,
|
|
"calib/gap": -0.00896292024055999,
|
|
"calib/mean_conf": 0.016226953125,
|
|
"calib/mu_c": 0.008979591836734696,
|
|
"calib/mu_w": 0.017942512077294685,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.006953125,
|
|
"calib/std_conf": 0.07790991452531286,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.24873873873873872,
|
|
"calib/step_q_c_n": 222.0,
|
|
"calib/step_q_gap": -0.02705940559115813,
|
|
"calib/step_q_w": 0.27579814432989685,
|
|
"calib/step_q_w_n": 970.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 564.0,
|
|
"completions/max_terminated_length": 564.0,
|
|
"completions/mean_length": 222.2265625,
|
|
"completions/mean_terminated_length": 223.09805297851562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 74.0,
|
|
"epoch": 0.19306666666666666,
|
|
"grad_norm": 0.013717937283217907,
|
|
"learning_rate": 5.277777777777779e-07,
|
|
"loss": 0.2549,
|
|
"num_tokens": 34180243.0,
|
|
"reward": 0.9985520839691162,
|
|
"reward_std": 0.011695407330989838,
|
|
"rewards/accuracy_reward_step": 0.19140625,
|
|
"rewards/final_brier_reward_step": 0.8056979775428772,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 181
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8650721274316311,
|
|
"aux_distill/mean_u": 0.214282604759036,
|
|
"aux_distill/n_active_tok": 159.5,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5106608072916666,
|
|
"calib/avg_num_step_conf": 5.0078125,
|
|
"calib/ece": 0.24,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0008333333333333335,
|
|
"calib/mean_conf": 0.010000000000000002,
|
|
"calib/mu_c": 0.010624999999999999,
|
|
"calib/mu_w": 0.009791666666666666,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.007603453162872775,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.24737201365187714,
|
|
"calib/step_q_c_n": 293.0,
|
|
"calib/step_q_gap": -0.039943840746505116,
|
|
"calib/step_q_w": 0.28731585439838225,
|
|
"calib/step_q_w_n": 989.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 679.0,
|
|
"completions/max_terminated_length": 679.0,
|
|
"completions/mean_length": 247.671875,
|
|
"completions/mean_terminated_length": 248.64315795898438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 84.0,
|
|
"epoch": 0.19413333333333332,
|
|
"grad_norm": 0.01505859661847353,
|
|
"learning_rate": 5.000000000000001e-07,
|
|
"loss": 0.2828,
|
|
"num_tokens": 34349807.0,
|
|
"reward": 1.002577304840088,
|
|
"reward_std": 0.004599182400852442,
|
|
"rewards/accuracy_reward_step": 0.25,
|
|
"rewards/final_brier_reward_step": 0.7551547288894653,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 182
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8601448759436607,
|
|
"aux_distill/mean_u": 0.24111330375244838,
|
|
"aux_distill/n_active_tok": 154.0,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.4941244239631336,
|
|
"calib/avg_num_step_conf": 4.86328125,
|
|
"calib/ece": 0.2634375,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.00039324116743471964,
|
|
"calib/mean_conf": 0.01,
|
|
"calib/mu_c": 0.010285714285714289,
|
|
"calib/mu_w": 0.009892473118279569,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.00960143218483576,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.22008132530120483,
|
|
"calib/step_q_c_n": 332.0,
|
|
"calib/step_q_gap": -0.026885815991237655,
|
|
"calib/step_q_w": 0.2469671412924425,
|
|
"calib/step_q_w_n": 913.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 902.0,
|
|
"completions/max_terminated_length": 902.0,
|
|
"completions/mean_length": 229.36328125,
|
|
"completions/mean_terminated_length": 230.26275634765625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 51.0,
|
|
"epoch": 0.1952,
|
|
"grad_norm": 0.016035255044698715,
|
|
"learning_rate": 4.7222222222222226e-07,
|
|
"loss": 0.2392,
|
|
"num_tokens": 34515204.0,
|
|
"reward": 1.002716302871704,
|
|
"reward_std": 0.00421358086168766,
|
|
"rewards/accuracy_reward_step": 0.2734375,
|
|
"rewards/final_brier_reward_step": 0.7319953441619873,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 183
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.920300142839551,
|
|
"aux_distill/mean_u": 0.27552763185378215,
|
|
"aux_distill/n_active_tok": 155.375,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5023314790575917,
|
|
"calib/avg_num_step_conf": 4.85546875,
|
|
"calib/ece": 0.24110196078431373,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.00037090968586387524,
|
|
"calib/mean_conf": 0.00987843137254902,
|
|
"calib/mu_c": 0.01015625,
|
|
"calib/mu_w": 0.009785340314136125,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.008278777823162683,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.24364912280701753,
|
|
"calib/step_q_c_n": 285.0,
|
|
"calib/step_q_gap": -0.03158469765227265,
|
|
"calib/step_q_w": 0.2752338204592902,
|
|
"calib/step_q_w_n": 958.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 733.0,
|
|
"completions/max_terminated_length": 733.0,
|
|
"completions/mean_length": 240.50390625,
|
|
"completions/mean_terminated_length": 241.4470672607422,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 53.0,
|
|
"epoch": 0.19626666666666667,
|
|
"grad_norm": 0.014850256964564323,
|
|
"learning_rate": 4.444444444444445e-07,
|
|
"loss": 0.2515,
|
|
"num_tokens": 34682053.0,
|
|
"reward": 0.9985500574111938,
|
|
"reward_std": 0.015233214944601059,
|
|
"rewards/accuracy_reward_step": 0.25,
|
|
"rewards/final_brier_reward_step": 0.7510063648223877,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 184
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8545205853879452,
|
|
"aux_distill/mean_u": 0.23646542383363647,
|
|
"aux_distill/n_active_tok": 164.25,
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.5184684684684685,
|
|
"calib/avg_num_step_conf": 5.234375,
|
|
"calib/ece": 0.28124015748031495,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.000375975975975976,
|
|
"calib/mean_conf": 0.010098425196850395,
|
|
"calib/mu_c": 0.010364864864864865,
|
|
"calib/mu_w": 0.00998888888888889,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.007542671097414667,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.3316076923076923,
|
|
"calib/step_q_c_n": 390.0,
|
|
"calib/step_q_gap": 0.08417506072874495,
|
|
"calib/step_q_w": 0.24743263157894738,
|
|
"calib/step_q_w_n": 950.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 2685.0,
|
|
"completions/max_terminated_length": 2685.0,
|
|
"completions/mean_length": 256.17578125,
|
|
"completions/mean_terminated_length": 257.180419921875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 52.0,
|
|
"epoch": 0.19733333333333333,
|
|
"grad_norm": 0.012667279690504074,
|
|
"learning_rate": 4.1666666666666667e-07,
|
|
"loss": 0.2685,
|
|
"num_tokens": 34854554.0,
|
|
"reward": 0.9951047897338867,
|
|
"reward_std": 0.02564604952931404,
|
|
"rewards/accuracy_reward_step": 0.2890625,
|
|
"rewards/final_brier_reward_step": 0.7089595794677734,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"step": 185
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9243546165525913,
|
|
"aux_distill/mean_u": 0.2849980260621753,
|
|
"aux_distill/n_active_tok": 165.375,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5357444651005577,
|
|
"calib/avg_num_step_conf": 5.16796875,
|
|
"calib/ece": 0.22968627450980394,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0004030758830488434,
|
|
"calib/mean_conf": 0.009529411764705882,
|
|
"calib/mu_c": 0.009836065573770493,
|
|
"calib/mu_w": 0.00943298969072165,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.008664448300951515,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2763677685950413,
|
|
"calib/step_q_c_n": 242.0,
|
|
"calib/step_q_gap": 0.03412355027866759,
|
|
"calib/step_q_w": 0.24224421831637372,
|
|
"calib/step_q_w_n": 1081.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 928.0,
|
|
"completions/max_terminated_length": 928.0,
|
|
"completions/mean_length": 260.7578125,
|
|
"completions/mean_terminated_length": 261.7803955078125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 73.0,
|
|
"epoch": 0.1984,
|
|
"grad_norm": 0.014339666813611984,
|
|
"learning_rate": 3.8888888888888895e-07,
|
|
"loss": 0.3138,
|
|
"num_tokens": 35026348.0,
|
|
"reward": 0.9983549118041992,
|
|
"reward_std": 0.01398992445319891,
|
|
"rewards/accuracy_reward_step": 0.23828125,
|
|
"rewards/final_brier_reward_step": 0.7623347640037537,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 186
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8787559699267149,
|
|
"aux_distill/mean_u": 0.23315337709486664,
|
|
"aux_distill/n_active_tok": 152.125,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5551409618573797,
|
|
"calib/avg_num_step_conf": 4.75390625,
|
|
"calib/ece": 0.20044705882352942,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0008891652846876718,
|
|
"calib/mean_conf": 0.01131764705882353,
|
|
"calib/mu_c": 0.012018518518518519,
|
|
"calib/mu_w": 0.011129353233830847,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.009315656353000426,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.3372490118577075,
|
|
"calib/step_q_c_n": 253.0,
|
|
"calib/step_q_gap": 0.05299071310252079,
|
|
"calib/step_q_w": 0.2842582987551867,
|
|
"calib/step_q_w_n": 964.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1499.0,
|
|
"completions/max_terminated_length": 1499.0,
|
|
"completions/mean_length": 233.046875,
|
|
"completions/mean_terminated_length": 233.96080017089844,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 62.0,
|
|
"epoch": 0.19946666666666665,
|
|
"grad_norm": 0.01473988126963377,
|
|
"learning_rate": 3.611111111111111e-07,
|
|
"loss": 0.2494,
|
|
"num_tokens": 35187552.0,
|
|
"reward": 0.9985218644142151,
|
|
"reward_std": 0.0148072000592947,
|
|
"rewards/accuracy_reward_step": 0.2109375,
|
|
"rewards/final_brier_reward_step": 0.790012538433075,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 187
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9031260069459677,
|
|
"aux_distill/mean_u": 0.26083758666871576,
|
|
"aux_distill/n_active_tok": 153.75,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5989276703111859,
|
|
"calib/avg_num_step_conf": 4.828125,
|
|
"calib/ece": 0.3093046875,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.003002803476310628,
|
|
"calib/mean_conf": 0.0110078125,
|
|
"calib/mu_c": 0.01304878048780488,
|
|
"calib/mu_w": 0.010045977011494251,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.009012141197564749,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.27171988795518204,
|
|
"calib/step_q_c_n": 357.0,
|
|
"calib/step_q_gap": 0.03275856827372586,
|
|
"calib/step_q_w": 0.23896131968145617,
|
|
"calib/step_q_w_n": 879.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 600.0,
|
|
"completions/max_terminated_length": 600.0,
|
|
"completions/mean_length": 235.7421875,
|
|
"completions/mean_terminated_length": 236.66668701171875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 53.0,
|
|
"epoch": 0.20053333333333334,
|
|
"grad_norm": 0.015368984080851078,
|
|
"learning_rate": 3.3333333333333335e-07,
|
|
"loss": 0.2734,
|
|
"num_tokens": 35351974.0,
|
|
"reward": 1.0040783882141113,
|
|
"reward_std": 0.005144909024238586,
|
|
"rewards/accuracy_reward_step": 0.3203125,
|
|
"rewards/final_brier_reward_step": 0.687844455242157,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 188
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8606499591842294,
|
|
"aux_distill/mean_u": 0.24700294945643025,
|
|
"aux_distill/n_active_tok": 164.25,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5778454510848877,
|
|
"calib/avg_num_step_conf": 5.1953125,
|
|
"calib/ece": 0.2675390625,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0026090597639893427,
|
|
"calib/mean_conf": 0.0098046875,
|
|
"calib/mu_c": 0.011690140845070423,
|
|
"calib/mu_w": 0.00908108108108108,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.007927955475867896,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2618125,
|
|
"calib/step_q_c_n": 320.0,
|
|
"calib/step_q_gap": 0.009662995049504963,
|
|
"calib/step_q_w": 0.25214950495049504,
|
|
"calib/step_q_w_n": 1010.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 734.0,
|
|
"completions/max_terminated_length": 734.0,
|
|
"completions/mean_length": 245.53515625,
|
|
"completions/mean_terminated_length": 246.498046875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 64.0,
|
|
"epoch": 0.2016,
|
|
"grad_norm": 0.013666624203324318,
|
|
"learning_rate": 3.055555555555556e-07,
|
|
"loss": 0.2603,
|
|
"num_tokens": 35522599.0,
|
|
"reward": 1.0031626224517822,
|
|
"reward_std": 0.0042640105821192265,
|
|
"rewards/accuracy_reward_step": 0.27734375,
|
|
"rewards/final_brier_reward_step": 0.7289816737174988,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 189
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8491978226229548,
|
|
"aux_distill/mean_u": 0.2470079832588207,
|
|
"aux_distill/n_active_tok": 177.0,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.5465555206039635,
|
|
"calib/avg_num_step_conf": 5.53125,
|
|
"calib/ece": 0.256035294117647,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0005628342245989348,
|
|
"calib/mean_conf": 0.01063137254901961,
|
|
"calib/mu_c": 0.011044117647058826,
|
|
"calib/mu_w": 0.010481283422459891,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.009472219240061723,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.24370666666666663,
|
|
"calib/step_q_c_n": 375.0,
|
|
"calib/step_q_gap": -0.0425815177713737,
|
|
"calib/step_q_w": 0.2862881844380403,
|
|
"calib/step_q_w_n": 1041.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 859.0,
|
|
"completions/max_terminated_length": 859.0,
|
|
"completions/mean_length": 256.40625,
|
|
"completions/mean_terminated_length": 257.4117736816406,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 78.0,
|
|
"epoch": 0.20266666666666666,
|
|
"grad_norm": 0.014104792848229408,
|
|
"learning_rate": 2.7777777777777776e-07,
|
|
"loss": 0.2561,
|
|
"num_tokens": 35693847.0,
|
|
"reward": 0.9989263415336609,
|
|
"reward_std": 0.015147150494158268,
|
|
"rewards/accuracy_reward_step": 0.265625,
|
|
"rewards/final_brier_reward_step": 0.7361339330673218,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 190
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8412365037947893,
|
|
"aux_distill/mean_u": 0.20567782297729037,
|
|
"aux_distill/n_active_tok": 158.375,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5478466910542068,
|
|
"calib/avg_num_step_conf": 5.0234375,
|
|
"calib/ece": 0.231796875,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0014000665114732302,
|
|
"calib/mean_conf": 0.010390624999999999,
|
|
"calib/mu_c": 0.011451612903225808,
|
|
"calib/mu_w": 0.010051546391752578,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.008043081630157373,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2528014184397163,
|
|
"calib/step_q_c_n": 282.0,
|
|
"calib/step_q_gap": -0.007035234946737856,
|
|
"calib/step_q_w": 0.2598366533864542,
|
|
"calib/step_q_w_n": 1004.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1019.0,
|
|
"completions/max_terminated_length": 1019.0,
|
|
"completions/mean_length": 231.63671875,
|
|
"completions/mean_terminated_length": 232.54510498046875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 62.0,
|
|
"epoch": 0.20373333333333332,
|
|
"grad_norm": 0.01434285193681717,
|
|
"learning_rate": 2.5000000000000004e-07,
|
|
"loss": 0.2495,
|
|
"num_tokens": 35857314.0,
|
|
"reward": 1.0026870965957642,
|
|
"reward_std": 0.004015287384390831,
|
|
"rewards/accuracy_reward_step": 0.2421875,
|
|
"rewards/final_brier_reward_step": 0.7631866931915283,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 191
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8681392464786768,
|
|
"aux_distill/mean_u": 0.22151961991092448,
|
|
"aux_distill/n_active_tok": 147.625,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5210526315789474,
|
|
"calib/avg_num_step_conf": 4.625,
|
|
"calib/ece": 0.24690286144578313,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0008566783881939263,
|
|
"calib/mean_conf": 0.010909638554216866,
|
|
"calib/mu_c": 0.011545454545454546,
|
|
"calib/mu_w": 0.01068877615726062,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.008304116287721583,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.23784768211920534,
|
|
"calib/step_q_c_n": 302.0,
|
|
"calib/step_q_gap": -0.0034210253637878274,
|
|
"calib/step_q_w": 0.24126870748299317,
|
|
"calib/step_q_w_n": 882.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 967.0,
|
|
"completions/max_terminated_length": 967.0,
|
|
"completions/mean_length": 235.9375,
|
|
"completions/mean_terminated_length": 236.86276245117188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 43.0,
|
|
"epoch": 0.2048,
|
|
"grad_norm": 0.015469144098460674,
|
|
"learning_rate": 2.2222222222222224e-07,
|
|
"loss": 0.2585,
|
|
"num_tokens": 36022690.0,
|
|
"reward": 1.0028825998306274,
|
|
"reward_std": 0.004011563956737518,
|
|
"rewards/accuracy_reward_step": 0.2578125,
|
|
"rewards/final_brier_reward_step": 0.7479526400566101,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 192
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9071143828332424,
|
|
"aux_distill/mean_u": 0.25343324748730706,
|
|
"aux_distill/n_active_tok": 157.25,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.531650641025641,
|
|
"calib/avg_num_step_conf": 4.96875,
|
|
"calib/ece": 0.1779375,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0002820512820512818,
|
|
"calib/mean_conf": 0.009562500000000002,
|
|
"calib/mu_c": 0.009791666666666666,
|
|
"calib/mu_w": 0.009509615384615384,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.008472151069828723,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.20765566037735847,
|
|
"calib/step_q_c_n": 212.0,
|
|
"calib/step_q_gap": -0.03716226415094348,
|
|
"calib/step_q_w": 0.24481792452830195,
|
|
"calib/step_q_w_n": 1060.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 797.0,
|
|
"completions/max_terminated_length": 797.0,
|
|
"completions/mean_length": 246.375,
|
|
"completions/mean_terminated_length": 247.3411865234375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 51.0,
|
|
"epoch": 0.20586666666666667,
|
|
"grad_norm": 0.013341576792299747,
|
|
"learning_rate": 1.9444444444444447e-07,
|
|
"loss": 0.2691,
|
|
"num_tokens": 36191474.0,
|
|
"reward": 1.0017542839050293,
|
|
"reward_std": 0.002907798858359456,
|
|
"rewards/accuracy_reward_step": 0.1875,
|
|
"rewards/final_brier_reward_step": 0.8160086870193481,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 193
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8788082543760538,
|
|
"aux_distill/mean_u": 0.25585743095938557,
|
|
"aux_distill/n_active_tok": 150.875,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.4472084024322831,
|
|
"calib/avg_num_step_conf": 4.7421875,
|
|
"calib/ece": 0.2514921875,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.0013176972281449904,
|
|
"calib/mean_conf": 0.0102265625,
|
|
"calib/mu_c": 0.009253731343283582,
|
|
"calib/mu_w": 0.010571428571428572,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.007982123428862382,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.23755555555555555,
|
|
"calib/step_q_c_n": 315.0,
|
|
"calib/step_q_gap": -0.026266468916079616,
|
|
"calib/step_q_w": 0.26382202447163516,
|
|
"calib/step_q_w_n": 899.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 608.0,
|
|
"completions/max_terminated_length": 608.0,
|
|
"completions/mean_length": 231.01953125,
|
|
"completions/mean_terminated_length": 231.92550659179688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 46.0,
|
|
"epoch": 0.20693333333333333,
|
|
"grad_norm": 0.015607761219143867,
|
|
"learning_rate": 1.6666666666666668e-07,
|
|
"loss": 0.2627,
|
|
"num_tokens": 36356559.0,
|
|
"reward": 1.0023376941680908,
|
|
"reward_std": 0.004235986620187759,
|
|
"rewards/accuracy_reward_step": 0.26171875,
|
|
"rewards/final_brier_reward_step": 0.7429567575454712,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 194
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9156402302905917,
|
|
"aux_distill/mean_u": 0.2884676813592923,
|
|
"aux_distill/n_active_tok": 154.625,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.4467803587180241,
|
|
"calib/avg_num_step_conf": 4.83203125,
|
|
"calib/ece": 0.2919058823529412,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.00392156862745098,
|
|
"calib/gap": -0.006901940605704204,
|
|
"calib/mean_conf": 0.013976470588235295,
|
|
"calib/mu_c": 0.009131578947368421,
|
|
"calib/mu_w": 0.016033519553072625,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.00392156862745098,
|
|
"calib/std_conf": 0.062428118941724314,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2698305084745763,
|
|
"calib/step_q_c_n": 354.0,
|
|
"calib/step_q_gap": 0.020102309154078013,
|
|
"calib/step_q_w": 0.24972819932049828,
|
|
"calib/step_q_w_n": 883.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2493.0,
|
|
"completions/max_terminated_length": 2493.0,
|
|
"completions/mean_length": 261.703125,
|
|
"completions/mean_terminated_length": 261.703125,
|
|
"completions/min_length": 47.0,
|
|
"completions/min_terminated_length": 47.0,
|
|
"epoch": 0.208,
|
|
"grad_norm": 0.01972484402358532,
|
|
"learning_rate": 1.3888888888888888e-07,
|
|
"loss": 0.2632,
|
|
"num_tokens": 36529539.0,
|
|
"reward": 0.9967663288116455,
|
|
"reward_std": 0.020737262442708015,
|
|
"rewards/accuracy_reward_step": 0.296875,
|
|
"rewards/final_brier_reward_step": 0.7005640268325806,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 195
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.9041774030774832,
|
|
"aux_distill/mean_u": 0.2440162551747133,
|
|
"aux_distill/n_active_tok": 149.375,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5318165784832452,
|
|
"calib/avg_num_step_conf": 4.7421875,
|
|
"calib/ece": 0.30703515625,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.001443738977072312,
|
|
"calib/mean_conf": 0.00937109375,
|
|
"calib/mu_c": 0.010358024691358025,
|
|
"calib/mu_w": 0.008914285714285713,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.007043321885212327,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.26571623931623933,
|
|
"calib/step_q_c_n": 351.0,
|
|
"calib/step_q_gap": 0.008513458319715561,
|
|
"calib/step_q_w": 0.25720278099652377,
|
|
"calib/step_q_w_n": 863.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 555.0,
|
|
"completions/max_terminated_length": 555.0,
|
|
"completions/mean_length": 222.2421875,
|
|
"completions/mean_terminated_length": 223.11373901367188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 59.0,
|
|
"epoch": 0.20906666666666668,
|
|
"grad_norm": 0.014004452154040337,
|
|
"learning_rate": 1.1111111111111112e-07,
|
|
"loss": 0.2739,
|
|
"num_tokens": 36688977.0,
|
|
"reward": 1.0032086372375488,
|
|
"reward_std": 0.004075036384165287,
|
|
"rewards/accuracy_reward_step": 0.31640625,
|
|
"rewards/final_brier_reward_step": 0.6900110244750977,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 196
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.932471988722682,
|
|
"aux_distill/mean_u": 0.3058232737935441,
|
|
"aux_distill/n_active_tok": 152.375,
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.538421052631579,
|
|
"calib/avg_num_step_conf": 4.76171875,
|
|
"calib/ece": 0.2446549019607843,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.001113765182186234,
|
|
"calib/mean_conf": 0.010247058823529411,
|
|
"calib/mu_c": 0.011076923076923076,
|
|
"calib/mu_w": 0.009963157894736842,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.008339237585435717,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"calib/step_q_c": 0.2647366666666666,
|
|
"calib/step_q_c_n": 300.0,
|
|
"calib/step_q_gap": 0.012387373957199799,
|
|
"calib/step_q_w": 0.2523492927094668,
|
|
"calib/step_q_w_n": 919.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 2308.0,
|
|
"completions/max_terminated_length": 2308.0,
|
|
"completions/mean_length": 243.52734375,
|
|
"completions/mean_terminated_length": 243.52734375,
|
|
"completions/min_length": 55.0,
|
|
"completions/min_terminated_length": 55.0,
|
|
"epoch": 0.21013333333333334,
|
|
"grad_norm": 0.01720503158867359,
|
|
"learning_rate": 8.333333333333334e-08,
|
|
"loss": 0.3229,
|
|
"num_tokens": 36856376.0,
|
|
"reward": 0.9988193511962891,
|
|
"reward_std": 0.014782923273742199,
|
|
"rewards/accuracy_reward_step": 0.25390625,
|
|
"rewards/final_brier_reward_step": 0.7476386427879333,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"step": 197
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8915483225136995,
|
|
"aux_distill/mean_u": 0.22846494568973005,
|
|
"aux_distill/n_active_tok": 151.5,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5470469346507971,
|
|
"calib/avg_num_step_conf": 4.859375,
|
|
"calib/ece": 0.2742578125,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0020008982708286573,
|
|
"calib/mean_conf": 0.0108984375,
|
|
"calib/mu_c": 0.012328767123287673,
|
|
"calib/mu_w": 0.010327868852459015,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.008859440448391409,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.21185573770491803,
|
|
"calib/step_q_c_n": 305.0,
|
|
"calib/step_q_gap": -0.042164390090609155,
|
|
"calib/step_q_w": 0.2540201277955272,
|
|
"calib/step_q_w_n": 939.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 793.0,
|
|
"completions/max_terminated_length": 793.0,
|
|
"completions/mean_length": 223.81640625,
|
|
"completions/mean_terminated_length": 224.6941375732422,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 66.0,
|
|
"epoch": 0.2112,
|
|
"grad_norm": 0.017510568723082542,
|
|
"learning_rate": 5.555555555555556e-08,
|
|
"loss": 0.2754,
|
|
"num_tokens": 37019057.0,
|
|
"reward": 1.003416895866394,
|
|
"reward_std": 0.004575707949697971,
|
|
"rewards/accuracy_reward_step": 0.28515625,
|
|
"rewards/final_brier_reward_step": 0.7216777205467224,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 198
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8829764146357775,
|
|
"aux_distill/mean_u": 0.2720692958175646,
|
|
"aux_distill/n_active_tok": 155.5,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.4773125996810207,
|
|
"calib/avg_num_step_conf": 4.91015625,
|
|
"calib/ece": 0.247203125,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": -0.0006169059011164255,
|
|
"calib/mean_conf": 0.010609375,
|
|
"calib/mu_c": 0.010151515151515154,
|
|
"calib/mu_w": 0.01076842105263158,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.007613961656678802,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.2470553935860058,
|
|
"calib/step_q_c_n": 343.0,
|
|
"calib/step_q_gap": 0.014188435161498175,
|
|
"calib/step_q_w": 0.23286695842450764,
|
|
"calib/step_q_w_n": 914.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 794.0,
|
|
"completions/max_terminated_length": 794.0,
|
|
"completions/mean_length": 237.01171875,
|
|
"completions/mean_terminated_length": 237.94119262695312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 55.0,
|
|
"epoch": 0.21226666666666666,
|
|
"grad_norm": 0.01510292012244463,
|
|
"learning_rate": 2.777777777777778e-08,
|
|
"loss": 0.2639,
|
|
"num_tokens": 37183932.0,
|
|
"reward": 1.002531886100769,
|
|
"reward_std": 0.0038856856990605593,
|
|
"rewards/accuracy_reward_step": 0.2578125,
|
|
"rewards/final_brier_reward_step": 0.7472513318061829,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 199
|
|
},
|
|
{
|
|
"aux_distill/lambda": 0.30000000000000004,
|
|
"aux_distill/loss": 0.8596922922879457,
|
|
"aux_distill/mean_u": 0.25277857857710645,
|
|
"aux_distill/n_active_tok": 154.375,
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5928548177083334,
|
|
"calib/avg_num_step_conf": 4.8828125,
|
|
"calib/ece": 0.236171875,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.00390625,
|
|
"calib/gap": 0.01760416666666667,
|
|
"calib/mean_conf": 0.013828125,
|
|
"calib/mu_c": 0.027031250000000003,
|
|
"calib/mu_w": 0.009427083333333332,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0,
|
|
"calib/std_conf": 0.062313736118005135,
|
|
"calib/step_conf_rate": 1.0,
|
|
"calib/step_q_c": 0.18862068965517242,
|
|
"calib/step_q_c_n": 261.0,
|
|
"calib/step_q_gap": -0.09334189881803284,
|
|
"calib/step_q_w": 0.28196258847320527,
|
|
"calib/step_q_w_n": 989.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 944.0,
|
|
"completions/max_terminated_length": 944.0,
|
|
"completions/mean_length": 250.58203125,
|
|
"completions/mean_terminated_length": 251.56472778320312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 54.0,
|
|
"epoch": 0.21333333333333335,
|
|
"grad_norm": 0.013802732340991497,
|
|
"learning_rate": 0.0,
|
|
"loss": 0.2705,
|
|
"num_tokens": 37356129.0,
|
|
"reward": 1.004720687866211,
|
|
"reward_std": 0.008684433996677399,
|
|
"rewards/accuracy_reward_step": 0.25,
|
|
"rewards/final_brier_reward_step": 0.7594413757324219,
|
|
"rewards/format_reward_step": 1.0,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.21333333333333335,
|
|
"step": 200,
|
|
"total_flos": 0.0,
|
|
"train_loss": 0.2853034897893667,
|
|
"train_runtime": 14810.4923,
|
|
"train_samples_per_second": 3.457,
|
|
"train_steps_per_second": 0.014
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 200,
|
|
"num_input_tokens_seen": 37356129,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 20,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|