7059 lines
249 KiB
JSON
7059 lines
249 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.32,
|
|
"eval_steps": 500,
|
|
"global_step": 200,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"calib/answer_extract_rate": 0.41015625,
|
|
"calib/auroc": 0.5004346566212692,
|
|
"calib/avg_num_step_conf": 2.54296875,
|
|
"calib/ece": 0.6533783783783784,
|
|
"calib/final_conf_rate": 0.578125,
|
|
"calib/format_rate": 0.328125,
|
|
"calib/frac_conf_gt_0.9": 0.49324324324324326,
|
|
"calib/gap": 0.03429150970733108,
|
|
"calib/mean_conf": 0.8493243243243244,
|
|
"calib/mu_c": 0.876896551724138,
|
|
"calib/mu_w": 0.842605042016807,
|
|
"calib/nonempty_final_conf_rate": 0.578125,
|
|
"calib/nonempty_reasoning_rate": 0.578125,
|
|
"calib/nonempty_step_conf_rate": 0.5078125,
|
|
"calib/pce": 0.6533783783783784,
|
|
"calib/std_conf": 0.20379395450997492,
|
|
"calib/step_conf_rate": 0.5078125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.04296875,
|
|
"completions/max_length": 1348.0,
|
|
"completions/max_terminated_length": 1348.0,
|
|
"completions/mean_length": 229.34765625,
|
|
"completions/mean_terminated_length": 239.64488220214844,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0016,
|
|
"grad_norm": 1.1471130847930908,
|
|
"learning_rate": 2.5000000000000004e-07,
|
|
"loss": -0.0995,
|
|
"num_tokens": 384153.0,
|
|
"reward": 0.43480801582336426,
|
|
"reward_std": 0.6504053473472595,
|
|
"rewards/accuracy_reward_step": 0.1171875,
|
|
"rewards/final_brier_reward_step": 0.14922383427619934,
|
|
"rewards/format_reward_step": 0.328125,
|
|
"rewards/stepwise_brier_reward": 0.23063313961029053,
|
|
"step": 1
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.4765625,
|
|
"calib/auroc": 0.5296425457715781,
|
|
"calib/avg_num_step_conf": 2.4921875,
|
|
"calib/ece": 0.638447204968944,
|
|
"calib/final_conf_rate": 0.62890625,
|
|
"calib/format_rate": 0.37890625,
|
|
"calib/frac_conf_gt_0.9": 0.5217391304347826,
|
|
"calib/gap": 0.03945510026155208,
|
|
"calib/mean_conf": 0.8682608695652173,
|
|
"calib/mu_c": 0.8986486486486487,
|
|
"calib/mu_w": 0.8591935483870966,
|
|
"calib/nonempty_final_conf_rate": 0.62890625,
|
|
"calib/nonempty_reasoning_rate": 0.58984375,
|
|
"calib/nonempty_step_conf_rate": 0.5,
|
|
"calib/pce": 0.638447204968944,
|
|
"calib/std_conf": 0.1677382063219749,
|
|
"calib/step_conf_rate": 0.5,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.04296875,
|
|
"completions/max_length": 1437.0,
|
|
"completions/max_terminated_length": 1437.0,
|
|
"completions/mean_length": 229.40625,
|
|
"completions/mean_terminated_length": 239.70611572265625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0032,
|
|
"grad_norm": 2.901979684829712,
|
|
"learning_rate": 5.000000000000001e-07,
|
|
"loss": -0.1244,
|
|
"num_tokens": 790441.0,
|
|
"reward": 0.511868417263031,
|
|
"reward_std": 0.6400600075721741,
|
|
"rewards/accuracy_reward_step": 0.14453125,
|
|
"rewards/final_brier_reward_step": 0.1580679714679718,
|
|
"rewards/format_reward_step": 0.37890625,
|
|
"rewards/stepwise_brier_reward": 0.26440560817718506,
|
|
"step": 2
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.41796875,
|
|
"calib/auroc": 0.5543981481481481,
|
|
"calib/avg_num_step_conf": 2.10546875,
|
|
"calib/ece": 0.6660000000000001,
|
|
"calib/final_conf_rate": 0.546875,
|
|
"calib/format_rate": 0.3359375,
|
|
"calib/frac_conf_gt_0.9": 0.6214285714285714,
|
|
"calib/gap": 0.04876157407407422,
|
|
"calib/mean_conf": 0.8945714285714286,
|
|
"calib/mu_c": 0.9321875,
|
|
"calib/mu_w": 0.8834259259259257,
|
|
"calib/nonempty_final_conf_rate": 0.546875,
|
|
"calib/nonempty_reasoning_rate": 0.53515625,
|
|
"calib/nonempty_step_conf_rate": 0.46875,
|
|
"calib/pce": 0.6660000000000001,
|
|
"calib/std_conf": 0.13547994806070235,
|
|
"calib/step_conf_rate": 0.46875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.03125,
|
|
"completions/max_length": 1534.0,
|
|
"completions/max_terminated_length": 1534.0,
|
|
"completions/mean_length": 266.99609375,
|
|
"completions/mean_terminated_length": 275.6088562011719,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0048,
|
|
"grad_norm": 5.7775468826293945,
|
|
"learning_rate": 7.5e-07,
|
|
"loss": -0.0927,
|
|
"num_tokens": 1199768.0,
|
|
"reward": 0.46704915165901184,
|
|
"reward_std": 0.6950008869171143,
|
|
"rewards/accuracy_reward_step": 0.12890625,
|
|
"rewards/final_brier_reward_step": 0.15416249632835388,
|
|
"rewards/format_reward_step": 0.3359375,
|
|
"rewards/stepwise_brier_reward": 0.26872166991233826,
|
|
"step": 3
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.421875,
|
|
"calib/auroc": 0.5758771929824561,
|
|
"calib/avg_num_step_conf": 1.71484375,
|
|
"calib/ece": 0.716492537313433,
|
|
"calib/final_conf_rate": 0.5234375,
|
|
"calib/format_rate": 0.30078125,
|
|
"calib/frac_conf_gt_0.9": 0.48507462686567165,
|
|
"calib/gap": 0.05084210526315769,
|
|
"calib/mean_conf": 0.8657462686567163,
|
|
"calib/mu_c": 0.9089999999999998,
|
|
"calib/mu_w": 0.8581578947368421,
|
|
"calib/nonempty_final_conf_rate": 0.5234375,
|
|
"calib/nonempty_reasoning_rate": 0.51953125,
|
|
"calib/nonempty_step_conf_rate": 0.4140625,
|
|
"calib/pce": 0.716492537313433,
|
|
"calib/std_conf": 0.17957818263950365,
|
|
"calib/step_conf_rate": 0.4140625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.04296875,
|
|
"completions/max_length": 1399.0,
|
|
"completions/max_terminated_length": 1399.0,
|
|
"completions/mean_length": 232.78515625,
|
|
"completions/mean_terminated_length": 243.23672485351562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0064,
|
|
"grad_norm": 1.5228453874588013,
|
|
"learning_rate": 1.0000000000000002e-06,
|
|
"loss": -0.11,
|
|
"num_tokens": 1612633.0,
|
|
"reward": 0.3533709645271301,
|
|
"reward_std": 0.52557772397995,
|
|
"rewards/accuracy_reward_step": 0.078125,
|
|
"rewards/final_brier_reward_step": 0.10705117136240005,
|
|
"rewards/format_reward_step": 0.30078125,
|
|
"rewards/stepwise_brier_reward": 0.23612016439437866,
|
|
"step": 4
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.6171875,
|
|
"calib/auroc": 0.5364382239382239,
|
|
"calib/avg_num_step_conf": 2.953125,
|
|
"calib/ece": 0.7019886363636365,
|
|
"calib/final_conf_rate": 0.6875,
|
|
"calib/format_rate": 0.49609375,
|
|
"calib/frac_conf_gt_0.9": 0.5227272727272727,
|
|
"calib/gap": 0.045009652509652276,
|
|
"calib/mean_conf": 0.8610795454545456,
|
|
"calib/mu_c": 0.8989285714285712,
|
|
"calib/mu_w": 0.8539189189189189,
|
|
"calib/nonempty_final_conf_rate": 0.6875,
|
|
"calib/nonempty_reasoning_rate": 0.71875,
|
|
"calib/nonempty_step_conf_rate": 0.609375,
|
|
"calib/pce": 0.7019886363636365,
|
|
"calib/std_conf": 0.18420429133233548,
|
|
"calib/step_conf_rate": 0.609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.03515625,
|
|
"completions/max_length": 1381.0,
|
|
"completions/max_terminated_length": 1381.0,
|
|
"completions/mean_length": 282.92578125,
|
|
"completions/mean_terminated_length": 293.2348327636719,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.008,
|
|
"grad_norm": 0.5310338735580444,
|
|
"learning_rate": 1.25e-06,
|
|
"loss": -0.0677,
|
|
"num_tokens": 2046894.0,
|
|
"reward": 0.543309211730957,
|
|
"reward_std": 0.632076621055603,
|
|
"rewards/accuracy_reward_step": 0.109375,
|
|
"rewards/final_brier_reward_step": 0.17852148413658142,
|
|
"rewards/format_reward_step": 0.49609375,
|
|
"rewards/stepwise_brier_reward": 0.3462778329849243,
|
|
"step": 5
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.6484375,
|
|
"calib/auroc": 0.5823987333421295,
|
|
"calib/avg_num_step_conf": 3.1484375,
|
|
"calib/ece": 0.5968367346938777,
|
|
"calib/final_conf_rate": 0.765625,
|
|
"calib/format_rate": 0.55078125,
|
|
"calib/frac_conf_gt_0.9": 0.5408163265306123,
|
|
"calib/gap": 0.06417205436073359,
|
|
"calib/mean_conf": 0.8571428571428571,
|
|
"calib/mu_c": 0.9039622641509434,
|
|
"calib/mu_w": 0.8397902097902098,
|
|
"calib/nonempty_final_conf_rate": 0.765625,
|
|
"calib/nonempty_reasoning_rate": 0.7890625,
|
|
"calib/nonempty_step_conf_rate": 0.70703125,
|
|
"calib/pce": 0.5917857142857144,
|
|
"calib/std_conf": 0.19894620337351177,
|
|
"calib/step_conf_rate": 0.70703125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.03125,
|
|
"completions/max_length": 1501.0,
|
|
"completions/max_terminated_length": 1501.0,
|
|
"completions/mean_length": 250.95703125,
|
|
"completions/mean_terminated_length": 259.0523986816406,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0096,
|
|
"grad_norm": 3.514328718185425,
|
|
"learning_rate": 1.5e-06,
|
|
"loss": -0.0934,
|
|
"num_tokens": 2465075.0,
|
|
"reward": 0.7629420161247253,
|
|
"reward_std": 0.801480770111084,
|
|
"rewards/accuracy_reward_step": 0.2109375,
|
|
"rewards/final_brier_reward_step": 0.25608164072036743,
|
|
"rewards/format_reward_step": 0.55078125,
|
|
"rewards/stepwise_brier_reward": 0.42849892377853394,
|
|
"step": 6
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.6015625,
|
|
"calib/auroc": 0.6462039297756075,
|
|
"calib/avg_num_step_conf": 3.23046875,
|
|
"calib/ece": 0.5529207920792079,
|
|
"calib/final_conf_rate": 0.7890625,
|
|
"calib/format_rate": 0.5,
|
|
"calib/frac_conf_gt_0.9": 0.5247524752475248,
|
|
"calib/gap": 0.10030810370887144,
|
|
"calib/mean_conf": 0.8549009900990099,
|
|
"calib/mu_c": 0.9249180327868857,
|
|
"calib/mu_w": 0.8246099290780142,
|
|
"calib/nonempty_final_conf_rate": 0.7890625,
|
|
"calib/nonempty_reasoning_rate": 0.75,
|
|
"calib/nonempty_step_conf_rate": 0.6640625,
|
|
"calib/pce": 0.5529207920792079,
|
|
"calib/std_conf": 0.18184216880431048,
|
|
"calib/step_conf_rate": 0.6640625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01953125,
|
|
"completions/max_length": 1453.0,
|
|
"completions/max_terminated_length": 1453.0,
|
|
"completions/mean_length": 247.078125,
|
|
"completions/mean_terminated_length": 252.0,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0112,
|
|
"grad_norm": 0.46240970492362976,
|
|
"learning_rate": 1.75e-06,
|
|
"loss": -0.0902,
|
|
"num_tokens": 2874079.0,
|
|
"reward": 0.775140106678009,
|
|
"reward_std": 0.7889308929443359,
|
|
"rewards/accuracy_reward_step": 0.23828125,
|
|
"rewards/final_brier_reward_step": 0.2891632914543152,
|
|
"rewards/format_reward_step": 0.5,
|
|
"rewards/stepwise_brier_reward": 0.38170963525772095,
|
|
"step": 7
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.73828125,
|
|
"calib/auroc": 0.5490196078431373,
|
|
"calib/avg_num_step_conf": 4.1328125,
|
|
"calib/ece": 0.5892924528301888,
|
|
"calib/final_conf_rate": 0.828125,
|
|
"calib/format_rate": 0.66796875,
|
|
"calib/frac_conf_gt_0.9": 0.5094339622641509,
|
|
"calib/gap": 0.05312174587349039,
|
|
"calib/mean_conf": 0.8675943396226415,
|
|
"calib/mu_c": 0.9059322033898304,
|
|
"calib/mu_w": 0.85281045751634,
|
|
"calib/nonempty_final_conf_rate": 0.828125,
|
|
"calib/nonempty_reasoning_rate": 0.85546875,
|
|
"calib/nonempty_step_conf_rate": 0.7890625,
|
|
"calib/pce": 0.5892924528301888,
|
|
"calib/std_conf": 0.15787912012591696,
|
|
"calib/step_conf_rate": 0.7890625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 1425.0,
|
|
"completions/max_terminated_length": 1425.0,
|
|
"completions/mean_length": 293.1953125,
|
|
"completions/mean_terminated_length": 296.67193603515625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 3.0,
|
|
"epoch": 0.0128,
|
|
"grad_norm": 2.790337324142456,
|
|
"learning_rate": 2.0000000000000003e-06,
|
|
"loss": -0.039,
|
|
"num_tokens": 3280009.0,
|
|
"reward": 0.8816836476325989,
|
|
"reward_std": 0.7367245554924011,
|
|
"rewards/accuracy_reward_step": 0.23046875,
|
|
"rewards/final_brier_reward_step": 0.329680860042572,
|
|
"rewards/format_reward_step": 0.66796875,
|
|
"rewards/stepwise_brier_reward": 0.4783037602901459,
|
|
"step": 8
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.77734375,
|
|
"calib/auroc": 0.575652841781874,
|
|
"calib/avg_num_step_conf": 4.96875,
|
|
"calib/ece": 0.658868778280543,
|
|
"calib/final_conf_rate": 0.86328125,
|
|
"calib/format_rate": 0.75,
|
|
"calib/frac_conf_gt_0.9": 0.39819004524886875,
|
|
"calib/gap": 0.07864362519201207,
|
|
"calib/mean_conf": 0.8172398190045249,
|
|
"calib/mu_c": 0.8834285714285712,
|
|
"calib/mu_w": 0.8047849462365592,
|
|
"calib/nonempty_final_conf_rate": 0.86328125,
|
|
"calib/nonempty_reasoning_rate": 0.8984375,
|
|
"calib/nonempty_step_conf_rate": 0.875,
|
|
"calib/pce": 0.658868778280543,
|
|
"calib/std_conf": 0.21989730521265205,
|
|
"calib/step_conf_rate": 0.875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.02734375,
|
|
"completions/max_length": 1453.0,
|
|
"completions/max_terminated_length": 1453.0,
|
|
"completions/mean_length": 288.578125,
|
|
"completions/mean_terminated_length": 296.6907653808594,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0144,
|
|
"grad_norm": 0.49306055903434753,
|
|
"learning_rate": 2.25e-06,
|
|
"loss": -0.0522,
|
|
"num_tokens": 3713349.0,
|
|
"reward": 0.7794851660728455,
|
|
"reward_std": 0.5652980804443359,
|
|
"rewards/accuracy_reward_step": 0.13671875,
|
|
"rewards/final_brier_reward_step": 0.31141015887260437,
|
|
"rewards/format_reward_step": 0.75,
|
|
"rewards/stepwise_brier_reward": 0.4862178862094879,
|
|
"step": 9
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.92578125,
|
|
"calib/auroc": 0.6339285714285713,
|
|
"calib/avg_num_step_conf": 5.59765625,
|
|
"calib/ece": 0.6440000000000003,
|
|
"calib/final_conf_rate": 0.9375,
|
|
"calib/format_rate": 0.89453125,
|
|
"calib/frac_conf_gt_0.9": 0.4875,
|
|
"calib/gap": 0.05798136645962737,
|
|
"calib/mean_conf": 0.8773333333333333,
|
|
"calib/mu_c": 0.9217857142857143,
|
|
"calib/mu_w": 0.863804347826087,
|
|
"calib/nonempty_final_conf_rate": 0.9375,
|
|
"calib/nonempty_reasoning_rate": 0.98828125,
|
|
"calib/nonempty_step_conf_rate": 0.97265625,
|
|
"calib/pce": 0.6440000000000003,
|
|
"calib/std_conf": 0.13314611856486425,
|
|
"calib/step_conf_rate": 0.97265625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 1481.0,
|
|
"completions/max_terminated_length": 1481.0,
|
|
"completions/mean_length": 276.734375,
|
|
"completions/mean_terminated_length": 280.01580810546875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 10.0,
|
|
"epoch": 0.016,
|
|
"grad_norm": 0.18479806184768677,
|
|
"learning_rate": 2.5e-06,
|
|
"loss": -0.0062,
|
|
"num_tokens": 4130337.0,
|
|
"reward": 1.016362190246582,
|
|
"reward_std": 0.5757254362106323,
|
|
"rewards/accuracy_reward_step": 0.21875,
|
|
"rewards/final_brier_reward_step": 0.36681604385375977,
|
|
"rewards/format_reward_step": 0.89453125,
|
|
"rewards/stepwise_brier_reward": 0.5970702767372131,
|
|
"step": 10
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.96484375,
|
|
"calib/auroc": 0.5444670050761422,
|
|
"calib/avg_num_step_conf": 5.171875,
|
|
"calib/ece": 0.6605263157894737,
|
|
"calib/final_conf_rate": 0.96484375,
|
|
"calib/format_rate": 0.9453125,
|
|
"calib/frac_conf_gt_0.9": 0.46153846153846156,
|
|
"calib/gap": 0.021962436548223274,
|
|
"calib/mean_conf": 0.8602834008097167,
|
|
"calib/mu_c": 0.8778,
|
|
"calib/mu_w": 0.8558375634517768,
|
|
"calib/nonempty_final_conf_rate": 0.96484375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.6591902834008098,
|
|
"calib/std_conf": 0.15018046043386205,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 946.0,
|
|
"completions/max_terminated_length": 946.0,
|
|
"completions/mean_length": 253.4609375,
|
|
"completions/mean_terminated_length": 256.4664001464844,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 65.0,
|
|
"epoch": 0.0176,
|
|
"grad_norm": 0.3017382323741913,
|
|
"learning_rate": 2.7500000000000004e-06,
|
|
"loss": -0.0158,
|
|
"num_tokens": 4553111.0,
|
|
"reward": 1.0321794748306274,
|
|
"reward_std": 0.512408435344696,
|
|
"rewards/accuracy_reward_step": 0.19921875,
|
|
"rewards/final_brier_reward_step": 0.3711339831352234,
|
|
"rewards/format_reward_step": 0.9453125,
|
|
"rewards/stepwise_brier_reward": 0.6716465950012207,
|
|
"step": 11
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.6409646739130436,
|
|
"calib/avg_num_step_conf": 5.16015625,
|
|
"calib/ece": 0.5009126984126984,
|
|
"calib/final_conf_rate": 0.984375,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.42063492063492064,
|
|
"calib/gap": 0.06845923913043461,
|
|
"calib/mean_conf": 0.8580555555555555,
|
|
"calib/mu_c": 0.9015217391304348,
|
|
"calib/mu_w": 0.8330625000000002,
|
|
"calib/nonempty_final_conf_rate": 0.984375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.49694444444444436,
|
|
"calib/std_conf": 0.15787964099453852,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 1502.0,
|
|
"completions/max_terminated_length": 1502.0,
|
|
"completions/mean_length": 262.90234375,
|
|
"completions/mean_terminated_length": 266.019775390625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 78.0,
|
|
"epoch": 0.0192,
|
|
"grad_norm": 0.09093901515007019,
|
|
"learning_rate": 3e-06,
|
|
"loss": 0.0092,
|
|
"num_tokens": 4969230.0,
|
|
"reward": 1.3576459884643555,
|
|
"reward_std": 0.6042752265930176,
|
|
"rewards/accuracy_reward_step": 0.359375,
|
|
"rewards/final_brier_reward_step": 0.5232961177825928,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"rewards/stepwise_brier_reward": 0.7901004552841187,
|
|
"step": 12
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.98828125,
|
|
"calib/auroc": 0.6115916955017302,
|
|
"calib/avg_num_step_conf": 5.234375,
|
|
"calib/ece": 0.5064313725490198,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.98046875,
|
|
"calib/frac_conf_gt_0.9": 0.3607843137254902,
|
|
"calib/gap": 0.06776470588235295,
|
|
"calib/mean_conf": 0.8397647058823529,
|
|
"calib/mu_c": 0.8849411764705883,
|
|
"calib/mu_w": 0.8171764705882354,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.5064313725490198,
|
|
"calib/std_conf": 0.16581097059607222,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 1437.0,
|
|
"completions/max_terminated_length": 1437.0,
|
|
"completions/mean_length": 254.0703125,
|
|
"completions/mean_terminated_length": 258.1031799316406,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 44.0,
|
|
"epoch": 0.0208,
|
|
"grad_norm": 0.2129509001970291,
|
|
"learning_rate": 3.2500000000000002e-06,
|
|
"loss": -0.0004,
|
|
"num_tokens": 5393472.0,
|
|
"reward": 1.2935614585876465,
|
|
"reward_std": 0.4629979729652405,
|
|
"rewards/accuracy_reward_step": 0.33203125,
|
|
"rewards/final_brier_reward_step": 0.514092206954956,
|
|
"rewards/format_reward_step": 0.98046875,
|
|
"rewards/stepwise_brier_reward": 0.707028865814209,
|
|
"step": 13
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.631770498822522,
|
|
"calib/avg_num_step_conf": 5.16796875,
|
|
"calib/ece": 0.500472440944882,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.3031496062992126,
|
|
"calib/gap": 0.08883608078213079,
|
|
"calib/mean_conf": 0.8193700787401575,
|
|
"calib/mu_c": 0.8798765432098766,
|
|
"calib/mu_w": 0.7910404624277458,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.500472440944882,
|
|
"calib/std_conf": 0.17302427134595005,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 661.0,
|
|
"completions/max_terminated_length": 661.0,
|
|
"completions/mean_length": 243.65234375,
|
|
"completions/mean_terminated_length": 247.5198516845703,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 55.0,
|
|
"epoch": 0.0224,
|
|
"grad_norm": 0.07910128682851791,
|
|
"learning_rate": 3.5e-06,
|
|
"loss": 0.0066,
|
|
"num_tokens": 5813583.0,
|
|
"reward": 1.2911372184753418,
|
|
"reward_std": 0.4125267565250397,
|
|
"rewards/accuracy_reward_step": 0.31640625,
|
|
"rewards/final_brier_reward_step": 0.5366746187210083,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/stepwise_brier_reward": 0.752874493598938,
|
|
"step": 14
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.5837162350320245,
|
|
"calib/avg_num_step_conf": 5.390625,
|
|
"calib/ece": 0.44505882352941173,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.19607843137254902,
|
|
"calib/gap": 0.06510233918128638,
|
|
"calib/mean_conf": 0.7738431372549021,
|
|
"calib/mu_c": 0.8175,
|
|
"calib/mu_w": 0.7523976608187136,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.4447450980392157,
|
|
"calib/std_conf": 0.18299505752985162,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 731.0,
|
|
"completions/max_terminated_length": 731.0,
|
|
"completions/mean_length": 264.93359375,
|
|
"completions/mean_terminated_length": 269.138916015625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 55.0,
|
|
"epoch": 0.024,
|
|
"grad_norm": 0.09459760040044785,
|
|
"learning_rate": 3.7500000000000005e-06,
|
|
"loss": -0.0053,
|
|
"num_tokens": 6235486.0,
|
|
"reward": 1.3260624408721924,
|
|
"reward_std": 0.4246816039085388,
|
|
"rewards/accuracy_reward_step": 0.328125,
|
|
"rewards/final_brier_reward_step": 0.5746027231216431,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.7687090635299683,
|
|
"step": 15
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.64665304036791,
|
|
"calib/avg_num_step_conf": 5.72265625,
|
|
"calib/ece": 0.2550980392156863,
|
|
"calib/final_conf_rate": 0.99609375,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.08627450980392157,
|
|
"calib/gap": 0.1255825242718448,
|
|
"calib/mean_conf": 0.6557254901960784,
|
|
"calib/mu_c": 0.7305825242718448,
|
|
"calib/mu_w": 0.605,
|
|
"calib/nonempty_final_conf_rate": 0.99609375,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.25345098039215685,
|
|
"calib/std_conf": 0.23075188937100627,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 755.0,
|
|
"completions/max_terminated_length": 755.0,
|
|
"completions/mean_length": 277.08984375,
|
|
"completions/mean_terminated_length": 281.48809814453125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 64.0,
|
|
"epoch": 0.0256,
|
|
"grad_norm": 0.09741365909576416,
|
|
"learning_rate": 4.000000000000001e-06,
|
|
"loss": -0.0131,
|
|
"num_tokens": 6675669.0,
|
|
"reward": 1.4592740535736084,
|
|
"reward_std": 0.45420658588409424,
|
|
"rewards/accuracy_reward_step": 0.40234375,
|
|
"rewards/final_brier_reward_step": 0.6949933767318726,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/stepwise_brier_reward": 0.7514780759811401,
|
|
"step": 16
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.9921875,
|
|
"calib/auroc": 0.6593394886363637,
|
|
"calib/avg_num_step_conf": 5.6796875,
|
|
"calib/ece": 0.3116796875,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.05078125,
|
|
"calib/gap": 0.13501136363636368,
|
|
"calib/mean_conf": 0.6241796875000001,
|
|
"calib/mu_c": 0.717,
|
|
"calib/mu_w": 0.5819886363636363,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.3116796875,
|
|
"calib/std_conf": 0.2192763999782064,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 695.0,
|
|
"completions/max_terminated_length": 695.0,
|
|
"completions/mean_length": 264.9296875,
|
|
"completions/mean_terminated_length": 269.13494873046875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 111.0,
|
|
"epoch": 0.0272,
|
|
"grad_norm": 0.26401224732398987,
|
|
"learning_rate": 4.25e-06,
|
|
"loss": 0.0128,
|
|
"num_tokens": 7067083.0,
|
|
"reward": 1.3285796642303467,
|
|
"reward_std": 0.42579442262649536,
|
|
"rewards/accuracy_reward_step": 0.3125,
|
|
"rewards/final_brier_reward_step": 0.6931965351104736,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/stepwise_brier_reward": 0.7617468237876892,
|
|
"step": 17
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.6271983225108225,
|
|
"calib/avg_num_step_conf": 5.765625,
|
|
"calib/ece": 0.21310546874999994,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0923782467532468,
|
|
"calib/mean_conf": 0.54943359375,
|
|
"calib/mu_c": 0.6100568181818182,
|
|
"calib/mu_w": 0.5176785714285714,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.20939453124999993,
|
|
"calib/std_conf": 0.19860984514925226,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 787.0,
|
|
"completions/max_terminated_length": 787.0,
|
|
"completions/mean_length": 280.92578125,
|
|
"completions/mean_terminated_length": 285.38494873046875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 72.0,
|
|
"epoch": 0.0288,
|
|
"grad_norm": 0.07939291000366211,
|
|
"learning_rate": 4.5e-06,
|
|
"loss": -0.0106,
|
|
"num_tokens": 7489616.0,
|
|
"reward": 1.3866751194000244,
|
|
"reward_std": 0.384634792804718,
|
|
"rewards/accuracy_reward_step": 0.34375,
|
|
"rewards/final_brier_reward_step": 0.731840968132019,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.7601720690727234,
|
|
"step": 18
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.6573951434878588,
|
|
"calib/avg_num_step_conf": 5.40625,
|
|
"calib/ece": 0.1323828125,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 0.9921875,
|
|
"calib/frac_conf_gt_0.9": 0.01171875,
|
|
"calib/gap": 0.136558183538316,
|
|
"calib/mean_conf": 0.5040234375,
|
|
"calib/mu_c": 0.5845714285714286,
|
|
"calib/mu_w": 0.44801324503311263,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.113125,
|
|
"calib/std_conf": 0.21852091450404373,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 599.0,
|
|
"completions/max_terminated_length": 599.0,
|
|
"completions/mean_length": 261.3671875,
|
|
"completions/mean_terminated_length": 265.5158996582031,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 73.0,
|
|
"epoch": 0.0304,
|
|
"grad_norm": 0.0737985372543335,
|
|
"learning_rate": 4.75e-06,
|
|
"loss": 0.0071,
|
|
"num_tokens": 7910702.0,
|
|
"reward": 1.4832749366760254,
|
|
"reward_std": 0.37495696544647217,
|
|
"rewards/accuracy_reward_step": 0.41015625,
|
|
"rewards/final_brier_reward_step": 0.7626621127128601,
|
|
"rewards/format_reward_step": 0.9921875,
|
|
"rewards/stepwise_brier_reward": 0.7251256108283997,
|
|
"step": 19
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.6827485380116959,
|
|
"calib/avg_num_step_conf": 5.48828125,
|
|
"calib/ece": 0.142578125,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 0.99609375,
|
|
"calib/frac_conf_gt_0.9": 0.00390625,
|
|
"calib/gap": 0.12841520467836265,
|
|
"calib/mean_conf": 0.40523437500000004,
|
|
"calib/mu_c": 0.49552631578947376,
|
|
"calib/mu_w": 0.3671111111111111,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.99609375,
|
|
"calib/pce": 0.12546875,
|
|
"calib/std_conf": 0.21182416367912177,
|
|
"calib/step_conf_rate": 0.99609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 721.0,
|
|
"completions/max_terminated_length": 721.0,
|
|
"completions/mean_length": 260.56640625,
|
|
"completions/mean_terminated_length": 264.702392578125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 56.0,
|
|
"epoch": 0.032,
|
|
"grad_norm": 0.12154613435268402,
|
|
"learning_rate": 5e-06,
|
|
"loss": -0.0238,
|
|
"num_tokens": 8331383.0,
|
|
"reward": 1.3289387226104736,
|
|
"reward_std": 0.37291592359542847,
|
|
"rewards/accuracy_reward_step": 0.296875,
|
|
"rewards/final_brier_reward_step": 0.7844409942626953,
|
|
"rewards/format_reward_step": 0.99609375,
|
|
"rewards/stepwise_brier_reward": 0.7578766345977783,
|
|
"step": 20
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.7274920772111784,
|
|
"calib/avg_num_step_conf": 5.640625,
|
|
"calib/ece": 0.05320312500000001,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 1.0,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.14939786804955343,
|
|
"calib/mean_conf": 0.28625,
|
|
"calib/mu_c": 0.3901282051282051,
|
|
"calib/mu_w": 0.24073033707865168,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 1.0,
|
|
"calib/pce": 0.0173828125,
|
|
"calib/std_conf": 0.19753362055609675,
|
|
"calib/step_conf_rate": 1.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 670.0,
|
|
"completions/max_terminated_length": 670.0,
|
|
"completions/mean_length": 282.7109375,
|
|
"completions/mean_terminated_length": 287.19842529296875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 111.0,
|
|
"epoch": 0.0336,
|
|
"grad_norm": 1.1783859729766846,
|
|
"learning_rate": 4.9722222222222224e-06,
|
|
"loss": 0.0039,
|
|
"num_tokens": 8757205.0,
|
|
"reward": 1.3317503929138184,
|
|
"reward_std": 0.22663286328315735,
|
|
"rewards/accuracy_reward_step": 0.3046875,
|
|
"rewards/final_brier_reward_step": 0.8120882511138916,
|
|
"rewards/format_reward_step": 1.0,
|
|
"rewards/stepwise_brier_reward": 0.6867885589599609,
|
|
"step": 21
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.92578125,
|
|
"completions/max_length": 1410.0,
|
|
"completions/max_terminated_length": 1410.0,
|
|
"completions/mean_length": 37.64453125,
|
|
"completions/mean_terminated_length": 507.2105407714844,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0352,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.944444444444445e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 9116962.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 22
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.99609375,
|
|
"calib/auroc": 0.7493589743589744,
|
|
"calib/avg_num_step_conf": 5.2265625,
|
|
"calib/ece": 0.24708661417322836,
|
|
"calib/final_conf_rate": 0.9921875,
|
|
"calib/format_rate": 0.984375,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.10894487179487176,
|
|
"calib/mean_conf": 0.16787401574803149,
|
|
"calib/mu_c": 0.23221153846153844,
|
|
"calib/mu_w": 0.12326666666666668,
|
|
"calib/nonempty_final_conf_rate": 0.9921875,
|
|
"calib/nonempty_reasoning_rate": 0.99609375,
|
|
"calib/nonempty_step_conf_rate": 0.9921875,
|
|
"calib/pce": 0.0027559055118110236,
|
|
"calib/std_conf": 0.1342205842273474,
|
|
"calib/step_conf_rate": 0.9921875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 504.0,
|
|
"completions/max_terminated_length": 504.0,
|
|
"completions/mean_length": 252.14453125,
|
|
"completions/mean_terminated_length": 256.1468505859375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 23.0,
|
|
"epoch": 0.0368,
|
|
"grad_norm": 3.252777576446533,
|
|
"learning_rate": 4.9166666666666665e-06,
|
|
"loss": -0.0153,
|
|
"num_tokens": 9537703.0,
|
|
"reward": 1.454376220703125,
|
|
"reward_std": 0.3514174520969391,
|
|
"rewards/accuracy_reward_step": 0.40625,
|
|
"rewards/final_brier_reward_step": 0.7256972789764404,
|
|
"rewards/format_reward_step": 0.984375,
|
|
"rewards/stepwise_brier_reward": 0.6855578422546387,
|
|
"step": 23
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 1.0,
|
|
"calib/auroc": 0.6726219217769194,
|
|
"calib/avg_num_step_conf": 5.2890625,
|
|
"calib/ece": 0.03889687500000001,
|
|
"calib/final_conf_rate": 1.0,
|
|
"calib/format_rate": 0.98828125,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.0620073394495413,
|
|
"calib/mean_conf": 0.12219687500000002,
|
|
"calib/mu_c": 0.17500000000000002,
|
|
"calib/mu_w": 0.11299266055045872,
|
|
"calib/nonempty_final_conf_rate": 1.0,
|
|
"calib/nonempty_reasoning_rate": 1.0,
|
|
"calib/nonempty_step_conf_rate": 0.98828125,
|
|
"calib/pce": 0.006328125,
|
|
"calib/std_conf": 0.11195482901703872,
|
|
"calib/step_conf_rate": 0.98828125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 657.0,
|
|
"completions/max_terminated_length": 657.0,
|
|
"completions/mean_length": 266.84765625,
|
|
"completions/mean_terminated_length": 271.0833435058594,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 79.0,
|
|
"epoch": 0.0384,
|
|
"grad_norm": 7.944537162780762,
|
|
"learning_rate": 4.888888888888889e-06,
|
|
"loss": -0.0158,
|
|
"num_tokens": 9951088.0,
|
|
"reward": 1.089248776435852,
|
|
"reward_std": 0.29503536224365234,
|
|
"rewards/accuracy_reward_step": 0.1484375,
|
|
"rewards/final_brier_reward_step": 0.8644500970840454,
|
|
"rewards/format_reward_step": 0.98828125,
|
|
"rewards/stepwise_brier_reward": 0.625357449054718,
|
|
"step": 24
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.92578125,
|
|
"calib/auroc": 0.6397745571658615,
|
|
"calib/avg_num_step_conf": 5.22265625,
|
|
"calib/ece": 0.05970464135021098,
|
|
"calib/final_conf_rate": 0.92578125,
|
|
"calib/format_rate": 0.890625,
|
|
"calib/frac_conf_gt_0.9": 0.0,
|
|
"calib/gap": 0.04932850241545894,
|
|
"calib/mean_conf": 0.09658227848101265,
|
|
"calib/mu_c": 0.1396666666666667,
|
|
"calib/mu_w": 0.09033816425120775,
|
|
"calib/nonempty_final_conf_rate": 0.92578125,
|
|
"calib/nonempty_reasoning_rate": 0.9453125,
|
|
"calib/nonempty_step_conf_rate": 0.91796875,
|
|
"calib/pce": 0.01485232067510549,
|
|
"calib/std_conf": 0.11345981361786617,
|
|
"calib/step_conf_rate": 0.91796875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.05078125,
|
|
"completions/max_length": 1428.0,
|
|
"completions/max_terminated_length": 1428.0,
|
|
"completions/mean_length": 320.12890625,
|
|
"completions/mean_terminated_length": 337.255126953125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 15.0,
|
|
"epoch": 0.04,
|
|
"grad_norm": 20.122507095336914,
|
|
"learning_rate": 4.861111111111111e-06,
|
|
"loss": -0.0936,
|
|
"num_tokens": 10391969.0,
|
|
"reward": 0.9744135141372681,
|
|
"reward_std": 0.32630473375320435,
|
|
"rewards/accuracy_reward_step": 0.1171875,
|
|
"rewards/final_brier_reward_step": 0.7882003784179688,
|
|
"rewards/format_reward_step": 0.890625,
|
|
"rewards/stepwise_brier_reward": 0.6250784397125244,
|
|
"step": 25
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.9375,
|
|
"completions/max_length": 1196.0,
|
|
"completions/max_terminated_length": 1196.0,
|
|
"completions/mean_length": 32.34375,
|
|
"completions/mean_terminated_length": 517.5,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 37.0,
|
|
"epoch": 0.0416,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.833333333333333e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 10764321.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 26
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.91796875,
|
|
"completions/max_length": 1371.0,
|
|
"completions/max_terminated_length": 1371.0,
|
|
"completions/mean_length": 34.5546875,
|
|
"completions/mean_terminated_length": 421.23809814453125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0432,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.805555555555556e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 11116359.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 27
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.90625,
|
|
"completions/max_length": 1331.0,
|
|
"completions/max_terminated_length": 1331.0,
|
|
"completions/mean_length": 33.015625,
|
|
"completions/mean_terminated_length": 352.16668701171875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 3.0,
|
|
"epoch": 0.0448,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.777777777777778e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 11469995.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 28
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.89453125,
|
|
"completions/max_length": 1224.0,
|
|
"completions/max_terminated_length": 1224.0,
|
|
"completions/mean_length": 35.203125,
|
|
"completions/mean_terminated_length": 333.77777099609375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 11.0,
|
|
"epoch": 0.0464,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.75e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 11838655.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 29
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.640625,
|
|
"completions/max_length": 1496.0,
|
|
"completions/max_terminated_length": 1496.0,
|
|
"completions/mean_length": 141.12109375,
|
|
"completions/mean_terminated_length": 392.6847839355469,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 7.0,
|
|
"epoch": 0.048,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.722222222222222e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 12215350.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 30
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.5625,
|
|
"completions/max_length": 1526.0,
|
|
"completions/max_terminated_length": 1526.0,
|
|
"completions/mean_length": 210.26953125,
|
|
"completions/mean_terminated_length": 480.6160888671875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 3.0,
|
|
"epoch": 0.0496,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.694444444444445e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 12614987.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 31
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.640625,
|
|
"completions/max_length": 1417.0,
|
|
"completions/max_terminated_length": 1417.0,
|
|
"completions/mean_length": 178.796875,
|
|
"completions/mean_terminated_length": 497.5217590332031,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0512,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.666666666666667e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 12997791.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 32
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.95703125,
|
|
"completions/max_length": 1308.0,
|
|
"completions/max_terminated_length": 1308.0,
|
|
"completions/mean_length": 22.359375,
|
|
"completions/mean_terminated_length": 520.3636474609375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 12.0,
|
|
"epoch": 0.0528,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.638888888888889e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 13355163.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 33
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.9765625,
|
|
"completions/max_length": 579.0,
|
|
"completions/max_terminated_length": 579.0,
|
|
"completions/mean_length": 4.125,
|
|
"completions/mean_terminated_length": 176.0,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 3.0,
|
|
"epoch": 0.0544,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.611111111111112e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 13708307.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 34
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.98828125,
|
|
"completions/max_length": 1375.0,
|
|
"completions/max_terminated_length": 1375.0,
|
|
"completions/mean_length": 6.578125,
|
|
"completions/mean_terminated_length": 561.3333740234375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 97.0,
|
|
"epoch": 0.056,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.583333333333333e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 14065343.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 35
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.9921875,
|
|
"completions/max_length": 899.0,
|
|
"completions/max_terminated_length": 899.0,
|
|
"completions/mean_length": 3.6640625,
|
|
"completions/mean_terminated_length": 469.0,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 39.0,
|
|
"epoch": 0.0576,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.555555555555556e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 14410681.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 36
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.9765625,
|
|
"completions/max_length": 978.0,
|
|
"completions/max_terminated_length": 978.0,
|
|
"completions/mean_length": 6.95703125,
|
|
"completions/mean_terminated_length": 296.8333435058594,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 17.0,
|
|
"epoch": 0.0592,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.527777777777778e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 14765214.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 37
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.99609375,
|
|
"completions/max_length": 277.0,
|
|
"completions/max_terminated_length": 277.0,
|
|
"completions/mean_length": 1.08203125,
|
|
"completions/mean_terminated_length": 277.0,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 277.0,
|
|
"epoch": 0.0608,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.5e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 15121371.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 38
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.98046875,
|
|
"completions/max_length": 1356.0,
|
|
"completions/max_terminated_length": 1356.0,
|
|
"completions/mean_length": 8.4140625,
|
|
"completions/mean_terminated_length": 430.8000183105469,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 39.0,
|
|
"epoch": 0.0624,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.472222222222223e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 15470269.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 39
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.98828125,
|
|
"completions/max_length": 1535.0,
|
|
"completions/max_terminated_length": 1535.0,
|
|
"completions/mean_length": 7.1484375,
|
|
"completions/mean_terminated_length": 610.0,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 48.0,
|
|
"epoch": 0.064,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.444444444444444e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 15814235.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 40
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.984375,
|
|
"completions/max_length": 1366.0,
|
|
"completions/max_terminated_length": 1366.0,
|
|
"completions/mean_length": 14.02734375,
|
|
"completions/mean_terminated_length": 897.75,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 16.0,
|
|
"epoch": 0.0656,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.416666666666667e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 16177746.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 41
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.97265625,
|
|
"completions/max_length": 1460.0,
|
|
"completions/max_terminated_length": 1460.0,
|
|
"completions/mean_length": 18.02734375,
|
|
"completions/mean_terminated_length": 659.2857666015625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 31.0,
|
|
"epoch": 0.0672,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.388888888888889e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 16525649.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 42
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.99609375,
|
|
"completions/max_length": 32.0,
|
|
"completions/max_terminated_length": 32.0,
|
|
"completions/mean_length": 0.125,
|
|
"completions/mean_terminated_length": 32.0,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 32.0,
|
|
"epoch": 0.0688,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.361111111111112e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 16885265.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 43
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.98046875,
|
|
"completions/max_length": 1427.0,
|
|
"completions/max_terminated_length": 1427.0,
|
|
"completions/mean_length": 11.9296875,
|
|
"completions/mean_terminated_length": 610.7999877929688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 76.0,
|
|
"epoch": 0.0704,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.333333333333334e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 17250447.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 44
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.96875,
|
|
"completions/max_length": 1028.0,
|
|
"completions/max_terminated_length": 1028.0,
|
|
"completions/mean_length": 12.37109375,
|
|
"completions/mean_terminated_length": 395.875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 73.0,
|
|
"epoch": 0.072,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.305555555555556e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 17601958.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 45
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.9609375,
|
|
"completions/max_length": 961.0,
|
|
"completions/max_terminated_length": 961.0,
|
|
"completions/mean_length": 10.73828125,
|
|
"completions/mean_terminated_length": 274.8999938964844,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 30.0,
|
|
"epoch": 0.0736,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.277777777777778e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 17933307.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 46
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.96484375,
|
|
"completions/max_length": 1402.0,
|
|
"completions/max_terminated_length": 1402.0,
|
|
"completions/mean_length": 22.984375,
|
|
"completions/mean_terminated_length": 653.7777709960938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 14.0,
|
|
"epoch": 0.0752,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.25e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 18275847.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 47
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.94140625,
|
|
"completions/max_length": 1325.0,
|
|
"completions/max_terminated_length": 1325.0,
|
|
"completions/mean_length": 19.94140625,
|
|
"completions/mean_terminated_length": 340.3333435058594,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 47.0,
|
|
"epoch": 0.0768,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.222222222222223e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 18617752.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 48
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.91796875,
|
|
"completions/max_length": 1288.0,
|
|
"completions/max_terminated_length": 1288.0,
|
|
"completions/mean_length": 29.03515625,
|
|
"completions/mean_terminated_length": 353.952392578125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 24.0,
|
|
"epoch": 0.0784,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.194444444444445e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 18986769.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 49
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.921875,
|
|
"completions/max_length": 1294.0,
|
|
"completions/max_terminated_length": 1294.0,
|
|
"completions/mean_length": 27.54296875,
|
|
"completions/mean_terminated_length": 352.5500183105469,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 20.0,
|
|
"epoch": 0.08,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.166666666666667e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 19344444.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 50
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.9453125,
|
|
"completions/max_length": 1376.0,
|
|
"completions/max_terminated_length": 1376.0,
|
|
"completions/mean_length": 20.5,
|
|
"completions/mean_terminated_length": 374.8571472167969,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 53.0,
|
|
"epoch": 0.0816,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.138888888888889e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 19700036.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 51
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.94140625,
|
|
"completions/max_length": 1450.0,
|
|
"completions/max_terminated_length": 1450.0,
|
|
"completions/mean_length": 43.10546875,
|
|
"completions/mean_terminated_length": 735.6666870117188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 6.0,
|
|
"epoch": 0.0832,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.111111111111111e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 20068951.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 52
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.94921875,
|
|
"completions/max_length": 1377.0,
|
|
"completions/max_terminated_length": 1377.0,
|
|
"completions/mean_length": 25.51171875,
|
|
"completions/mean_terminated_length": 502.3846435546875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 24.0,
|
|
"epoch": 0.0848,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.083333333333334e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 20424602.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 53
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.9453125,
|
|
"completions/max_length": 1281.0,
|
|
"completions/max_terminated_length": 1281.0,
|
|
"completions/mean_length": 19.23828125,
|
|
"completions/mean_terminated_length": 351.7857360839844,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 1.0,
|
|
"epoch": 0.0864,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.055555555555556e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 20775135.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 54
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.9375,
|
|
"completions/max_length": 1294.0,
|
|
"completions/max_terminated_length": 1294.0,
|
|
"completions/mean_length": 33.828125,
|
|
"completions/mean_terminated_length": 541.25,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 24.0,
|
|
"epoch": 0.088,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.027777777777779e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 21146899.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 55
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.9453125,
|
|
"completions/max_length": 1430.0,
|
|
"completions/max_terminated_length": 1430.0,
|
|
"completions/mean_length": 35.44140625,
|
|
"completions/mean_terminated_length": 648.0714721679688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 90.0,
|
|
"epoch": 0.0896,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.000000000000001e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 21513652.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 56
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.92578125,
|
|
"completions/max_length": 1511.0,
|
|
"completions/max_terminated_length": 1511.0,
|
|
"completions/mean_length": 42.671875,
|
|
"completions/mean_terminated_length": 574.9473876953125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 9.0,
|
|
"epoch": 0.0912,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.972222222222223e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 21864400.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 57
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.91015625,
|
|
"completions/max_length": 1168.0,
|
|
"completions/max_terminated_length": 1168.0,
|
|
"completions/mean_length": 34.66015625,
|
|
"completions/mean_terminated_length": 385.7826232910156,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 94.0,
|
|
"epoch": 0.0928,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.944444444444445e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 22225137.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 58
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.92578125,
|
|
"completions/max_length": 1229.0,
|
|
"completions/max_terminated_length": 1229.0,
|
|
"completions/mean_length": 42.15625,
|
|
"completions/mean_terminated_length": 568.0,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 46.0,
|
|
"epoch": 0.0944,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.916666666666667e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 22591353.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 59
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.88671875,
|
|
"completions/max_length": 1398.0,
|
|
"completions/max_terminated_length": 1398.0,
|
|
"completions/mean_length": 59.44921875,
|
|
"completions/mean_terminated_length": 524.7930908203125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 91.0,
|
|
"epoch": 0.096,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.88888888888889e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 22951116.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 60
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.95703125,
|
|
"completions/max_length": 1140.0,
|
|
"completions/max_terminated_length": 1140.0,
|
|
"completions/mean_length": 18.49609375,
|
|
"completions/mean_terminated_length": 430.4545593261719,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 51.0,
|
|
"epoch": 0.0976,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.861111111111112e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 23313251.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 61
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.90625,
|
|
"completions/max_length": 1399.0,
|
|
"completions/max_terminated_length": 1399.0,
|
|
"completions/mean_length": 60.0625,
|
|
"completions/mean_terminated_length": 640.6666870117188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 46.0,
|
|
"epoch": 0.0992,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.833333333333334e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 23674035.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 62
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.93359375,
|
|
"completions/max_length": 1509.0,
|
|
"completions/max_terminated_length": 1509.0,
|
|
"completions/mean_length": 33.50390625,
|
|
"completions/mean_terminated_length": 504.5294189453125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 13.0,
|
|
"epoch": 0.1008,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.8055555555555556e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 24035628.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 63
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.91796875,
|
|
"completions/max_length": 1513.0,
|
|
"completions/max_terminated_length": 1513.0,
|
|
"completions/mean_length": 53.875,
|
|
"completions/mean_terminated_length": 656.7619018554688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 87.0,
|
|
"epoch": 0.1024,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.777777777777778e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 24413276.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 64
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.87109375,
|
|
"completions/max_length": 1504.0,
|
|
"completions/max_terminated_length": 1504.0,
|
|
"completions/mean_length": 71.078125,
|
|
"completions/mean_terminated_length": 551.3939819335938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 14.0,
|
|
"epoch": 0.104,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.7500000000000005e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 24786392.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 65
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.90234375,
|
|
"completions/max_length": 1521.0,
|
|
"completions/max_terminated_length": 1521.0,
|
|
"completions/mean_length": 60.75,
|
|
"completions/mean_terminated_length": 622.0799560546875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 125.0,
|
|
"epoch": 0.1056,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.7222222222222225e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 25162168.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 66
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.88671875,
|
|
"completions/max_length": 1494.0,
|
|
"completions/max_terminated_length": 1494.0,
|
|
"completions/mean_length": 64.5546875,
|
|
"completions/mean_terminated_length": 569.862060546875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 4.0,
|
|
"epoch": 0.1072,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.694444444444445e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 25540302.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 67
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.87109375,
|
|
"completions/max_length": 1508.0,
|
|
"completions/max_terminated_length": 1508.0,
|
|
"completions/mean_length": 72.8046875,
|
|
"completions/mean_terminated_length": 564.7879028320312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 49.0,
|
|
"epoch": 0.1088,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.6666666666666666e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 25910252.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 68
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8828125,
|
|
"completions/max_length": 1461.0,
|
|
"completions/max_terminated_length": 1461.0,
|
|
"completions/mean_length": 63.8671875,
|
|
"completions/mean_terminated_length": 545.0,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 27.0,
|
|
"epoch": 0.1104,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.638888888888889e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 26256754.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 69
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.875,
|
|
"completions/max_length": 1516.0,
|
|
"completions/max_terminated_length": 1516.0,
|
|
"completions/mean_length": 77.11328125,
|
|
"completions/mean_terminated_length": 616.90625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 93.0,
|
|
"epoch": 0.112,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.6111111111111115e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 26630207.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 70
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.90625,
|
|
"completions/max_length": 1298.0,
|
|
"completions/max_terminated_length": 1298.0,
|
|
"completions/mean_length": 57.27734375,
|
|
"completions/mean_terminated_length": 610.9583740234375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.1136,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.5833333333333335e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 26989702.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 71
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.9140625,
|
|
"completions/max_length": 1420.0,
|
|
"completions/max_terminated_length": 1420.0,
|
|
"completions/mean_length": 44.58984375,
|
|
"completions/mean_terminated_length": 518.8636474609375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 17.0,
|
|
"epoch": 0.1152,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.555555555555556e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 27341005.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 72
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8671875,
|
|
"completions/max_length": 1266.0,
|
|
"completions/max_terminated_length": 1266.0,
|
|
"completions/mean_length": 63.0859375,
|
|
"completions/mean_terminated_length": 475.0,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 26.0,
|
|
"epoch": 0.1168,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.5277777777777784e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 27711395.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 73
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8984375,
|
|
"completions/max_length": 1445.0,
|
|
"completions/max_terminated_length": 1445.0,
|
|
"completions/mean_length": 62.2265625,
|
|
"completions/mean_terminated_length": 612.6923217773438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 20.0,
|
|
"epoch": 0.1184,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.5e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 28063365.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 74
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.890625,
|
|
"completions/max_length": 1501.0,
|
|
"completions/max_terminated_length": 1501.0,
|
|
"completions/mean_length": 68.1171875,
|
|
"completions/mean_terminated_length": 622.7857666015625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 14.0,
|
|
"epoch": 0.12,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.4722222222222224e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 28441107.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 75
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.890625,
|
|
"completions/max_length": 1430.0,
|
|
"completions/max_terminated_length": 1430.0,
|
|
"completions/mean_length": 54.921875,
|
|
"completions/mean_terminated_length": 502.14288330078125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 11.0,
|
|
"epoch": 0.1216,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.444444444444445e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 28801687.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 76
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.90234375,
|
|
"completions/max_length": 1505.0,
|
|
"completions/max_terminated_length": 1505.0,
|
|
"completions/mean_length": 47.8203125,
|
|
"completions/mean_terminated_length": 489.67999267578125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 18.0,
|
|
"epoch": 0.1232,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.416666666666667e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 29165233.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 77
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8984375,
|
|
"completions/max_length": 1442.0,
|
|
"completions/max_terminated_length": 1442.0,
|
|
"completions/mean_length": 61.1796875,
|
|
"completions/mean_terminated_length": 602.3846435546875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 47.0,
|
|
"epoch": 0.1248,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.3888888888888893e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 29528071.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 78
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.85546875,
|
|
"completions/max_length": 1477.0,
|
|
"completions/max_terminated_length": 1477.0,
|
|
"completions/mean_length": 97.87109375,
|
|
"completions/mean_terminated_length": 677.1621704101562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 50.0,
|
|
"epoch": 0.1264,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.3611111111111117e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 29903614.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 79
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.86328125,
|
|
"completions/max_length": 1469.0,
|
|
"completions/max_terminated_length": 1469.0,
|
|
"completions/mean_length": 75.36328125,
|
|
"completions/mean_terminated_length": 551.2285766601562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.128,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.3333333333333333e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 30275523.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 80
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.87109375,
|
|
"completions/max_length": 1515.0,
|
|
"completions/max_terminated_length": 1515.0,
|
|
"completions/mean_length": 64.75390625,
|
|
"completions/mean_terminated_length": 502.3333435058594,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.1296,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.3055555555555558e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 30642444.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 81
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.83984375,
|
|
"completions/max_length": 1463.0,
|
|
"completions/max_terminated_length": 1463.0,
|
|
"completions/mean_length": 98.23828125,
|
|
"completions/mean_terminated_length": 613.3901977539062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 62.0,
|
|
"epoch": 0.1312,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.277777777777778e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 31021409.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 82
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8125,
|
|
"completions/max_length": 1511.0,
|
|
"completions/max_terminated_length": 1511.0,
|
|
"completions/mean_length": 94.79296875,
|
|
"completions/mean_terminated_length": 505.5625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 7.0,
|
|
"epoch": 0.1328,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.2500000000000002e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 31394028.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 83
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.87109375,
|
|
"completions/max_length": 1408.0,
|
|
"completions/max_terminated_length": 1408.0,
|
|
"completions/mean_length": 64.3984375,
|
|
"completions/mean_terminated_length": 499.5757751464844,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 43.0,
|
|
"epoch": 0.1344,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.2222222222222227e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 31771066.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 84
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8671875,
|
|
"completions/max_length": 1412.0,
|
|
"completions/max_terminated_length": 1412.0,
|
|
"completions/mean_length": 75.44921875,
|
|
"completions/mean_terminated_length": 568.0882568359375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 9.0,
|
|
"epoch": 0.136,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.1944444444444443e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 32135573.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 85
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.83984375,
|
|
"completions/max_length": 1529.0,
|
|
"completions/max_terminated_length": 1529.0,
|
|
"completions/mean_length": 73.3359375,
|
|
"completions/mean_terminated_length": 457.9024353027344,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 22.0,
|
|
"epoch": 0.1376,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.1666666666666667e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 32495427.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 86
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8515625,
|
|
"completions/max_length": 1470.0,
|
|
"completions/max_terminated_length": 1470.0,
|
|
"completions/mean_length": 86.609375,
|
|
"completions/mean_terminated_length": 583.4736938476562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 27.0,
|
|
"epoch": 0.1392,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.138888888888889e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 32866751.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 87
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.85546875,
|
|
"completions/max_length": 1512.0,
|
|
"completions/max_terminated_length": 1512.0,
|
|
"completions/mean_length": 74.3125,
|
|
"completions/mean_terminated_length": 514.1621704101562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 62.0,
|
|
"epoch": 0.1408,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.1111111111111116e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 33228319.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 88
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.89453125,
|
|
"completions/max_length": 1529.0,
|
|
"completions/max_terminated_length": 1529.0,
|
|
"completions/mean_length": 57.05859375,
|
|
"completions/mean_terminated_length": 541.0,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 7.0,
|
|
"epoch": 0.1424,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.0833333333333336e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 33598326.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 89
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.84375,
|
|
"completions/max_length": 1471.0,
|
|
"completions/max_terminated_length": 1471.0,
|
|
"completions/mean_length": 94.0078125,
|
|
"completions/mean_terminated_length": 601.6500244140625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 23.0,
|
|
"epoch": 0.144,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.055555555555556e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 33967808.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 90
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.84375,
|
|
"completions/max_length": 1496.0,
|
|
"completions/max_terminated_length": 1496.0,
|
|
"completions/mean_length": 99.3203125,
|
|
"completions/mean_terminated_length": 635.6500244140625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.1456,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.0277777777777776e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 34329962.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 91
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8359375,
|
|
"completions/max_length": 1473.0,
|
|
"completions/max_terminated_length": 1473.0,
|
|
"completions/mean_length": 92.9375,
|
|
"completions/mean_terminated_length": 566.4761962890625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 36.0,
|
|
"epoch": 0.1472,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 34702698.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 92
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.859375,
|
|
"completions/max_length": 1491.0,
|
|
"completions/max_terminated_length": 1491.0,
|
|
"completions/mean_length": 90.41015625,
|
|
"completions/mean_terminated_length": 642.9166870117188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 3.0,
|
|
"epoch": 0.1488,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.9722222222222225e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 35075211.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 93
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.859375,
|
|
"completions/max_length": 1228.0,
|
|
"completions/max_terminated_length": 1228.0,
|
|
"completions/mean_length": 66.79296875,
|
|
"completions/mean_terminated_length": 474.97222900390625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 19.0,
|
|
"epoch": 0.1504,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.944444444444445e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 35436910.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 94
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8359375,
|
|
"completions/max_length": 1478.0,
|
|
"completions/max_terminated_length": 1478.0,
|
|
"completions/mean_length": 97.078125,
|
|
"completions/mean_terminated_length": 591.7142944335938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 20.0,
|
|
"epoch": 0.152,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.916666666666667e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 35819866.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 95
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8828125,
|
|
"completions/max_length": 1486.0,
|
|
"completions/max_terminated_length": 1486.0,
|
|
"completions/mean_length": 59.984375,
|
|
"completions/mean_terminated_length": 511.86669921875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 48.0,
|
|
"epoch": 0.1536,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.888888888888889e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 36199110.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 96
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.88671875,
|
|
"completions/max_length": 1359.0,
|
|
"completions/max_terminated_length": 1359.0,
|
|
"completions/mean_length": 63.32421875,
|
|
"completions/mean_terminated_length": 559.0,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 26.0,
|
|
"epoch": 0.1552,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.861111111111111e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 36558633.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 97
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8515625,
|
|
"completions/max_length": 1497.0,
|
|
"completions/max_terminated_length": 1497.0,
|
|
"completions/mean_length": 102.02734375,
|
|
"completions/mean_terminated_length": 687.3421020507812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 68.0,
|
|
"epoch": 0.1568,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.8333333333333335e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 36922352.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 98
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8359375,
|
|
"completions/max_length": 1522.0,
|
|
"completions/max_terminated_length": 1522.0,
|
|
"completions/mean_length": 101.88671875,
|
|
"completions/mean_terminated_length": 621.0238037109375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 21.0,
|
|
"epoch": 0.1584,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.805555555555556e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 37305955.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 99
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.84375,
|
|
"completions/max_length": 1505.0,
|
|
"completions/max_terminated_length": 1505.0,
|
|
"completions/mean_length": 97.10546875,
|
|
"completions/mean_terminated_length": 621.4750366210938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 83.0,
|
|
"epoch": 0.16,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.7777777777777783e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 37700222.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 100
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.875,
|
|
"completions/max_length": 1462.0,
|
|
"completions/max_terminated_length": 1462.0,
|
|
"completions/mean_length": 74.58984375,
|
|
"completions/mean_terminated_length": 596.71875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 107.0,
|
|
"epoch": 0.1616,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.7500000000000004e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 38080061.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 101
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.83984375,
|
|
"completions/max_length": 1391.0,
|
|
"completions/max_terminated_length": 1391.0,
|
|
"completions/mean_length": 82.86328125,
|
|
"completions/mean_terminated_length": 517.3901977539062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 1.0,
|
|
"epoch": 0.1632,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.7222222222222224e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 38450642.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 102
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8671875,
|
|
"completions/max_length": 1468.0,
|
|
"completions/max_terminated_length": 1468.0,
|
|
"completions/mean_length": 86.62109375,
|
|
"completions/mean_terminated_length": 652.2058715820312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 9.0,
|
|
"epoch": 0.1648,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.6944444444444444e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 38838233.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 103
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8359375,
|
|
"completions/max_length": 1517.0,
|
|
"completions/max_terminated_length": 1517.0,
|
|
"completions/mean_length": 101.23046875,
|
|
"completions/mean_terminated_length": 617.0238037109375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 9.0,
|
|
"epoch": 0.1664,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.666666666666667e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 39211388.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 104
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.87890625,
|
|
"completions/max_length": 1387.0,
|
|
"completions/max_terminated_length": 1387.0,
|
|
"completions/mean_length": 73.390625,
|
|
"completions/mean_terminated_length": 606.0645141601562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 17.0,
|
|
"epoch": 0.168,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.6388888888888893e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 39591112.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 105
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.90625,
|
|
"completions/max_length": 1518.0,
|
|
"completions/max_terminated_length": 1518.0,
|
|
"completions/mean_length": 62.890625,
|
|
"completions/mean_terminated_length": 670.8333740234375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 30.0,
|
|
"epoch": 0.1696,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.6111111111111113e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 39973988.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 106
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.86328125,
|
|
"completions/max_length": 1433.0,
|
|
"completions/max_terminated_length": 1433.0,
|
|
"completions/mean_length": 77.8203125,
|
|
"completions/mean_terminated_length": 569.2000122070312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 18.0,
|
|
"epoch": 0.1712,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.5833333333333337e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 40355750.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 107
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8125,
|
|
"completions/max_length": 1531.0,
|
|
"completions/max_terminated_length": 1531.0,
|
|
"completions/mean_length": 108.99609375,
|
|
"completions/mean_terminated_length": 581.3125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 29.0,
|
|
"epoch": 0.1728,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.5555555555555557e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 40736717.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 108
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8828125,
|
|
"completions/max_length": 1423.0,
|
|
"completions/max_terminated_length": 1423.0,
|
|
"completions/mean_length": 64.22265625,
|
|
"completions/mean_terminated_length": 548.0333862304688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 28.0,
|
|
"epoch": 0.1744,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.5277777777777778e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 41096742.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 109
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.87109375,
|
|
"completions/max_length": 1469.0,
|
|
"completions/max_terminated_length": 1469.0,
|
|
"completions/mean_length": 71.94921875,
|
|
"completions/mean_terminated_length": 558.1515502929688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 6.0,
|
|
"epoch": 0.176,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.5e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 41469713.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 110
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.87109375,
|
|
"completions/max_length": 1479.0,
|
|
"completions/max_terminated_length": 1479.0,
|
|
"completions/mean_length": 82.984375,
|
|
"completions/mean_terminated_length": 643.757568359375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 54.0,
|
|
"epoch": 0.1776,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.4722222222222226e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 41842861.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 111
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.859375,
|
|
"completions/max_length": 1526.0,
|
|
"completions/max_terminated_length": 1526.0,
|
|
"completions/mean_length": 84.08203125,
|
|
"completions/mean_terminated_length": 597.9166870117188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 13.0,
|
|
"epoch": 0.1792,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.4444444444444447e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 42223010.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 112
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.87109375,
|
|
"completions/max_length": 1479.0,
|
|
"completions/max_terminated_length": 1479.0,
|
|
"completions/mean_length": 78.984375,
|
|
"completions/mean_terminated_length": 612.727294921875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 84.0,
|
|
"epoch": 0.1808,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.4166666666666667e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 42576774.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 113
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.90625,
|
|
"completions/max_length": 1409.0,
|
|
"completions/max_terminated_length": 1409.0,
|
|
"completions/mean_length": 61.953125,
|
|
"completions/mean_terminated_length": 660.8333740234375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.1824,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.388888888888889e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 42936170.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 114
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8515625,
|
|
"completions/max_length": 1453.0,
|
|
"completions/max_terminated_length": 1453.0,
|
|
"completions/mean_length": 89.3828125,
|
|
"completions/mean_terminated_length": 602.1578979492188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 90.0,
|
|
"epoch": 0.184,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.361111111111111e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 43311860.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 115
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8359375,
|
|
"completions/max_length": 1511.0,
|
|
"completions/max_terminated_length": 1511.0,
|
|
"completions/mean_length": 105.31640625,
|
|
"completions/mean_terminated_length": 641.9285888671875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 39.0,
|
|
"epoch": 0.1856,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.3333333333333336e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 43691325.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 116
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8359375,
|
|
"completions/max_length": 1358.0,
|
|
"completions/max_terminated_length": 1358.0,
|
|
"completions/mean_length": 92.14453125,
|
|
"completions/mean_terminated_length": 561.6428833007812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 43.0,
|
|
"epoch": 0.1872,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.305555555555556e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 44055330.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 117
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.84765625,
|
|
"completions/max_length": 1526.0,
|
|
"completions/max_terminated_length": 1526.0,
|
|
"completions/mean_length": 103.859375,
|
|
"completions/mean_terminated_length": 681.7435913085938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 3.0,
|
|
"epoch": 0.1888,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.277777777777778e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 44424398.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 118
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8515625,
|
|
"completions/max_length": 1475.0,
|
|
"completions/max_terminated_length": 1475.0,
|
|
"completions/mean_length": 92.75,
|
|
"completions/mean_terminated_length": 624.8421020507812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 42.0,
|
|
"epoch": 0.1904,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.25e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 44795822.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 119
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.85546875,
|
|
"completions/max_length": 1435.0,
|
|
"completions/max_terminated_length": 1435.0,
|
|
"completions/mean_length": 83.48046875,
|
|
"completions/mean_terminated_length": 577.5946044921875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 29.0,
|
|
"epoch": 0.192,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.222222222222222e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 45151641.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 120
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.82421875,
|
|
"completions/max_length": 1519.0,
|
|
"completions/max_terminated_length": 1519.0,
|
|
"completions/mean_length": 111.44140625,
|
|
"completions/mean_terminated_length": 633.977783203125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 9.0,
|
|
"epoch": 0.1936,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.1944444444444445e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 45509442.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 121
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.86328125,
|
|
"completions/max_length": 1426.0,
|
|
"completions/max_terminated_length": 1426.0,
|
|
"completions/mean_length": 84.8125,
|
|
"completions/mean_terminated_length": 620.3428344726562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 28.0,
|
|
"epoch": 0.1952,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.166666666666667e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 45884586.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 122
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.875,
|
|
"completions/max_length": 1483.0,
|
|
"completions/max_terminated_length": 1483.0,
|
|
"completions/mean_length": 74.0703125,
|
|
"completions/mean_terminated_length": 592.5625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 44.0,
|
|
"epoch": 0.1968,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.138888888888889e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 46257996.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 123
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8671875,
|
|
"completions/max_length": 1421.0,
|
|
"completions/max_terminated_length": 1421.0,
|
|
"completions/mean_length": 73.91015625,
|
|
"completions/mean_terminated_length": 556.5,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 84.0,
|
|
"epoch": 0.1984,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.1111111111111114e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 46626581.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 124
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.84765625,
|
|
"completions/max_length": 1477.0,
|
|
"completions/max_terminated_length": 1477.0,
|
|
"completions/mean_length": 85.8671875,
|
|
"completions/mean_terminated_length": 563.6410522460938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 57.0,
|
|
"epoch": 0.2,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.0833333333333334e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 47005251.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 125
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.83984375,
|
|
"completions/max_length": 1476.0,
|
|
"completions/max_terminated_length": 1476.0,
|
|
"completions/mean_length": 99.109375,
|
|
"completions/mean_terminated_length": 618.8292236328125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 27.0,
|
|
"epoch": 0.2016,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.0555555555555555e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 47389567.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 126
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8828125,
|
|
"completions/max_length": 1502.0,
|
|
"completions/max_terminated_length": 1502.0,
|
|
"completions/mean_length": 80.375,
|
|
"completions/mean_terminated_length": 685.86669921875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 6.0,
|
|
"epoch": 0.2032,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.027777777777778e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 47769455.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 127
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.87890625,
|
|
"completions/max_length": 1480.0,
|
|
"completions/max_terminated_length": 1480.0,
|
|
"completions/mean_length": 65.86328125,
|
|
"completions/mean_terminated_length": 543.9031982421875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 25.0,
|
|
"epoch": 0.2048,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.0000000000000003e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 48144572.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 128
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.921875,
|
|
"completions/max_length": 1346.0,
|
|
"completions/max_terminated_length": 1346.0,
|
|
"completions/mean_length": 35.14453125,
|
|
"completions/mean_terminated_length": 449.8500061035156,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 12.0,
|
|
"epoch": 0.2064,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.9722222222222224e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 48511137.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 129
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8671875,
|
|
"completions/max_length": 1410.0,
|
|
"completions/max_terminated_length": 1410.0,
|
|
"completions/mean_length": 82.51171875,
|
|
"completions/mean_terminated_length": 621.2647094726562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 9.0,
|
|
"epoch": 0.208,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.944444444444445e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 48866972.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 130
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.88671875,
|
|
"completions/max_length": 1513.0,
|
|
"completions/max_terminated_length": 1513.0,
|
|
"completions/mean_length": 78.640625,
|
|
"completions/mean_terminated_length": 694.2069091796875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 79.0,
|
|
"epoch": 0.2096,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.916666666666667e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 49244216.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 131
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.890625,
|
|
"completions/max_length": 1494.0,
|
|
"completions/max_terminated_length": 1494.0,
|
|
"completions/mean_length": 64.32421875,
|
|
"completions/mean_terminated_length": 588.107177734375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 19.0,
|
|
"epoch": 0.2112,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.888888888888889e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 49615747.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 132
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.84375,
|
|
"completions/max_length": 1485.0,
|
|
"completions/max_terminated_length": 1485.0,
|
|
"completions/mean_length": 80.04296875,
|
|
"completions/mean_terminated_length": 512.2750244140625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 52.0,
|
|
"epoch": 0.2128,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.8611111111111113e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 49983462.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 133
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.84765625,
|
|
"completions/max_length": 1423.0,
|
|
"completions/max_terminated_length": 1423.0,
|
|
"completions/mean_length": 105.18359375,
|
|
"completions/mean_terminated_length": 690.4359130859375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 66.0,
|
|
"epoch": 0.2144,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.8333333333333333e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 50355429.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 134
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.875,
|
|
"completions/max_length": 1402.0,
|
|
"completions/max_terminated_length": 1402.0,
|
|
"completions/mean_length": 69.14453125,
|
|
"completions/mean_terminated_length": 553.15625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 23.0,
|
|
"epoch": 0.216,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.8055555555555557e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 50716714.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 135
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.84765625,
|
|
"completions/max_length": 1521.0,
|
|
"completions/max_terminated_length": 1521.0,
|
|
"completions/mean_length": 102.4140625,
|
|
"completions/mean_terminated_length": 672.2564086914062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 65.0,
|
|
"epoch": 0.2176,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.777777777777778e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 51111204.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 136
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.87109375,
|
|
"completions/max_length": 1533.0,
|
|
"completions/max_terminated_length": 1533.0,
|
|
"completions/mean_length": 88.69921875,
|
|
"completions/mean_terminated_length": 688.0909423828125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 42.0,
|
|
"epoch": 0.2192,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.75e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 51490127.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 137
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.84375,
|
|
"completions/max_length": 1534.0,
|
|
"completions/max_terminated_length": 1534.0,
|
|
"completions/mean_length": 97.671875,
|
|
"completions/mean_terminated_length": 625.1000366210938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 5.0,
|
|
"epoch": 0.2208,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.7222222222222224e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 51874467.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 138
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.9140625,
|
|
"completions/max_length": 1528.0,
|
|
"completions/max_terminated_length": 1528.0,
|
|
"completions/mean_length": 50.91796875,
|
|
"completions/mean_terminated_length": 592.5,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 23.0,
|
|
"epoch": 0.2224,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.6944444444444446e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 52239766.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 139
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.87890625,
|
|
"completions/max_length": 1497.0,
|
|
"completions/max_terminated_length": 1497.0,
|
|
"completions/mean_length": 57.9453125,
|
|
"completions/mean_terminated_length": 478.51611328125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 4.0,
|
|
"epoch": 0.224,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.6666666666666667e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 52613024.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 140
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.88671875,
|
|
"completions/max_length": 1473.0,
|
|
"completions/max_terminated_length": 1473.0,
|
|
"completions/mean_length": 84.6171875,
|
|
"completions/mean_terminated_length": 746.9655151367188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.2256,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.638888888888889e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 52993446.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 141
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.875,
|
|
"completions/max_length": 1456.0,
|
|
"completions/max_terminated_length": 1456.0,
|
|
"completions/mean_length": 92.37109375,
|
|
"completions/mean_terminated_length": 738.96875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 36.0,
|
|
"epoch": 0.2272,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.6111111111111113e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 53385989.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 142
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.890625,
|
|
"completions/max_length": 1308.0,
|
|
"completions/max_terminated_length": 1308.0,
|
|
"completions/mean_length": 59.78515625,
|
|
"completions/mean_terminated_length": 546.607177734375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 29.0,
|
|
"epoch": 0.2288,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.5833333333333333e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 53769790.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 143
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.87890625,
|
|
"completions/max_length": 1534.0,
|
|
"completions/max_terminated_length": 1534.0,
|
|
"completions/mean_length": 77.9921875,
|
|
"completions/mean_terminated_length": 644.0645141601562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 21.0,
|
|
"epoch": 0.2304,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.5555555555555558e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 54138484.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 144
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.85546875,
|
|
"completions/max_length": 1495.0,
|
|
"completions/max_terminated_length": 1495.0,
|
|
"completions/mean_length": 94.0390625,
|
|
"completions/mean_terminated_length": 650.648681640625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 35.0,
|
|
"epoch": 0.232,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.527777777777778e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 54530294.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 145
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.85546875,
|
|
"completions/max_length": 1375.0,
|
|
"completions/max_terminated_length": 1375.0,
|
|
"completions/mean_length": 95.390625,
|
|
"completions/mean_terminated_length": 660.0,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 52.0,
|
|
"epoch": 0.2336,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.5e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 54914330.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 146
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8671875,
|
|
"completions/max_length": 1507.0,
|
|
"completions/max_terminated_length": 1507.0,
|
|
"completions/mean_length": 94.09375,
|
|
"completions/mean_terminated_length": 708.4705810546875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 5.0,
|
|
"epoch": 0.2352,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.4722222222222225e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 55283858.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 147
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.83203125,
|
|
"completions/max_length": 1458.0,
|
|
"completions/max_terminated_length": 1458.0,
|
|
"completions/mean_length": 100.16015625,
|
|
"completions/mean_terminated_length": 596.3023071289062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 33.0,
|
|
"epoch": 0.2368,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.4444444444444445e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 55663283.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 148
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.85546875,
|
|
"completions/max_length": 1480.0,
|
|
"completions/max_terminated_length": 1480.0,
|
|
"completions/mean_length": 95.8671875,
|
|
"completions/mean_terminated_length": 663.2973022460938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 45.0,
|
|
"epoch": 0.2384,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.4166666666666667e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 56047041.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 149
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.86328125,
|
|
"completions/max_length": 1289.0,
|
|
"completions/max_terminated_length": 1289.0,
|
|
"completions/mean_length": 66.51953125,
|
|
"completions/mean_terminated_length": 486.5428466796875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 33.0,
|
|
"epoch": 0.24,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.3888888888888892e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 56400558.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 150
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.82421875,
|
|
"completions/max_length": 1489.0,
|
|
"completions/max_terminated_length": 1489.0,
|
|
"completions/mean_length": 105.6171875,
|
|
"completions/mean_terminated_length": 600.844482421875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 8.0,
|
|
"epoch": 0.2416,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.3611111111111112e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 56778492.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 151
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8359375,
|
|
"completions/max_length": 1448.0,
|
|
"completions/max_terminated_length": 1448.0,
|
|
"completions/mean_length": 94.42578125,
|
|
"completions/mean_terminated_length": 575.547607421875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 31.0,
|
|
"epoch": 0.2432,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.3333333333333334e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 57140545.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 152
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8671875,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1536.0,
|
|
"completions/mean_length": 90.97265625,
|
|
"completions/mean_terminated_length": 684.9705810546875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 12.0,
|
|
"epoch": 0.2448,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.3055555555555556e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 57516770.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 153
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8515625,
|
|
"completions/max_length": 1535.0,
|
|
"completions/max_terminated_length": 1535.0,
|
|
"completions/mean_length": 89.671875,
|
|
"completions/mean_terminated_length": 604.1052856445312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 16.0,
|
|
"epoch": 0.2464,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.2777777777777779e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 57892726.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 154
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.859375,
|
|
"completions/max_length": 1410.0,
|
|
"completions/max_terminated_length": 1410.0,
|
|
"completions/mean_length": 88.7421875,
|
|
"completions/mean_terminated_length": 631.0555419921875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 23.0,
|
|
"epoch": 0.248,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.25e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 58273708.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 155
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8671875,
|
|
"completions/max_length": 1470.0,
|
|
"completions/max_terminated_length": 1470.0,
|
|
"completions/mean_length": 86.375,
|
|
"completions/mean_terminated_length": 650.3529663085938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 17.0,
|
|
"epoch": 0.2496,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.2222222222222223e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 58647788.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 156
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.859375,
|
|
"completions/max_length": 1493.0,
|
|
"completions/max_terminated_length": 1493.0,
|
|
"completions/mean_length": 83.45703125,
|
|
"completions/mean_terminated_length": 593.4722290039062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 53.0,
|
|
"epoch": 0.2512,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.1944444444444446e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 59015529.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 157
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8125,
|
|
"completions/max_length": 1276.0,
|
|
"completions/max_terminated_length": 1276.0,
|
|
"completions/mean_length": 108.015625,
|
|
"completions/mean_terminated_length": 576.0833740234375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 35.0,
|
|
"epoch": 0.2528,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.1666666666666668e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 59412013.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 158
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8828125,
|
|
"completions/max_length": 1390.0,
|
|
"completions/max_terminated_length": 1390.0,
|
|
"completions/mean_length": 65.76171875,
|
|
"completions/mean_terminated_length": 561.1666870117188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 11.0,
|
|
"epoch": 0.2544,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.138888888888889e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 59782528.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 159
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.78515625,
|
|
"completions/max_length": 1319.0,
|
|
"completions/max_terminated_length": 1319.0,
|
|
"completions/mean_length": 120.625,
|
|
"completions/mean_terminated_length": 561.4545288085938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 33.0,
|
|
"epoch": 0.256,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.111111111111111e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 60157520.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 160
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.86328125,
|
|
"completions/max_length": 1412.0,
|
|
"completions/max_terminated_length": 1412.0,
|
|
"completions/mean_length": 100.26953125,
|
|
"completions/mean_terminated_length": 733.4000244140625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 23.0,
|
|
"epoch": 0.2576,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.0833333333333335e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 60532317.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 161
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.828125,
|
|
"completions/max_length": 1395.0,
|
|
"completions/max_terminated_length": 1395.0,
|
|
"completions/mean_length": 98.61328125,
|
|
"completions/mean_terminated_length": 573.75,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 59.0,
|
|
"epoch": 0.2592,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.0555555555555557e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 60898842.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 162
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.859375,
|
|
"completions/max_length": 1408.0,
|
|
"completions/max_terminated_length": 1408.0,
|
|
"completions/mean_length": 96.0625,
|
|
"completions/mean_terminated_length": 683.1111450195312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 68.0,
|
|
"epoch": 0.2608,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.0277777777777777e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 61272290.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 163
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.84375,
|
|
"completions/max_length": 1479.0,
|
|
"completions/max_terminated_length": 1479.0,
|
|
"completions/mean_length": 116.3671875,
|
|
"completions/mean_terminated_length": 744.75,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 40.0,
|
|
"epoch": 0.2624,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.0000000000000002e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 61660200.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 164
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8203125,
|
|
"completions/max_length": 1481.0,
|
|
"completions/max_terminated_length": 1481.0,
|
|
"completions/mean_length": 129.41015625,
|
|
"completions/mean_terminated_length": 720.1956787109375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 46.0,
|
|
"epoch": 0.264,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 9.722222222222224e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 62033737.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 165
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8671875,
|
|
"completions/max_length": 1533.0,
|
|
"completions/max_terminated_length": 1533.0,
|
|
"completions/mean_length": 88.7734375,
|
|
"completions/mean_terminated_length": 668.4117431640625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 75.0,
|
|
"epoch": 0.2656,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 9.444444444444445e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 62408999.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 166
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8359375,
|
|
"completions/max_length": 1532.0,
|
|
"completions/max_terminated_length": 1532.0,
|
|
"completions/mean_length": 100.5234375,
|
|
"completions/mean_terminated_length": 612.7142944335938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 23.0,
|
|
"epoch": 0.2672,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 9.166666666666666e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 62801453.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 167
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.85546875,
|
|
"completions/max_length": 1449.0,
|
|
"completions/max_terminated_length": 1449.0,
|
|
"completions/mean_length": 94.0234375,
|
|
"completions/mean_terminated_length": 650.54052734375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 51.0,
|
|
"epoch": 0.2688,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 8.88888888888889e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 63175363.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 168
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8515625,
|
|
"completions/max_length": 1249.0,
|
|
"completions/max_terminated_length": 1249.0,
|
|
"completions/mean_length": 91.1796875,
|
|
"completions/mean_terminated_length": 614.26318359375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 70.0,
|
|
"epoch": 0.2704,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 8.611111111111112e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 63556105.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 169
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.87109375,
|
|
"completions/max_length": 1450.0,
|
|
"completions/max_terminated_length": 1450.0,
|
|
"completions/mean_length": 77.1328125,
|
|
"completions/mean_terminated_length": 598.3636474609375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 80.0,
|
|
"epoch": 0.272,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 8.333333333333333e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 63907227.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 170
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.88671875,
|
|
"completions/max_length": 1408.0,
|
|
"completions/max_terminated_length": 1408.0,
|
|
"completions/mean_length": 60.9375,
|
|
"completions/mean_terminated_length": 537.9310302734375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 77.0,
|
|
"epoch": 0.2736,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 8.055555555555557e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 64275131.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 171
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8671875,
|
|
"completions/max_length": 1492.0,
|
|
"completions/max_terminated_length": 1492.0,
|
|
"completions/mean_length": 79.08203125,
|
|
"completions/mean_terminated_length": 595.441162109375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 12.0,
|
|
"epoch": 0.2752,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 7.777777777777779e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 64657800.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 172
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.859375,
|
|
"completions/max_length": 1533.0,
|
|
"completions/max_terminated_length": 1533.0,
|
|
"completions/mean_length": 100.125,
|
|
"completions/mean_terminated_length": 712.0,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 60.0,
|
|
"epoch": 0.2768,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 7.5e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 65036304.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 173
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.84375,
|
|
"completions/max_length": 1447.0,
|
|
"completions/max_terminated_length": 1447.0,
|
|
"completions/mean_length": 88.95703125,
|
|
"completions/mean_terminated_length": 569.3250122070312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 61.0,
|
|
"epoch": 0.2784,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 7.222222222222222e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 65418037.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 174
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.84765625,
|
|
"completions/max_length": 1496.0,
|
|
"completions/max_terminated_length": 1496.0,
|
|
"completions/mean_length": 94.8671875,
|
|
"completions/mean_terminated_length": 622.7179565429688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 114.0,
|
|
"epoch": 0.28,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 6.944444444444446e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 65783035.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 175
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.91796875,
|
|
"completions/max_length": 1308.0,
|
|
"completions/max_terminated_length": 1308.0,
|
|
"completions/mean_length": 38.36328125,
|
|
"completions/mean_terminated_length": 467.66668701171875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 15.0,
|
|
"epoch": 0.2816,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 6.666666666666667e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 66132056.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 176
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8359375,
|
|
"completions/max_length": 1534.0,
|
|
"completions/max_terminated_length": 1534.0,
|
|
"completions/mean_length": 107.73046875,
|
|
"completions/mean_terminated_length": 656.6428833007812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 13.0,
|
|
"epoch": 0.2832,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 6.388888888888889e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 66514435.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 177
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.85546875,
|
|
"completions/max_length": 1408.0,
|
|
"completions/max_terminated_length": 1408.0,
|
|
"completions/mean_length": 84.62890625,
|
|
"completions/mean_terminated_length": 585.54052734375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 6.0,
|
|
"epoch": 0.2848,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 6.111111111111112e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 66880156.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 178
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.828125,
|
|
"completions/max_length": 1363.0,
|
|
"completions/max_terminated_length": 1363.0,
|
|
"completions/mean_length": 95.90234375,
|
|
"completions/mean_terminated_length": 557.977294921875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 21.0,
|
|
"epoch": 0.2864,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 5.833333333333334e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 67255579.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 179
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.87890625,
|
|
"completions/max_length": 1449.0,
|
|
"completions/max_terminated_length": 1449.0,
|
|
"completions/mean_length": 64.99609375,
|
|
"completions/mean_terminated_length": 536.741943359375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 39.0,
|
|
"epoch": 0.288,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 5.555555555555555e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 67609802.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 180
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.875,
|
|
"completions/max_length": 1494.0,
|
|
"completions/max_terminated_length": 1494.0,
|
|
"completions/mean_length": 82.04296875,
|
|
"completions/mean_terminated_length": 656.34375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 7.0,
|
|
"epoch": 0.2896,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 5.277777777777779e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 67981549.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 181
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.82421875,
|
|
"completions/max_length": 1496.0,
|
|
"completions/max_terminated_length": 1496.0,
|
|
"completions/mean_length": 122.66015625,
|
|
"completions/mean_terminated_length": 697.7999877929688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 12.0,
|
|
"epoch": 0.2912,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 5.000000000000001e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 68357534.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 182
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.83984375,
|
|
"completions/max_length": 1523.0,
|
|
"completions/max_terminated_length": 1523.0,
|
|
"completions/mean_length": 107.69140625,
|
|
"completions/mean_terminated_length": 672.4146118164062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 29.0,
|
|
"epoch": 0.2928,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.7222222222222226e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 68725335.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 183
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.86328125,
|
|
"completions/max_length": 1514.0,
|
|
"completions/max_terminated_length": 1514.0,
|
|
"completions/mean_length": 88.5,
|
|
"completions/mean_terminated_length": 647.3142700195312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 3.0,
|
|
"epoch": 0.2944,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.444444444444445e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 69098311.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 184
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.84765625,
|
|
"completions/max_length": 1453.0,
|
|
"completions/max_terminated_length": 1453.0,
|
|
"completions/mean_length": 96.26953125,
|
|
"completions/mean_terminated_length": 631.923095703125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 54.0,
|
|
"epoch": 0.296,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 4.1666666666666667e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 69474452.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 185
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.84375,
|
|
"completions/max_length": 1505.0,
|
|
"completions/max_terminated_length": 1505.0,
|
|
"completions/mean_length": 95.234375,
|
|
"completions/mean_terminated_length": 609.5,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 58.0,
|
|
"epoch": 0.2976,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.8888888888888895e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 69841976.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 186
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.890625,
|
|
"completions/max_length": 1480.0,
|
|
"completions/max_terminated_length": 1480.0,
|
|
"completions/mean_length": 68.9921875,
|
|
"completions/mean_terminated_length": 630.7857666015625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 8.0,
|
|
"epoch": 0.2992,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.611111111111111e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 70207238.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 187
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8671875,
|
|
"completions/max_length": 1457.0,
|
|
"completions/max_terminated_length": 1457.0,
|
|
"completions/mean_length": 72.6796875,
|
|
"completions/mean_terminated_length": 547.2352905273438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 14.0,
|
|
"epoch": 0.3008,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.3333333333333335e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 70591916.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 188
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8203125,
|
|
"completions/max_length": 1401.0,
|
|
"completions/max_terminated_length": 1401.0,
|
|
"completions/mean_length": 105.11328125,
|
|
"completions/mean_terminated_length": 584.978271484375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 1.0,
|
|
"epoch": 0.3024,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 3.055555555555556e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 70961801.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 189
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.83984375,
|
|
"completions/max_length": 1490.0,
|
|
"completions/max_terminated_length": 1490.0,
|
|
"completions/mean_length": 98.6875,
|
|
"completions/mean_terminated_length": 616.1951293945312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 88.0,
|
|
"epoch": 0.304,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.7777777777777776e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 71336625.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 190
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.81640625,
|
|
"completions/max_length": 1498.0,
|
|
"completions/max_terminated_length": 1498.0,
|
|
"completions/mean_length": 109.30078125,
|
|
"completions/mean_terminated_length": 595.3403930664062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 7.0,
|
|
"epoch": 0.3056,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.5000000000000004e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 71719838.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 191
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8515625,
|
|
"completions/max_length": 1387.0,
|
|
"completions/max_terminated_length": 1387.0,
|
|
"completions/mean_length": 73.8125,
|
|
"completions/mean_terminated_length": 497.2631530761719,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.3072,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.2222222222222224e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 72097886.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 192
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.8515625,
|
|
"completions/max_length": 1516.0,
|
|
"completions/max_terminated_length": 1516.0,
|
|
"completions/mean_length": 109.515625,
|
|
"completions/mean_terminated_length": 737.7894897460938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 9.0,
|
|
"epoch": 0.3088,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.9444444444444447e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 72485010.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 193
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.859375,
|
|
"completions/max_length": 1482.0,
|
|
"completions/max_terminated_length": 1482.0,
|
|
"completions/mean_length": 95.6328125,
|
|
"completions/mean_terminated_length": 680.0555419921875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 21.0,
|
|
"epoch": 0.3104,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.6666666666666668e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 72866236.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 194
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.828125,
|
|
"completions/max_length": 1469.0,
|
|
"completions/max_terminated_length": 1469.0,
|
|
"completions/mean_length": 98.37890625,
|
|
"completions/mean_terminated_length": 572.3863525390625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 45.0,
|
|
"epoch": 0.312,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.3888888888888888e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 73242717.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 195
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.84765625,
|
|
"completions/max_length": 1502.0,
|
|
"completions/max_terminated_length": 1502.0,
|
|
"completions/mean_length": 90.1328125,
|
|
"completions/mean_terminated_length": 591.6410522460938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.3136,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1.1111111111111112e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 73620487.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 196
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.84375,
|
|
"completions/max_length": 1494.0,
|
|
"completions/max_terminated_length": 1494.0,
|
|
"completions/mean_length": 100.1953125,
|
|
"completions/mean_terminated_length": 641.25,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 38.0,
|
|
"epoch": 0.3152,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 8.333333333333334e-08,
|
|
"loss": 0.0,
|
|
"num_tokens": 74000257.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 197
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.83203125,
|
|
"completions/max_length": 1489.0,
|
|
"completions/max_terminated_length": 1489.0,
|
|
"completions/mean_length": 108.84375,
|
|
"completions/mean_terminated_length": 648.0,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 16.0,
|
|
"epoch": 0.3168,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 5.555555555555556e-08,
|
|
"loss": 0.0,
|
|
"num_tokens": 74382729.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 198
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.859375,
|
|
"completions/max_length": 1394.0,
|
|
"completions/max_terminated_length": 1394.0,
|
|
"completions/mean_length": 85.1484375,
|
|
"completions/mean_terminated_length": 605.5,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 87.0,
|
|
"epoch": 0.3184,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 2.777777777777778e-08,
|
|
"loss": 0.0,
|
|
"num_tokens": 74760231.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 199
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": NaN,
|
|
"clip_ratio/high_mean": NaN,
|
|
"clip_ratio/low_mean": NaN,
|
|
"clip_ratio/low_min": NaN,
|
|
"clip_ratio/region_mean": NaN,
|
|
"completions/clipped_ratio": 0.89453125,
|
|
"completions/max_length": 1493.0,
|
|
"completions/max_terminated_length": 1493.0,
|
|
"completions/mean_length": 72.171875,
|
|
"completions/mean_terminated_length": 684.2963256835938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 151.0,
|
|
"epoch": 0.32,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 0.0,
|
|
"loss": 0.0,
|
|
"num_tokens": 75104483.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward_step": 0.0,
|
|
"rewards/final_brier_reward_step": 0.0,
|
|
"rewards/format_reward_step": 0.0,
|
|
"rewards/stepwise_brier_reward": 0.0,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"step": 200,
|
|
"total_flos": 0.0,
|
|
"train_loss": -0.004646302500041202,
|
|
"train_runtime": 12222.6581,
|
|
"train_samples_per_second": 4.189,
|
|
"train_steps_per_second": 0.016
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 200,
|
|
"num_input_tokens_seen": 75104483,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 20,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 2,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|