10028 lines
358 KiB
JSON
10028 lines
358 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.9984,
|
|
"eval_steps": 500,
|
|
"global_step": 312,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 386.0,
|
|
"completions/max_terminated_length": 386.0,
|
|
"completions/mean_length": 129.8984375,
|
|
"completions/mean_terminated_length": 129.8984375,
|
|
"completions/min_length": 48.0,
|
|
"completions/min_terminated_length": 48.0,
|
|
"epoch": 0.0032,
|
|
"grad_norm": 0.5696932673454285,
|
|
"learning_rate": 0.0,
|
|
"loss": 0.0045,
|
|
"num_tokens": 682196.0,
|
|
"reward": 0.8896484375,
|
|
"reward_std": 0.2799686789512634,
|
|
"rewards/accuracy_reward_conf_tag": 0.4140625,
|
|
"rewards/format_reward_conf_tag": 0.951171875,
|
|
"step": 1
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 466.0,
|
|
"completions/max_terminated_length": 466.0,
|
|
"completions/mean_length": 135.37890625,
|
|
"completions/mean_terminated_length": 135.37890625,
|
|
"completions/min_length": 43.0,
|
|
"completions/min_terminated_length": 43.0,
|
|
"epoch": 0.0064,
|
|
"grad_norm": 0.7087669968605042,
|
|
"learning_rate": 3.125e-08,
|
|
"loss": -0.0005,
|
|
"num_tokens": 1388414.0,
|
|
"reward": 0.876953125,
|
|
"reward_std": 0.2851727604866028,
|
|
"rewards/accuracy_reward_conf_tag": 0.41015625,
|
|
"rewards/format_reward_conf_tag": 0.93359375,
|
|
"step": 2
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 400.0,
|
|
"completions/max_terminated_length": 400.0,
|
|
"completions/mean_length": 132.919921875,
|
|
"completions/mean_terminated_length": 132.919921875,
|
|
"completions/min_length": 42.0,
|
|
"completions/min_terminated_length": 42.0,
|
|
"epoch": 0.0096,
|
|
"grad_norm": 0.6015397906303406,
|
|
"learning_rate": 6.25e-08,
|
|
"loss": 0.0152,
|
|
"num_tokens": 2114917.0,
|
|
"reward": 0.91796875,
|
|
"reward_std": 0.2880901098251343,
|
|
"rewards/accuracy_reward_conf_tag": 0.44140625,
|
|
"rewards/format_reward_conf_tag": 0.953125,
|
|
"step": 3
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 321.0,
|
|
"completions/max_terminated_length": 321.0,
|
|
"completions/mean_length": 130.029296875,
|
|
"completions/mean_terminated_length": 130.029296875,
|
|
"completions/min_length": 47.0,
|
|
"completions/min_terminated_length": 47.0,
|
|
"epoch": 0.0128,
|
|
"grad_norm": 0.614819347858429,
|
|
"learning_rate": 9.375e-08,
|
|
"loss": 0.0048,
|
|
"num_tokens": 2800780.0,
|
|
"reward": 0.93359375,
|
|
"reward_std": 0.3034687042236328,
|
|
"rewards/accuracy_reward_conf_tag": 0.46484375,
|
|
"rewards/format_reward_conf_tag": 0.9375,
|
|
"step": 4
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 478.0,
|
|
"completions/max_terminated_length": 478.0,
|
|
"completions/mean_length": 148.236328125,
|
|
"completions/mean_terminated_length": 148.236328125,
|
|
"completions/min_length": 43.0,
|
|
"completions/min_terminated_length": 43.0,
|
|
"epoch": 0.016,
|
|
"grad_norm": 0.4650380313396454,
|
|
"learning_rate": 1.25e-07,
|
|
"loss": 0.0037,
|
|
"num_tokens": 3524981.0,
|
|
"reward": 0.7470703125,
|
|
"reward_std": 0.2420598566532135,
|
|
"rewards/accuracy_reward_conf_tag": 0.28125,
|
|
"rewards/format_reward_conf_tag": 0.931640625,
|
|
"step": 5
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 408.0,
|
|
"completions/max_terminated_length": 408.0,
|
|
"completions/mean_length": 138.994140625,
|
|
"completions/mean_terminated_length": 138.994140625,
|
|
"completions/min_length": 47.0,
|
|
"completions/min_terminated_length": 47.0,
|
|
"epoch": 0.0192,
|
|
"grad_norm": 0.5227891206741333,
|
|
"learning_rate": 1.5624999999999999e-07,
|
|
"loss": -0.0016,
|
|
"num_tokens": 4245562.0,
|
|
"reward": 0.912109375,
|
|
"reward_std": 0.29249250888824463,
|
|
"rewards/accuracy_reward_conf_tag": 0.435546875,
|
|
"rewards/format_reward_conf_tag": 0.953125,
|
|
"step": 6
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 480.0,
|
|
"completions/max_terminated_length": 480.0,
|
|
"completions/mean_length": 137.193359375,
|
|
"completions/mean_terminated_length": 137.193359375,
|
|
"completions/min_length": 45.0,
|
|
"completions/min_terminated_length": 45.0,
|
|
"epoch": 0.0224,
|
|
"grad_norm": 0.4919546842575073,
|
|
"learning_rate": 1.875e-07,
|
|
"loss": -0.0021,
|
|
"num_tokens": 4975437.0,
|
|
"reward": 0.91015625,
|
|
"reward_std": 0.2555137276649475,
|
|
"rewards/accuracy_reward_conf_tag": 0.431640625,
|
|
"rewards/format_reward_conf_tag": 0.95703125,
|
|
"step": 7
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 479.0,
|
|
"completions/max_terminated_length": 479.0,
|
|
"completions/mean_length": 134.935546875,
|
|
"completions/mean_terminated_length": 134.935546875,
|
|
"completions/min_length": 45.0,
|
|
"completions/min_terminated_length": 45.0,
|
|
"epoch": 0.0256,
|
|
"grad_norm": 0.5210925936698914,
|
|
"learning_rate": 2.1875e-07,
|
|
"loss": 0.0074,
|
|
"num_tokens": 5710548.0,
|
|
"reward": 0.8974609375,
|
|
"reward_std": 0.2795425057411194,
|
|
"rewards/accuracy_reward_conf_tag": 0.416015625,
|
|
"rewards/format_reward_conf_tag": 0.962890625,
|
|
"step": 8
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 745.0,
|
|
"completions/max_terminated_length": 745.0,
|
|
"completions/mean_length": 144.29296875,
|
|
"completions/mean_terminated_length": 144.29296875,
|
|
"completions/min_length": 46.0,
|
|
"completions/min_terminated_length": 46.0,
|
|
"epoch": 0.0288,
|
|
"grad_norm": 0.4255554676055908,
|
|
"learning_rate": 2.5e-07,
|
|
"loss": 0.0074,
|
|
"num_tokens": 6401338.0,
|
|
"reward": 0.826171875,
|
|
"reward_std": 0.24940608441829681,
|
|
"rewards/accuracy_reward_conf_tag": 0.34765625,
|
|
"rewards/format_reward_conf_tag": 0.95703125,
|
|
"step": 9
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 444.0,
|
|
"completions/max_terminated_length": 444.0,
|
|
"completions/mean_length": 136.876953125,
|
|
"completions/mean_terminated_length": 136.876953125,
|
|
"completions/min_length": 41.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.032,
|
|
"grad_norm": 0.5149644017219543,
|
|
"learning_rate": 2.8125e-07,
|
|
"loss": 0.0045,
|
|
"num_tokens": 7122283.0,
|
|
"reward": 0.8369140625,
|
|
"reward_std": 0.2635266184806824,
|
|
"rewards/accuracy_reward_conf_tag": 0.357421875,
|
|
"rewards/format_reward_conf_tag": 0.958984375,
|
|
"step": 10
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 527.0,
|
|
"completions/max_terminated_length": 527.0,
|
|
"completions/mean_length": 147.564453125,
|
|
"completions/mean_terminated_length": 147.564453125,
|
|
"completions/min_length": 44.0,
|
|
"completions/min_terminated_length": 44.0,
|
|
"epoch": 0.0352,
|
|
"grad_norm": 0.3749123215675354,
|
|
"learning_rate": 3.1249999999999997e-07,
|
|
"loss": 0.0009,
|
|
"num_tokens": 7844076.0,
|
|
"reward": 0.7958984375,
|
|
"reward_std": 0.18583612143993378,
|
|
"rewards/accuracy_reward_conf_tag": 0.314453125,
|
|
"rewards/format_reward_conf_tag": 0.962890625,
|
|
"step": 11
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 489.0,
|
|
"completions/max_terminated_length": 489.0,
|
|
"completions/mean_length": 131.255859375,
|
|
"completions/mean_terminated_length": 131.255859375,
|
|
"completions/min_length": 44.0,
|
|
"completions/min_terminated_length": 44.0,
|
|
"epoch": 0.0384,
|
|
"grad_norm": 0.43781086802482605,
|
|
"learning_rate": 3.4375e-07,
|
|
"loss": 0.0067,
|
|
"num_tokens": 8555231.0,
|
|
"reward": 0.88671875,
|
|
"reward_std": 0.21360914409160614,
|
|
"rewards/accuracy_reward_conf_tag": 0.40234375,
|
|
"rewards/format_reward_conf_tag": 0.96875,
|
|
"step": 12
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 365.0,
|
|
"completions/max_terminated_length": 365.0,
|
|
"completions/mean_length": 144.42578125,
|
|
"completions/mean_terminated_length": 144.42578125,
|
|
"completions/min_length": 44.0,
|
|
"completions/min_terminated_length": 44.0,
|
|
"epoch": 0.0416,
|
|
"grad_norm": 0.43813973665237427,
|
|
"learning_rate": 3.75e-07,
|
|
"loss": 0.0025,
|
|
"num_tokens": 9294841.0,
|
|
"reward": 0.826171875,
|
|
"reward_std": 0.22373488545417786,
|
|
"rewards/accuracy_reward_conf_tag": 0.3515625,
|
|
"rewards/format_reward_conf_tag": 0.94921875,
|
|
"step": 13
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 435.0,
|
|
"completions/max_terminated_length": 435.0,
|
|
"completions/mean_length": 146.2109375,
|
|
"completions/mean_terminated_length": 146.2109375,
|
|
"completions/min_length": 50.0,
|
|
"completions/min_terminated_length": 50.0,
|
|
"epoch": 0.0448,
|
|
"grad_norm": 0.39122122526168823,
|
|
"learning_rate": 4.0625e-07,
|
|
"loss": 0.0013,
|
|
"num_tokens": 10000757.0,
|
|
"reward": 0.865234375,
|
|
"reward_std": 0.1890736222267151,
|
|
"rewards/accuracy_reward_conf_tag": 0.380859375,
|
|
"rewards/format_reward_conf_tag": 0.96875,
|
|
"step": 14
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 812.0,
|
|
"completions/max_terminated_length": 812.0,
|
|
"completions/mean_length": 139.2890625,
|
|
"completions/mean_terminated_length": 139.2890625,
|
|
"completions/min_length": 45.0,
|
|
"completions/min_terminated_length": 45.0,
|
|
"epoch": 0.048,
|
|
"grad_norm": 0.4133016765117645,
|
|
"learning_rate": 4.375e-07,
|
|
"loss": 0.0106,
|
|
"num_tokens": 10714945.0,
|
|
"reward": 0.89453125,
|
|
"reward_std": 0.20083385705947876,
|
|
"rewards/accuracy_reward_conf_tag": 0.408203125,
|
|
"rewards/format_reward_conf_tag": 0.97265625,
|
|
"step": 15
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 451.0,
|
|
"completions/max_terminated_length": 451.0,
|
|
"completions/mean_length": 128.763671875,
|
|
"completions/mean_terminated_length": 128.763671875,
|
|
"completions/min_length": 44.0,
|
|
"completions/min_terminated_length": 44.0,
|
|
"epoch": 0.0512,
|
|
"grad_norm": 0.45241987705230713,
|
|
"learning_rate": 4.6874999999999996e-07,
|
|
"loss": 0.0021,
|
|
"num_tokens": 11406400.0,
|
|
"reward": 0.87109375,
|
|
"reward_std": 0.22898858785629272,
|
|
"rewards/accuracy_reward_conf_tag": 0.37890625,
|
|
"rewards/format_reward_conf_tag": 0.984375,
|
|
"step": 16
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 529.0,
|
|
"completions/max_terminated_length": 529.0,
|
|
"completions/mean_length": 135.091796875,
|
|
"completions/mean_terminated_length": 135.091796875,
|
|
"completions/min_length": 38.0,
|
|
"completions/min_terminated_length": 38.0,
|
|
"epoch": 0.0544,
|
|
"grad_norm": 0.43074896931648254,
|
|
"learning_rate": 5e-07,
|
|
"loss": 0.003,
|
|
"num_tokens": 12121991.0,
|
|
"reward": 0.8603515625,
|
|
"reward_std": 0.20363333821296692,
|
|
"rewards/accuracy_reward_conf_tag": 0.376953125,
|
|
"rewards/format_reward_conf_tag": 0.966796875,
|
|
"step": 17
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 479.0,
|
|
"completions/max_terminated_length": 479.0,
|
|
"completions/mean_length": 133.248046875,
|
|
"completions/mean_terminated_length": 133.248046875,
|
|
"completions/min_length": 40.0,
|
|
"completions/min_terminated_length": 40.0,
|
|
"epoch": 0.0576,
|
|
"grad_norm": 0.5622017979621887,
|
|
"learning_rate": 5.3125e-07,
|
|
"loss": 0.01,
|
|
"num_tokens": 12832646.0,
|
|
"reward": 1.0009765625,
|
|
"reward_std": 0.2823396921157837,
|
|
"rewards/accuracy_reward_conf_tag": 0.51171875,
|
|
"rewards/format_reward_conf_tag": 0.978515625,
|
|
"step": 18
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 595.0,
|
|
"completions/max_terminated_length": 595.0,
|
|
"completions/mean_length": 137.880859375,
|
|
"completions/mean_terminated_length": 137.880859375,
|
|
"completions/min_length": 42.0,
|
|
"completions/min_terminated_length": 42.0,
|
|
"epoch": 0.0608,
|
|
"grad_norm": 0.42013996839523315,
|
|
"learning_rate": 5.625e-07,
|
|
"loss": 0.0064,
|
|
"num_tokens": 13554545.0,
|
|
"reward": 0.9013671875,
|
|
"reward_std": 0.1799347698688507,
|
|
"rewards/accuracy_reward_conf_tag": 0.412109375,
|
|
"rewards/format_reward_conf_tag": 0.978515625,
|
|
"step": 19
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 572.0,
|
|
"completions/max_terminated_length": 572.0,
|
|
"completions/mean_length": 143.6171875,
|
|
"completions/mean_terminated_length": 143.6171875,
|
|
"completions/min_length": 46.0,
|
|
"completions/min_terminated_length": 46.0,
|
|
"epoch": 0.064,
|
|
"grad_norm": 0.38607338070869446,
|
|
"learning_rate": 5.937499999999999e-07,
|
|
"loss": 0.0026,
|
|
"num_tokens": 14259629.0,
|
|
"reward": 0.85546875,
|
|
"reward_std": 0.1840585172176361,
|
|
"rewards/accuracy_reward_conf_tag": 0.361328125,
|
|
"rewards/format_reward_conf_tag": 0.98828125,
|
|
"step": 20
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 405.0,
|
|
"completions/max_terminated_length": 405.0,
|
|
"completions/mean_length": 130.44921875,
|
|
"completions/mean_terminated_length": 130.44921875,
|
|
"completions/min_length": 43.0,
|
|
"completions/min_terminated_length": 43.0,
|
|
"epoch": 0.0672,
|
|
"grad_norm": 0.43252310156822205,
|
|
"learning_rate": 6.249999999999999e-07,
|
|
"loss": 0.0048,
|
|
"num_tokens": 14972307.0,
|
|
"reward": 0.92578125,
|
|
"reward_std": 0.18132153153419495,
|
|
"rewards/accuracy_reward_conf_tag": 0.435546875,
|
|
"rewards/format_reward_conf_tag": 0.98046875,
|
|
"step": 21
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 409.0,
|
|
"completions/max_terminated_length": 409.0,
|
|
"completions/mean_length": 129.318359375,
|
|
"completions/mean_terminated_length": 129.318359375,
|
|
"completions/min_length": 39.0,
|
|
"completions/min_terminated_length": 39.0,
|
|
"epoch": 0.0704,
|
|
"grad_norm": 0.4275171756744385,
|
|
"learning_rate": 6.5625e-07,
|
|
"loss": 0.0071,
|
|
"num_tokens": 15702902.0,
|
|
"reward": 0.890625,
|
|
"reward_std": 0.16756784915924072,
|
|
"rewards/accuracy_reward_conf_tag": 0.396484375,
|
|
"rewards/format_reward_conf_tag": 0.98828125,
|
|
"step": 22
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 379.0,
|
|
"completions/max_terminated_length": 379.0,
|
|
"completions/mean_length": 123.634765625,
|
|
"completions/mean_terminated_length": 123.634765625,
|
|
"completions/min_length": 39.0,
|
|
"completions/min_terminated_length": 39.0,
|
|
"epoch": 0.0736,
|
|
"grad_norm": 0.41762423515319824,
|
|
"learning_rate": 6.875e-07,
|
|
"loss": 0.0014,
|
|
"num_tokens": 16385827.0,
|
|
"reward": 0.9150390625,
|
|
"reward_std": 0.15485742688179016,
|
|
"rewards/accuracy_reward_conf_tag": 0.421875,
|
|
"rewards/format_reward_conf_tag": 0.986328125,
|
|
"step": 23
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 630.0,
|
|
"completions/max_terminated_length": 630.0,
|
|
"completions/mean_length": 122.8671875,
|
|
"completions/mean_terminated_length": 122.8671875,
|
|
"completions/min_length": 42.0,
|
|
"completions/min_terminated_length": 42.0,
|
|
"epoch": 0.0768,
|
|
"grad_norm": 0.32948175072669983,
|
|
"learning_rate": 7.1875e-07,
|
|
"loss": 0.0043,
|
|
"num_tokens": 17064855.0,
|
|
"reward": 0.9697265625,
|
|
"reward_std": 0.12152266502380371,
|
|
"rewards/accuracy_reward_conf_tag": 0.470703125,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 24
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 657.0,
|
|
"completions/max_terminated_length": 657.0,
|
|
"completions/mean_length": 119.166015625,
|
|
"completions/mean_terminated_length": 119.166015625,
|
|
"completions/min_length": 41.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.08,
|
|
"grad_norm": 0.4611853361129761,
|
|
"learning_rate": 7.5e-07,
|
|
"loss": 0.003,
|
|
"num_tokens": 17780772.0,
|
|
"reward": 0.96875,
|
|
"reward_std": 0.2091064751148224,
|
|
"rewards/accuracy_reward_conf_tag": 0.47265625,
|
|
"rewards/format_reward_conf_tag": 0.9921875,
|
|
"step": 25
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 449.0,
|
|
"completions/max_terminated_length": 449.0,
|
|
"completions/mean_length": 120.908203125,
|
|
"completions/mean_terminated_length": 120.908203125,
|
|
"completions/min_length": 38.0,
|
|
"completions/min_terminated_length": 38.0,
|
|
"epoch": 0.0832,
|
|
"grad_norm": 0.3272891640663147,
|
|
"learning_rate": 7.812499999999999e-07,
|
|
"loss": 0.0073,
|
|
"num_tokens": 18493589.0,
|
|
"reward": 0.986328125,
|
|
"reward_std": 0.12441777437925339,
|
|
"rewards/accuracy_reward_conf_tag": 0.490234375,
|
|
"rewards/format_reward_conf_tag": 0.9921875,
|
|
"step": 26
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 537.0,
|
|
"completions/max_terminated_length": 537.0,
|
|
"completions/mean_length": 118.529296875,
|
|
"completions/mean_terminated_length": 118.529296875,
|
|
"completions/min_length": 43.0,
|
|
"completions/min_terminated_length": 43.0,
|
|
"epoch": 0.0864,
|
|
"grad_norm": 0.3120933175086975,
|
|
"learning_rate": 8.125e-07,
|
|
"loss": -0.001,
|
|
"num_tokens": 19191684.0,
|
|
"reward": 0.8515625,
|
|
"reward_std": 0.11107683926820755,
|
|
"rewards/accuracy_reward_conf_tag": 0.353515625,
|
|
"rewards/format_reward_conf_tag": 0.99609375,
|
|
"step": 27
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 402.0,
|
|
"completions/max_terminated_length": 402.0,
|
|
"completions/mean_length": 119.529296875,
|
|
"completions/mean_terminated_length": 119.529296875,
|
|
"completions/min_length": 37.0,
|
|
"completions/min_terminated_length": 37.0,
|
|
"epoch": 0.0896,
|
|
"grad_norm": 0.4711366593837738,
|
|
"learning_rate": 8.4375e-07,
|
|
"loss": 0.0021,
|
|
"num_tokens": 19916331.0,
|
|
"reward": 0.9736328125,
|
|
"reward_std": 0.1592046320438385,
|
|
"rewards/accuracy_reward_conf_tag": 0.478515625,
|
|
"rewards/format_reward_conf_tag": 0.990234375,
|
|
"step": 28
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 589.0,
|
|
"completions/max_terminated_length": 589.0,
|
|
"completions/mean_length": 119.876953125,
|
|
"completions/mean_terminated_length": 119.876953125,
|
|
"completions/min_length": 39.0,
|
|
"completions/min_terminated_length": 39.0,
|
|
"epoch": 0.0928,
|
|
"grad_norm": 0.2972983121871948,
|
|
"learning_rate": 8.75e-07,
|
|
"loss": 0.0002,
|
|
"num_tokens": 20612092.0,
|
|
"reward": 0.9306640625,
|
|
"reward_std": 0.10890618711709976,
|
|
"rewards/accuracy_reward_conf_tag": 0.43359375,
|
|
"rewards/format_reward_conf_tag": 0.994140625,
|
|
"step": 29
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 451.0,
|
|
"completions/max_terminated_length": 451.0,
|
|
"completions/mean_length": 121.0,
|
|
"completions/mean_terminated_length": 121.0,
|
|
"completions/min_length": 45.0,
|
|
"completions/min_terminated_length": 45.0,
|
|
"epoch": 0.096,
|
|
"grad_norm": 0.4076586067676544,
|
|
"learning_rate": 9.0625e-07,
|
|
"loss": -0.0047,
|
|
"num_tokens": 21316700.0,
|
|
"reward": 0.869140625,
|
|
"reward_std": 0.16637209057807922,
|
|
"rewards/accuracy_reward_conf_tag": 0.37109375,
|
|
"rewards/format_reward_conf_tag": 0.99609375,
|
|
"step": 30
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 437.0,
|
|
"completions/max_terminated_length": 437.0,
|
|
"completions/mean_length": 113.517578125,
|
|
"completions/mean_terminated_length": 113.517578125,
|
|
"completions/min_length": 37.0,
|
|
"completions/min_terminated_length": 37.0,
|
|
"epoch": 0.0992,
|
|
"grad_norm": 0.5173690915107727,
|
|
"learning_rate": 9.374999999999999e-07,
|
|
"loss": 0.0033,
|
|
"num_tokens": 22020325.0,
|
|
"reward": 1.0048828125,
|
|
"reward_std": 0.1903141289949417,
|
|
"rewards/accuracy_reward_conf_tag": 0.509765625,
|
|
"rewards/format_reward_conf_tag": 0.990234375,
|
|
"step": 31
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 358.0,
|
|
"completions/max_terminated_length": 358.0,
|
|
"completions/mean_length": 121.724609375,
|
|
"completions/mean_terminated_length": 121.724609375,
|
|
"completions/min_length": 42.0,
|
|
"completions/min_terminated_length": 42.0,
|
|
"epoch": 0.1024,
|
|
"grad_norm": 0.43424108624458313,
|
|
"learning_rate": 9.6875e-07,
|
|
"loss": -0.0056,
|
|
"num_tokens": 22742224.0,
|
|
"reward": 0.9140625,
|
|
"reward_std": 0.1641375869512558,
|
|
"rewards/accuracy_reward_conf_tag": 0.416015625,
|
|
"rewards/format_reward_conf_tag": 0.99609375,
|
|
"step": 32
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.001953125,
|
|
"completions/max_length": 349.0,
|
|
"completions/max_terminated_length": 349.0,
|
|
"completions/mean_length": 110.896484375,
|
|
"completions/mean_terminated_length": 111.1135025024414,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 43.0,
|
|
"epoch": 0.1056,
|
|
"grad_norm": 0.4005340337753296,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0024,
|
|
"num_tokens": 23456827.0,
|
|
"reward": 0.908203125,
|
|
"reward_std": 0.14249923825263977,
|
|
"rewards/accuracy_reward_conf_tag": 0.41015625,
|
|
"rewards/format_reward_conf_tag": 0.99609375,
|
|
"step": 33
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 336.0,
|
|
"completions/max_terminated_length": 336.0,
|
|
"completions/mean_length": 113.3359375,
|
|
"completions/mean_terminated_length": 113.3359375,
|
|
"completions/min_length": 45.0,
|
|
"completions/min_terminated_length": 45.0,
|
|
"epoch": 0.1088,
|
|
"grad_norm": 0.37346094846725464,
|
|
"learning_rate": 9.9644128113879e-07,
|
|
"loss": 0.0002,
|
|
"num_tokens": 24170503.0,
|
|
"reward": 0.990234375,
|
|
"reward_std": 0.1363212764263153,
|
|
"rewards/accuracy_reward_conf_tag": 0.490234375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 34
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 396.0,
|
|
"completions/max_terminated_length": 396.0,
|
|
"completions/mean_length": 112.53515625,
|
|
"completions/mean_terminated_length": 112.53515625,
|
|
"completions/min_length": 35.0,
|
|
"completions/min_terminated_length": 35.0,
|
|
"epoch": 0.112,
|
|
"grad_norm": 0.40560030937194824,
|
|
"learning_rate": 9.9288256227758e-07,
|
|
"loss": 0.0034,
|
|
"num_tokens": 24854665.0,
|
|
"reward": 1.0517578125,
|
|
"reward_std": 0.15736976265907288,
|
|
"rewards/accuracy_reward_conf_tag": 0.552734375,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 35
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 342.0,
|
|
"completions/max_terminated_length": 342.0,
|
|
"completions/mean_length": 109.083984375,
|
|
"completions/mean_terminated_length": 109.083984375,
|
|
"completions/min_length": 41.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.1152,
|
|
"grad_norm": 0.37966471910476685,
|
|
"learning_rate": 9.8932384341637e-07,
|
|
"loss": 0.0025,
|
|
"num_tokens": 25537916.0,
|
|
"reward": 0.98828125,
|
|
"reward_std": 0.11993881314992905,
|
|
"rewards/accuracy_reward_conf_tag": 0.48828125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 36
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 384.0,
|
|
"completions/max_terminated_length": 384.0,
|
|
"completions/mean_length": 114.1640625,
|
|
"completions/mean_terminated_length": 114.1640625,
|
|
"completions/min_length": 41.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.1184,
|
|
"grad_norm": 0.49092215299606323,
|
|
"learning_rate": 9.8576512455516e-07,
|
|
"loss": 0.0001,
|
|
"num_tokens": 26229336.0,
|
|
"reward": 0.87890625,
|
|
"reward_std": 0.1713615357875824,
|
|
"rewards/accuracy_reward_conf_tag": 0.380859375,
|
|
"rewards/format_reward_conf_tag": 0.99609375,
|
|
"step": 37
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 342.0,
|
|
"completions/max_terminated_length": 342.0,
|
|
"completions/mean_length": 111.259765625,
|
|
"completions/mean_terminated_length": 111.259765625,
|
|
"completions/min_length": 38.0,
|
|
"completions/min_terminated_length": 38.0,
|
|
"epoch": 0.1216,
|
|
"grad_norm": 0.3609483242034912,
|
|
"learning_rate": 9.8220640569395e-07,
|
|
"loss": -0.004,
|
|
"num_tokens": 26935821.0,
|
|
"reward": 0.96484375,
|
|
"reward_std": 0.12743577361106873,
|
|
"rewards/accuracy_reward_conf_tag": 0.46484375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 38
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 539.0,
|
|
"completions/max_terminated_length": 539.0,
|
|
"completions/mean_length": 108.09765625,
|
|
"completions/mean_terminated_length": 108.09765625,
|
|
"completions/min_length": 39.0,
|
|
"completions/min_terminated_length": 39.0,
|
|
"epoch": 0.1248,
|
|
"grad_norm": 0.37006741762161255,
|
|
"learning_rate": 9.786476868327401e-07,
|
|
"loss": -0.0013,
|
|
"num_tokens": 27632351.0,
|
|
"reward": 0.9599609375,
|
|
"reward_std": 0.14255574345588684,
|
|
"rewards/accuracy_reward_conf_tag": 0.4609375,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 39
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 326.0,
|
|
"completions/max_terminated_length": 326.0,
|
|
"completions/mean_length": 111.908203125,
|
|
"completions/mean_terminated_length": 111.908203125,
|
|
"completions/min_length": 40.0,
|
|
"completions/min_terminated_length": 40.0,
|
|
"epoch": 0.128,
|
|
"grad_norm": 0.33325719833374023,
|
|
"learning_rate": 9.750889679715302e-07,
|
|
"loss": -0.002,
|
|
"num_tokens": 28335424.0,
|
|
"reward": 0.892578125,
|
|
"reward_std": 0.08929628133773804,
|
|
"rewards/accuracy_reward_conf_tag": 0.392578125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 40
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 416.0,
|
|
"completions/max_terminated_length": 416.0,
|
|
"completions/mean_length": 105.275390625,
|
|
"completions/mean_terminated_length": 105.275390625,
|
|
"completions/min_length": 36.0,
|
|
"completions/min_terminated_length": 36.0,
|
|
"epoch": 0.1312,
|
|
"grad_norm": 0.38754379749298096,
|
|
"learning_rate": 9.715302491103202e-07,
|
|
"loss": -0.001,
|
|
"num_tokens": 29036165.0,
|
|
"reward": 0.9609375,
|
|
"reward_std": 0.12428481131792068,
|
|
"rewards/accuracy_reward_conf_tag": 0.4609375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 41
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 301.0,
|
|
"completions/max_terminated_length": 301.0,
|
|
"completions/mean_length": 103.38671875,
|
|
"completions/mean_terminated_length": 103.38671875,
|
|
"completions/min_length": 36.0,
|
|
"completions/min_terminated_length": 36.0,
|
|
"epoch": 0.1344,
|
|
"grad_norm": 0.5092437267303467,
|
|
"learning_rate": 9.679715302491102e-07,
|
|
"loss": 0.0012,
|
|
"num_tokens": 29740691.0,
|
|
"reward": 1.0556640625,
|
|
"reward_std": 0.12526994943618774,
|
|
"rewards/accuracy_reward_conf_tag": 0.556640625,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 42
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 373.0,
|
|
"completions/max_terminated_length": 373.0,
|
|
"completions/mean_length": 103.85546875,
|
|
"completions/mean_terminated_length": 103.85546875,
|
|
"completions/min_length": 44.0,
|
|
"completions/min_terminated_length": 44.0,
|
|
"epoch": 0.1376,
|
|
"grad_norm": 0.3068692088127136,
|
|
"learning_rate": 9.644128113879002e-07,
|
|
"loss": 0.0016,
|
|
"num_tokens": 30422817.0,
|
|
"reward": 1.0537109375,
|
|
"reward_std": 0.08325093984603882,
|
|
"rewards/accuracy_reward_conf_tag": 0.5546875,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 43
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 267.0,
|
|
"completions/max_terminated_length": 267.0,
|
|
"completions/mean_length": 104.0390625,
|
|
"completions/mean_terminated_length": 104.0390625,
|
|
"completions/min_length": 32.0,
|
|
"completions/min_terminated_length": 32.0,
|
|
"epoch": 0.1408,
|
|
"grad_norm": 0.4194898009300232,
|
|
"learning_rate": 9.608540925266903e-07,
|
|
"loss": -0.0007,
|
|
"num_tokens": 31110461.0,
|
|
"reward": 1.1123046875,
|
|
"reward_std": 0.1408482789993286,
|
|
"rewards/accuracy_reward_conf_tag": 0.61328125,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 44
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 490.0,
|
|
"completions/max_terminated_length": 490.0,
|
|
"completions/mean_length": 110.890625,
|
|
"completions/mean_terminated_length": 110.890625,
|
|
"completions/min_length": 38.0,
|
|
"completions/min_terminated_length": 38.0,
|
|
"epoch": 0.144,
|
|
"grad_norm": 0.35951900482177734,
|
|
"learning_rate": 9.572953736654805e-07,
|
|
"loss": -0.0015,
|
|
"num_tokens": 31810749.0,
|
|
"reward": 0.966796875,
|
|
"reward_std": 0.12099841982126236,
|
|
"rewards/accuracy_reward_conf_tag": 0.466796875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 45
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 322.0,
|
|
"completions/max_terminated_length": 322.0,
|
|
"completions/mean_length": 107.833984375,
|
|
"completions/mean_terminated_length": 107.833984375,
|
|
"completions/min_length": 45.0,
|
|
"completions/min_terminated_length": 45.0,
|
|
"epoch": 0.1472,
|
|
"grad_norm": 0.30960193276405334,
|
|
"learning_rate": 9.537366548042705e-07,
|
|
"loss": 0.0026,
|
|
"num_tokens": 32494312.0,
|
|
"reward": 1.046875,
|
|
"reward_std": 0.10974523425102234,
|
|
"rewards/accuracy_reward_conf_tag": 0.546875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 46
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.001953125,
|
|
"completions/max_length": 494.0,
|
|
"completions/max_terminated_length": 494.0,
|
|
"completions/mean_length": 112.703125,
|
|
"completions/mean_terminated_length": 112.92367553710938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 43.0,
|
|
"epoch": 0.1504,
|
|
"grad_norm": 0.40593746304512024,
|
|
"learning_rate": 9.501779359430605e-07,
|
|
"loss": -0.0002,
|
|
"num_tokens": 33188664.0,
|
|
"reward": 1.0126953125,
|
|
"reward_std": 0.1391420215368271,
|
|
"rewards/accuracy_reward_conf_tag": 0.513671875,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 47
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 348.0,
|
|
"completions/max_terminated_length": 348.0,
|
|
"completions/mean_length": 106.701171875,
|
|
"completions/mean_terminated_length": 106.701171875,
|
|
"completions/min_length": 41.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.1536,
|
|
"grad_norm": 0.34867721796035767,
|
|
"learning_rate": 9.466192170818504e-07,
|
|
"loss": -0.0004,
|
|
"num_tokens": 33907967.0,
|
|
"reward": 1.02734375,
|
|
"reward_std": 0.12585808336734772,
|
|
"rewards/accuracy_reward_conf_tag": 0.52734375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 48
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 814.0,
|
|
"completions/max_terminated_length": 814.0,
|
|
"completions/mean_length": 111.453125,
|
|
"completions/mean_terminated_length": 111.453125,
|
|
"completions/min_length": 35.0,
|
|
"completions/min_terminated_length": 35.0,
|
|
"epoch": 0.1568,
|
|
"grad_norm": 0.39540815353393555,
|
|
"learning_rate": 9.430604982206405e-07,
|
|
"loss": 0.0047,
|
|
"num_tokens": 34588639.0,
|
|
"reward": 0.9755859375,
|
|
"reward_std": 0.12697330117225647,
|
|
"rewards/accuracy_reward_conf_tag": 0.4765625,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 49
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 420.0,
|
|
"completions/max_terminated_length": 420.0,
|
|
"completions/mean_length": 108.625,
|
|
"completions/mean_terminated_length": 108.625,
|
|
"completions/min_length": 41.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.16,
|
|
"grad_norm": 0.4082591235637665,
|
|
"learning_rate": 9.395017793594306e-07,
|
|
"loss": 0.0064,
|
|
"num_tokens": 35313863.0,
|
|
"reward": 0.962890625,
|
|
"reward_std": 0.13952839374542236,
|
|
"rewards/accuracy_reward_conf_tag": 0.462890625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 50
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 372.0,
|
|
"completions/max_terminated_length": 372.0,
|
|
"completions/mean_length": 101.4765625,
|
|
"completions/mean_terminated_length": 101.4765625,
|
|
"completions/min_length": 40.0,
|
|
"completions/min_terminated_length": 40.0,
|
|
"epoch": 0.1632,
|
|
"grad_norm": 0.32683131098747253,
|
|
"learning_rate": 9.359430604982206e-07,
|
|
"loss": -0.0007,
|
|
"num_tokens": 36018587.0,
|
|
"reward": 0.943359375,
|
|
"reward_std": 0.08364099264144897,
|
|
"rewards/accuracy_reward_conf_tag": 0.443359375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 51
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 760.0,
|
|
"completions/max_terminated_length": 760.0,
|
|
"completions/mean_length": 102.732421875,
|
|
"completions/mean_terminated_length": 102.732421875,
|
|
"completions/min_length": 44.0,
|
|
"completions/min_terminated_length": 44.0,
|
|
"epoch": 0.1664,
|
|
"grad_norm": 0.3419206738471985,
|
|
"learning_rate": 9.323843416370106e-07,
|
|
"loss": -0.0001,
|
|
"num_tokens": 36726514.0,
|
|
"reward": 1.025390625,
|
|
"reward_std": 0.1361870914697647,
|
|
"rewards/accuracy_reward_conf_tag": 0.525390625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 52
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.001953125,
|
|
"completions/max_length": 388.0,
|
|
"completions/max_terminated_length": 388.0,
|
|
"completions/mean_length": 104.595703125,
|
|
"completions/mean_terminated_length": 104.8003921508789,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 38.0,
|
|
"epoch": 0.1696,
|
|
"grad_norm": 0.35515135526657104,
|
|
"learning_rate": 9.288256227758006e-07,
|
|
"loss": 0.001,
|
|
"num_tokens": 37450499.0,
|
|
"reward": 1.0068359375,
|
|
"reward_std": 0.11343596875667572,
|
|
"rewards/accuracy_reward_conf_tag": 0.5078125,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 53
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 444.0,
|
|
"completions/max_terminated_length": 444.0,
|
|
"completions/mean_length": 112.248046875,
|
|
"completions/mean_terminated_length": 112.248046875,
|
|
"completions/min_length": 46.0,
|
|
"completions/min_terminated_length": 46.0,
|
|
"epoch": 0.1728,
|
|
"grad_norm": 0.3525088131427765,
|
|
"learning_rate": 9.252669039145908e-07,
|
|
"loss": 0.0058,
|
|
"num_tokens": 38165586.0,
|
|
"reward": 0.947265625,
|
|
"reward_std": 0.13717183470726013,
|
|
"rewards/accuracy_reward_conf_tag": 0.447265625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 54
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 385.0,
|
|
"completions/max_terminated_length": 385.0,
|
|
"completions/mean_length": 105.18359375,
|
|
"completions/mean_terminated_length": 105.18359375,
|
|
"completions/min_length": 43.0,
|
|
"completions/min_terminated_length": 43.0,
|
|
"epoch": 0.176,
|
|
"grad_norm": 0.285494863986969,
|
|
"learning_rate": 9.217081850533808e-07,
|
|
"loss": -0.0009,
|
|
"num_tokens": 38860288.0,
|
|
"reward": 0.98046875,
|
|
"reward_std": 0.10272009670734406,
|
|
"rewards/accuracy_reward_conf_tag": 0.48046875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 55
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 401.0,
|
|
"completions/max_terminated_length": 401.0,
|
|
"completions/mean_length": 108.349609375,
|
|
"completions/mean_terminated_length": 108.349609375,
|
|
"completions/min_length": 42.0,
|
|
"completions/min_terminated_length": 42.0,
|
|
"epoch": 0.1792,
|
|
"grad_norm": 0.3341920077800751,
|
|
"learning_rate": 9.181494661921708e-07,
|
|
"loss": 0.0054,
|
|
"num_tokens": 39569003.0,
|
|
"reward": 0.931640625,
|
|
"reward_std": 0.1034957766532898,
|
|
"rewards/accuracy_reward_conf_tag": 0.431640625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 56
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 319.0,
|
|
"completions/max_terminated_length": 319.0,
|
|
"completions/mean_length": 98.427734375,
|
|
"completions/mean_terminated_length": 98.427734375,
|
|
"completions/min_length": 38.0,
|
|
"completions/min_terminated_length": 38.0,
|
|
"epoch": 0.1824,
|
|
"grad_norm": 0.41299957036972046,
|
|
"learning_rate": 9.145907473309609e-07,
|
|
"loss": 0.002,
|
|
"num_tokens": 40239166.0,
|
|
"reward": 1.072265625,
|
|
"reward_std": 0.10074299573898315,
|
|
"rewards/accuracy_reward_conf_tag": 0.572265625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 57
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 375.0,
|
|
"completions/max_terminated_length": 375.0,
|
|
"completions/mean_length": 101.177734375,
|
|
"completions/mean_terminated_length": 101.177734375,
|
|
"completions/min_length": 41.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.1856,
|
|
"grad_norm": 0.3476005494594574,
|
|
"learning_rate": 9.110320284697508e-07,
|
|
"loss": 0.0007,
|
|
"num_tokens": 40938969.0,
|
|
"reward": 1.107421875,
|
|
"reward_std": 0.10843471437692642,
|
|
"rewards/accuracy_reward_conf_tag": 0.607421875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 58
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 375.0,
|
|
"completions/max_terminated_length": 375.0,
|
|
"completions/mean_length": 101.630859375,
|
|
"completions/mean_terminated_length": 101.630859375,
|
|
"completions/min_length": 43.0,
|
|
"completions/min_terminated_length": 43.0,
|
|
"epoch": 0.1888,
|
|
"grad_norm": 0.39399948716163635,
|
|
"learning_rate": 9.074733096085408e-07,
|
|
"loss": -0.0007,
|
|
"num_tokens": 41616580.0,
|
|
"reward": 1.017578125,
|
|
"reward_std": 0.15452474355697632,
|
|
"rewards/accuracy_reward_conf_tag": 0.517578125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 59
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 301.0,
|
|
"completions/max_terminated_length": 301.0,
|
|
"completions/mean_length": 100.8046875,
|
|
"completions/mean_terminated_length": 100.8046875,
|
|
"completions/min_length": 43.0,
|
|
"completions/min_terminated_length": 43.0,
|
|
"epoch": 0.192,
|
|
"grad_norm": 0.43161484599113464,
|
|
"learning_rate": 9.03914590747331e-07,
|
|
"loss": -0.004,
|
|
"num_tokens": 42293008.0,
|
|
"reward": 0.951171875,
|
|
"reward_std": 0.12973544001579285,
|
|
"rewards/accuracy_reward_conf_tag": 0.451171875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 60
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 317.0,
|
|
"completions/max_terminated_length": 317.0,
|
|
"completions/mean_length": 111.44140625,
|
|
"completions/mean_terminated_length": 111.44140625,
|
|
"completions/min_length": 41.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.1952,
|
|
"grad_norm": 0.36474984884262085,
|
|
"learning_rate": 9.00355871886121e-07,
|
|
"loss": 0.003,
|
|
"num_tokens": 42975466.0,
|
|
"reward": 1.0517578125,
|
|
"reward_std": 0.13342617452144623,
|
|
"rewards/accuracy_reward_conf_tag": 0.552734375,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 61
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 345.0,
|
|
"completions/max_terminated_length": 345.0,
|
|
"completions/mean_length": 99.87109375,
|
|
"completions/mean_terminated_length": 99.87109375,
|
|
"completions/min_length": 39.0,
|
|
"completions/min_terminated_length": 39.0,
|
|
"epoch": 0.1984,
|
|
"grad_norm": 0.36876848340034485,
|
|
"learning_rate": 8.96797153024911e-07,
|
|
"loss": -0.0061,
|
|
"num_tokens": 43673400.0,
|
|
"reward": 1.0546875,
|
|
"reward_std": 0.12619948387145996,
|
|
"rewards/accuracy_reward_conf_tag": 0.556640625,
|
|
"rewards/format_reward_conf_tag": 0.99609375,
|
|
"step": 62
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 543.0,
|
|
"completions/max_terminated_length": 543.0,
|
|
"completions/mean_length": 110.521484375,
|
|
"completions/mean_terminated_length": 110.521484375,
|
|
"completions/min_length": 44.0,
|
|
"completions/min_terminated_length": 44.0,
|
|
"epoch": 0.2016,
|
|
"grad_norm": 0.38113224506378174,
|
|
"learning_rate": 8.93238434163701e-07,
|
|
"loss": 0.0041,
|
|
"num_tokens": 44388315.0,
|
|
"reward": 1.1005859375,
|
|
"reward_std": 0.11744188517332077,
|
|
"rewards/accuracy_reward_conf_tag": 0.6015625,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 63
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 292.0,
|
|
"completions/max_terminated_length": 292.0,
|
|
"completions/mean_length": 103.083984375,
|
|
"completions/mean_terminated_length": 103.083984375,
|
|
"completions/min_length": 46.0,
|
|
"completions/min_terminated_length": 46.0,
|
|
"epoch": 0.2048,
|
|
"grad_norm": 0.3483692705631256,
|
|
"learning_rate": 8.896797153024911e-07,
|
|
"loss": 0.0019,
|
|
"num_tokens": 45101342.0,
|
|
"reward": 1.0,
|
|
"reward_std": 0.11651946604251862,
|
|
"rewards/accuracy_reward_conf_tag": 0.5,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 64
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 687.0,
|
|
"completions/max_terminated_length": 687.0,
|
|
"completions/mean_length": 107.66796875,
|
|
"completions/mean_terminated_length": 107.66796875,
|
|
"completions/min_length": 44.0,
|
|
"completions/min_terminated_length": 44.0,
|
|
"epoch": 0.208,
|
|
"grad_norm": 0.26865509152412415,
|
|
"learning_rate": 8.861209964412811e-07,
|
|
"loss": 0.0003,
|
|
"num_tokens": 45791420.0,
|
|
"reward": 1.041015625,
|
|
"reward_std": 0.07397978007793427,
|
|
"rewards/accuracy_reward_conf_tag": 0.541015625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 65
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 502.0,
|
|
"completions/max_terminated_length": 502.0,
|
|
"completions/mean_length": 109.1171875,
|
|
"completions/mean_terminated_length": 109.1171875,
|
|
"completions/min_length": 45.0,
|
|
"completions/min_terminated_length": 45.0,
|
|
"epoch": 0.2112,
|
|
"grad_norm": 0.29685088992118835,
|
|
"learning_rate": 8.825622775800712e-07,
|
|
"loss": -0.0006,
|
|
"num_tokens": 46502144.0,
|
|
"reward": 0.96875,
|
|
"reward_std": 0.08534969389438629,
|
|
"rewards/accuracy_reward_conf_tag": 0.46875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 66
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 387.0,
|
|
"completions/max_terminated_length": 387.0,
|
|
"completions/mean_length": 103.14453125,
|
|
"completions/mean_terminated_length": 103.14453125,
|
|
"completions/min_length": 38.0,
|
|
"completions/min_terminated_length": 38.0,
|
|
"epoch": 0.2144,
|
|
"grad_norm": 0.39966320991516113,
|
|
"learning_rate": 8.790035587188612e-07,
|
|
"loss": 0.0024,
|
|
"num_tokens": 47189898.0,
|
|
"reward": 0.919921875,
|
|
"reward_std": 0.12855593860149384,
|
|
"rewards/accuracy_reward_conf_tag": 0.419921875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 67
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.001953125,
|
|
"completions/max_length": 563.0,
|
|
"completions/max_terminated_length": 563.0,
|
|
"completions/mean_length": 118.955078125,
|
|
"completions/mean_terminated_length": 119.1878662109375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 47.0,
|
|
"epoch": 0.2176,
|
|
"grad_norm": 0.290499746799469,
|
|
"learning_rate": 8.754448398576512e-07,
|
|
"loss": -0.004,
|
|
"num_tokens": 47905347.0,
|
|
"reward": 1.0126953125,
|
|
"reward_std": 0.08002512156963348,
|
|
"rewards/accuracy_reward_conf_tag": 0.513671875,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 68
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 290.0,
|
|
"completions/max_terminated_length": 290.0,
|
|
"completions/mean_length": 104.892578125,
|
|
"completions/mean_terminated_length": 104.892578125,
|
|
"completions/min_length": 39.0,
|
|
"completions/min_terminated_length": 39.0,
|
|
"epoch": 0.2208,
|
|
"grad_norm": 0.3273511230945587,
|
|
"learning_rate": 8.718861209964412e-07,
|
|
"loss": 0.0007,
|
|
"num_tokens": 48617268.0,
|
|
"reward": 0.98046875,
|
|
"reward_std": 0.11211731284856796,
|
|
"rewards/accuracy_reward_conf_tag": 0.48046875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 69
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 527.0,
|
|
"completions/max_terminated_length": 527.0,
|
|
"completions/mean_length": 113.2890625,
|
|
"completions/mean_terminated_length": 113.2890625,
|
|
"completions/min_length": 43.0,
|
|
"completions/min_terminated_length": 43.0,
|
|
"epoch": 0.224,
|
|
"grad_norm": 0.4415186643600464,
|
|
"learning_rate": 8.683274021352312e-07,
|
|
"loss": -0.0019,
|
|
"num_tokens": 49328640.0,
|
|
"reward": 0.9208984375,
|
|
"reward_std": 0.09035773575305939,
|
|
"rewards/accuracy_reward_conf_tag": 0.421875,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 70
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 340.0,
|
|
"completions/max_terminated_length": 340.0,
|
|
"completions/mean_length": 115.138671875,
|
|
"completions/mean_terminated_length": 115.138671875,
|
|
"completions/min_length": 48.0,
|
|
"completions/min_terminated_length": 48.0,
|
|
"epoch": 0.2272,
|
|
"grad_norm": 0.29847803711891174,
|
|
"learning_rate": 8.647686832740213e-07,
|
|
"loss": 0.0031,
|
|
"num_tokens": 50057927.0,
|
|
"reward": 1.080078125,
|
|
"reward_std": 0.12132295966148376,
|
|
"rewards/accuracy_reward_conf_tag": 0.580078125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 71
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 420.0,
|
|
"completions/max_terminated_length": 420.0,
|
|
"completions/mean_length": 114.884765625,
|
|
"completions/mean_terminated_length": 114.884765625,
|
|
"completions/min_length": 46.0,
|
|
"completions/min_terminated_length": 46.0,
|
|
"epoch": 0.2304,
|
|
"grad_norm": 0.2995069622993469,
|
|
"learning_rate": 8.612099644128114e-07,
|
|
"loss": 0.0008,
|
|
"num_tokens": 50776676.0,
|
|
"reward": 1.072265625,
|
|
"reward_std": 0.11225028336048126,
|
|
"rewards/accuracy_reward_conf_tag": 0.572265625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 72
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 293.0,
|
|
"completions/max_terminated_length": 293.0,
|
|
"completions/mean_length": 107.51953125,
|
|
"completions/mean_terminated_length": 107.51953125,
|
|
"completions/min_length": 41.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.2336,
|
|
"grad_norm": 0.3123959004878998,
|
|
"learning_rate": 8.576512455516014e-07,
|
|
"loss": -0.0031,
|
|
"num_tokens": 51501750.0,
|
|
"reward": 1.1025390625,
|
|
"reward_std": 0.10646820068359375,
|
|
"rewards/accuracy_reward_conf_tag": 0.603515625,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 73
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 391.0,
|
|
"completions/max_terminated_length": 391.0,
|
|
"completions/mean_length": 109.5078125,
|
|
"completions/mean_terminated_length": 109.5078125,
|
|
"completions/min_length": 40.0,
|
|
"completions/min_terminated_length": 40.0,
|
|
"epoch": 0.2368,
|
|
"grad_norm": 0.36578795313835144,
|
|
"learning_rate": 8.540925266903915e-07,
|
|
"loss": -0.0032,
|
|
"num_tokens": 52199714.0,
|
|
"reward": 1.1796875,
|
|
"reward_std": 0.11783071607351303,
|
|
"rewards/accuracy_reward_conf_tag": 0.6796875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 74
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 309.0,
|
|
"completions/max_terminated_length": 309.0,
|
|
"completions/mean_length": 112.310546875,
|
|
"completions/mean_terminated_length": 112.310546875,
|
|
"completions/min_length": 36.0,
|
|
"completions/min_terminated_length": 36.0,
|
|
"epoch": 0.24,
|
|
"grad_norm": 0.3437058925628662,
|
|
"learning_rate": 8.505338078291815e-07,
|
|
"loss": 0.0002,
|
|
"num_tokens": 52895609.0,
|
|
"reward": 0.953125,
|
|
"reward_std": 0.1145455539226532,
|
|
"rewards/accuracy_reward_conf_tag": 0.453125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 75
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 459.0,
|
|
"completions/max_terminated_length": 459.0,
|
|
"completions/mean_length": 112.302734375,
|
|
"completions/mean_terminated_length": 112.302734375,
|
|
"completions/min_length": 42.0,
|
|
"completions/min_terminated_length": 42.0,
|
|
"epoch": 0.2432,
|
|
"grad_norm": 0.3406142592430115,
|
|
"learning_rate": 8.469750889679715e-07,
|
|
"loss": -0.0012,
|
|
"num_tokens": 53584540.0,
|
|
"reward": 0.96484375,
|
|
"reward_std": 0.11579868942499161,
|
|
"rewards/accuracy_reward_conf_tag": 0.46484375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 76
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 362.0,
|
|
"completions/max_terminated_length": 362.0,
|
|
"completions/mean_length": 110.951171875,
|
|
"completions/mean_terminated_length": 110.951171875,
|
|
"completions/min_length": 45.0,
|
|
"completions/min_terminated_length": 45.0,
|
|
"epoch": 0.2464,
|
|
"grad_norm": 0.35264548659324646,
|
|
"learning_rate": 8.434163701067614e-07,
|
|
"loss": 0.002,
|
|
"num_tokens": 54289955.0,
|
|
"reward": 1.07421875,
|
|
"reward_std": 0.1356578916311264,
|
|
"rewards/accuracy_reward_conf_tag": 0.57421875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 77
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 427.0,
|
|
"completions/max_terminated_length": 427.0,
|
|
"completions/mean_length": 113.669921875,
|
|
"completions/mean_terminated_length": 113.669921875,
|
|
"completions/min_length": 43.0,
|
|
"completions/min_terminated_length": 43.0,
|
|
"epoch": 0.2496,
|
|
"grad_norm": 0.3438767194747925,
|
|
"learning_rate": 8.398576512455516e-07,
|
|
"loss": 0.0021,
|
|
"num_tokens": 55001058.0,
|
|
"reward": 1.052734375,
|
|
"reward_std": 0.12118876725435257,
|
|
"rewards/accuracy_reward_conf_tag": 0.552734375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 78
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 325.0,
|
|
"completions/max_terminated_length": 325.0,
|
|
"completions/mean_length": 116.921875,
|
|
"completions/mean_terminated_length": 116.921875,
|
|
"completions/min_length": 40.0,
|
|
"completions/min_terminated_length": 40.0,
|
|
"epoch": 0.2528,
|
|
"grad_norm": 0.33218762278556824,
|
|
"learning_rate": 8.362989323843416e-07,
|
|
"loss": 0.0009,
|
|
"num_tokens": 55718834.0,
|
|
"reward": 1.02734375,
|
|
"reward_std": 0.08588011562824249,
|
|
"rewards/accuracy_reward_conf_tag": 0.52734375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 79
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 498.0,
|
|
"completions/max_terminated_length": 498.0,
|
|
"completions/mean_length": 114.783203125,
|
|
"completions/mean_terminated_length": 114.783203125,
|
|
"completions/min_length": 40.0,
|
|
"completions/min_terminated_length": 40.0,
|
|
"epoch": 0.256,
|
|
"grad_norm": 0.24106380343437195,
|
|
"learning_rate": 8.327402135231316e-07,
|
|
"loss": -0.003,
|
|
"num_tokens": 56418067.0,
|
|
"reward": 0.990234375,
|
|
"reward_std": 0.09127214550971985,
|
|
"rewards/accuracy_reward_conf_tag": 0.490234375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 80
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 508.0,
|
|
"completions/max_terminated_length": 508.0,
|
|
"completions/mean_length": 131.59765625,
|
|
"completions/mean_terminated_length": 131.59765625,
|
|
"completions/min_length": 40.0,
|
|
"completions/min_terminated_length": 40.0,
|
|
"epoch": 0.2592,
|
|
"grad_norm": 0.34052160382270813,
|
|
"learning_rate": 8.291814946619217e-07,
|
|
"loss": -0.0007,
|
|
"num_tokens": 57118517.0,
|
|
"reward": 0.9453125,
|
|
"reward_std": 0.12986400723457336,
|
|
"rewards/accuracy_reward_conf_tag": 0.4453125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 81
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 336.0,
|
|
"completions/max_terminated_length": 336.0,
|
|
"completions/mean_length": 114.978515625,
|
|
"completions/mean_terminated_length": 114.978515625,
|
|
"completions/min_length": 42.0,
|
|
"completions/min_terminated_length": 42.0,
|
|
"epoch": 0.2624,
|
|
"grad_norm": 0.32460305094718933,
|
|
"learning_rate": 8.256227758007117e-07,
|
|
"loss": -0.0002,
|
|
"num_tokens": 57827042.0,
|
|
"reward": 1.15234375,
|
|
"reward_std": 0.12519346177577972,
|
|
"rewards/accuracy_reward_conf_tag": 0.65234375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 82
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 334.0,
|
|
"completions/max_terminated_length": 334.0,
|
|
"completions/mean_length": 116.74609375,
|
|
"completions/mean_terminated_length": 116.74609375,
|
|
"completions/min_length": 43.0,
|
|
"completions/min_terminated_length": 43.0,
|
|
"epoch": 0.2656,
|
|
"grad_norm": 0.3671474754810333,
|
|
"learning_rate": 8.220640569395017e-07,
|
|
"loss": -0.0018,
|
|
"num_tokens": 58522448.0,
|
|
"reward": 1.07421875,
|
|
"reward_std": 0.10915425419807434,
|
|
"rewards/accuracy_reward_conf_tag": 0.57421875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 83
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 405.0,
|
|
"completions/max_terminated_length": 405.0,
|
|
"completions/mean_length": 124.150390625,
|
|
"completions/mean_terminated_length": 124.150390625,
|
|
"completions/min_length": 50.0,
|
|
"completions/min_terminated_length": 50.0,
|
|
"epoch": 0.2688,
|
|
"grad_norm": 0.261443555355072,
|
|
"learning_rate": 8.185053380782919e-07,
|
|
"loss": -0.0004,
|
|
"num_tokens": 59245277.0,
|
|
"reward": 0.923828125,
|
|
"reward_std": 0.09087396413087845,
|
|
"rewards/accuracy_reward_conf_tag": 0.423828125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 84
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 424.0,
|
|
"completions/max_terminated_length": 424.0,
|
|
"completions/mean_length": 129.166015625,
|
|
"completions/mean_terminated_length": 129.166015625,
|
|
"completions/min_length": 41.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.272,
|
|
"grad_norm": 0.23352903127670288,
|
|
"learning_rate": 8.149466192170819e-07,
|
|
"loss": -0.0007,
|
|
"num_tokens": 59942858.0,
|
|
"reward": 0.951171875,
|
|
"reward_std": 0.08916649222373962,
|
|
"rewards/accuracy_reward_conf_tag": 0.451171875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 85
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 396.0,
|
|
"completions/max_terminated_length": 396.0,
|
|
"completions/mean_length": 124.97265625,
|
|
"completions/mean_terminated_length": 124.97265625,
|
|
"completions/min_length": 47.0,
|
|
"completions/min_terminated_length": 47.0,
|
|
"epoch": 0.2752,
|
|
"grad_norm": 0.4312765300273895,
|
|
"learning_rate": 8.113879003558719e-07,
|
|
"loss": 0.0022,
|
|
"num_tokens": 60664276.0,
|
|
"reward": 1.00390625,
|
|
"reward_std": 0.12960247695446014,
|
|
"rewards/accuracy_reward_conf_tag": 0.50390625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 86
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 480.0,
|
|
"completions/max_terminated_length": 480.0,
|
|
"completions/mean_length": 124.25390625,
|
|
"completions/mean_terminated_length": 124.25390625,
|
|
"completions/min_length": 44.0,
|
|
"completions/min_terminated_length": 44.0,
|
|
"epoch": 0.2784,
|
|
"grad_norm": 0.23223963379859924,
|
|
"learning_rate": 8.078291814946618e-07,
|
|
"loss": -0.0012,
|
|
"num_tokens": 61382414.0,
|
|
"reward": 0.923828125,
|
|
"reward_std": 0.07910466194152832,
|
|
"rewards/accuracy_reward_conf_tag": 0.423828125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 87
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 405.0,
|
|
"completions/max_terminated_length": 405.0,
|
|
"completions/mean_length": 120.853515625,
|
|
"completions/mean_terminated_length": 120.853515625,
|
|
"completions/min_length": 46.0,
|
|
"completions/min_terminated_length": 46.0,
|
|
"epoch": 0.2816,
|
|
"grad_norm": 0.38976651430130005,
|
|
"learning_rate": 8.042704626334519e-07,
|
|
"loss": -0.003,
|
|
"num_tokens": 62066867.0,
|
|
"reward": 1.0517578125,
|
|
"reward_std": 0.14316235482692719,
|
|
"rewards/accuracy_reward_conf_tag": 0.552734375,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 88
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 435.0,
|
|
"completions/max_terminated_length": 435.0,
|
|
"completions/mean_length": 123.83203125,
|
|
"completions/mean_terminated_length": 123.83203125,
|
|
"completions/min_length": 47.0,
|
|
"completions/min_terminated_length": 47.0,
|
|
"epoch": 0.2848,
|
|
"grad_norm": 0.30516958236694336,
|
|
"learning_rate": 8.007117437722419e-07,
|
|
"loss": 0.0042,
|
|
"num_tokens": 62771789.0,
|
|
"reward": 0.9892578125,
|
|
"reward_std": 0.1184254065155983,
|
|
"rewards/accuracy_reward_conf_tag": 0.490234375,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 89
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 613.0,
|
|
"completions/max_terminated_length": 613.0,
|
|
"completions/mean_length": 126.091796875,
|
|
"completions/mean_terminated_length": 126.091796875,
|
|
"completions/min_length": 40.0,
|
|
"completions/min_terminated_length": 40.0,
|
|
"epoch": 0.288,
|
|
"grad_norm": 0.35033246874809265,
|
|
"learning_rate": 7.97153024911032e-07,
|
|
"loss": 0.0012,
|
|
"num_tokens": 63467484.0,
|
|
"reward": 1.052734375,
|
|
"reward_std": 0.13197088241577148,
|
|
"rewards/accuracy_reward_conf_tag": 0.552734375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 90
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 377.0,
|
|
"completions/max_terminated_length": 377.0,
|
|
"completions/mean_length": 123.984375,
|
|
"completions/mean_terminated_length": 123.984375,
|
|
"completions/min_length": 43.0,
|
|
"completions/min_terminated_length": 43.0,
|
|
"epoch": 0.2912,
|
|
"grad_norm": 0.27110937237739563,
|
|
"learning_rate": 7.935943060498221e-07,
|
|
"loss": -0.0001,
|
|
"num_tokens": 64168980.0,
|
|
"reward": 1.0458984375,
|
|
"reward_std": 0.1114620566368103,
|
|
"rewards/accuracy_reward_conf_tag": 0.546875,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 91
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 711.0,
|
|
"completions/max_terminated_length": 711.0,
|
|
"completions/mean_length": 128.50390625,
|
|
"completions/mean_terminated_length": 128.50390625,
|
|
"completions/min_length": 48.0,
|
|
"completions/min_terminated_length": 48.0,
|
|
"epoch": 0.2944,
|
|
"grad_norm": 0.2403380125761032,
|
|
"learning_rate": 7.900355871886121e-07,
|
|
"loss": 0.0039,
|
|
"num_tokens": 64868038.0,
|
|
"reward": 1.015625,
|
|
"reward_std": 0.0864686369895935,
|
|
"rewards/accuracy_reward_conf_tag": 0.515625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 92
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 317.0,
|
|
"completions/max_terminated_length": 317.0,
|
|
"completions/mean_length": 123.880859375,
|
|
"completions/mean_terminated_length": 123.880859375,
|
|
"completions/min_length": 43.0,
|
|
"completions/min_terminated_length": 43.0,
|
|
"epoch": 0.2976,
|
|
"grad_norm": 0.29576191306114197,
|
|
"learning_rate": 7.864768683274021e-07,
|
|
"loss": 0.0007,
|
|
"num_tokens": 65568777.0,
|
|
"reward": 1.048828125,
|
|
"reward_std": 0.10218648612499237,
|
|
"rewards/accuracy_reward_conf_tag": 0.548828125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 93
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 533.0,
|
|
"completions/max_terminated_length": 533.0,
|
|
"completions/mean_length": 122.111328125,
|
|
"completions/mean_terminated_length": 122.111328125,
|
|
"completions/min_length": 53.0,
|
|
"completions/min_terminated_length": 53.0,
|
|
"epoch": 0.3008,
|
|
"grad_norm": 0.39196375012397766,
|
|
"learning_rate": 7.829181494661921e-07,
|
|
"loss": 0.0013,
|
|
"num_tokens": 66287642.0,
|
|
"reward": 1.03125,
|
|
"reward_std": 0.12815654277801514,
|
|
"rewards/accuracy_reward_conf_tag": 0.53125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 94
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 506.0,
|
|
"completions/max_terminated_length": 506.0,
|
|
"completions/mean_length": 119.0078125,
|
|
"completions/mean_terminated_length": 119.0078125,
|
|
"completions/min_length": 48.0,
|
|
"completions/min_terminated_length": 48.0,
|
|
"epoch": 0.304,
|
|
"grad_norm": 0.30568569898605347,
|
|
"learning_rate": 7.793594306049822e-07,
|
|
"loss": -0.0015,
|
|
"num_tokens": 66983814.0,
|
|
"reward": 0.99609375,
|
|
"reward_std": 0.09231622517108917,
|
|
"rewards/accuracy_reward_conf_tag": 0.49609375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 95
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 420.0,
|
|
"completions/max_terminated_length": 420.0,
|
|
"completions/mean_length": 122.529296875,
|
|
"completions/mean_terminated_length": 122.529296875,
|
|
"completions/min_length": 39.0,
|
|
"completions/min_terminated_length": 39.0,
|
|
"epoch": 0.3072,
|
|
"grad_norm": 0.25298038125038147,
|
|
"learning_rate": 7.758007117437722e-07,
|
|
"loss": 0.0025,
|
|
"num_tokens": 67703637.0,
|
|
"reward": 0.876953125,
|
|
"reward_std": 0.07963507622480392,
|
|
"rewards/accuracy_reward_conf_tag": 0.376953125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 96
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 321.0,
|
|
"completions/max_terminated_length": 321.0,
|
|
"completions/mean_length": 125.6484375,
|
|
"completions/mean_terminated_length": 125.6484375,
|
|
"completions/min_length": 42.0,
|
|
"completions/min_terminated_length": 42.0,
|
|
"epoch": 0.3104,
|
|
"grad_norm": 0.3075512647628784,
|
|
"learning_rate": 7.722419928825622e-07,
|
|
"loss": 0.0018,
|
|
"num_tokens": 68426505.0,
|
|
"reward": 1.044921875,
|
|
"reward_std": 0.12158694118261337,
|
|
"rewards/accuracy_reward_conf_tag": 0.544921875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 97
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 327.0,
|
|
"completions/max_terminated_length": 327.0,
|
|
"completions/mean_length": 119.001953125,
|
|
"completions/mean_terminated_length": 119.001953125,
|
|
"completions/min_length": 51.0,
|
|
"completions/min_terminated_length": 51.0,
|
|
"epoch": 0.3136,
|
|
"grad_norm": 0.27927401661872864,
|
|
"learning_rate": 7.686832740213523e-07,
|
|
"loss": 0.0008,
|
|
"num_tokens": 69136106.0,
|
|
"reward": 1.13671875,
|
|
"reward_std": 0.1054728776216507,
|
|
"rewards/accuracy_reward_conf_tag": 0.63671875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 98
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 643.0,
|
|
"completions/max_terminated_length": 643.0,
|
|
"completions/mean_length": 124.619140625,
|
|
"completions/mean_terminated_length": 124.619140625,
|
|
"completions/min_length": 44.0,
|
|
"completions/min_terminated_length": 44.0,
|
|
"epoch": 0.3168,
|
|
"grad_norm": 0.32262176275253296,
|
|
"learning_rate": 7.651245551601423e-07,
|
|
"loss": 0.0011,
|
|
"num_tokens": 69851343.0,
|
|
"reward": 0.9853515625,
|
|
"reward_std": 0.13401469588279724,
|
|
"rewards/accuracy_reward_conf_tag": 0.486328125,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 99
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 415.0,
|
|
"completions/max_terminated_length": 415.0,
|
|
"completions/mean_length": 121.791015625,
|
|
"completions/mean_terminated_length": 121.791015625,
|
|
"completions/min_length": 45.0,
|
|
"completions/min_terminated_length": 45.0,
|
|
"epoch": 0.32,
|
|
"grad_norm": 0.23345372080802917,
|
|
"learning_rate": 7.615658362989323e-07,
|
|
"loss": 0.0062,
|
|
"num_tokens": 70537860.0,
|
|
"reward": 1.0,
|
|
"reward_std": 0.08404040336608887,
|
|
"rewards/accuracy_reward_conf_tag": 0.5,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 100
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 470.0,
|
|
"completions/max_terminated_length": 470.0,
|
|
"completions/mean_length": 123.078125,
|
|
"completions/mean_terminated_length": 123.078125,
|
|
"completions/min_length": 44.0,
|
|
"completions/min_terminated_length": 44.0,
|
|
"epoch": 0.3232,
|
|
"grad_norm": 0.3626199960708618,
|
|
"learning_rate": 7.580071174377223e-07,
|
|
"loss": 0.0054,
|
|
"num_tokens": 71219380.0,
|
|
"reward": 0.8740234375,
|
|
"reward_std": 0.13244062662124634,
|
|
"rewards/accuracy_reward_conf_tag": 0.375,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 101
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 440.0,
|
|
"completions/max_terminated_length": 440.0,
|
|
"completions/mean_length": 125.478515625,
|
|
"completions/mean_terminated_length": 125.478515625,
|
|
"completions/min_length": 45.0,
|
|
"completions/min_terminated_length": 45.0,
|
|
"epoch": 0.3264,
|
|
"grad_norm": 0.32900869846343994,
|
|
"learning_rate": 7.544483985765125e-07,
|
|
"loss": -0.0028,
|
|
"num_tokens": 71921705.0,
|
|
"reward": 1.0595703125,
|
|
"reward_std": 0.14519304037094116,
|
|
"rewards/accuracy_reward_conf_tag": 0.560546875,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 102
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 739.0,
|
|
"completions/max_terminated_length": 739.0,
|
|
"completions/mean_length": 129.78515625,
|
|
"completions/mean_terminated_length": 129.78515625,
|
|
"completions/min_length": 45.0,
|
|
"completions/min_terminated_length": 45.0,
|
|
"epoch": 0.3296,
|
|
"grad_norm": 0.27870070934295654,
|
|
"learning_rate": 7.508896797153025e-07,
|
|
"loss": -0.0008,
|
|
"num_tokens": 72607955.0,
|
|
"reward": 1.0859375,
|
|
"reward_std": 0.10784495621919632,
|
|
"rewards/accuracy_reward_conf_tag": 0.5859375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 103
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.001953125,
|
|
"completions/max_length": 342.0,
|
|
"completions/max_terminated_length": 342.0,
|
|
"completions/mean_length": 119.208984375,
|
|
"completions/mean_terminated_length": 119.44226837158203,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 63.0,
|
|
"epoch": 0.3328,
|
|
"grad_norm": 0.3086647093296051,
|
|
"learning_rate": 7.473309608540925e-07,
|
|
"loss": -0.0011,
|
|
"num_tokens": 73312158.0,
|
|
"reward": 1.0751953125,
|
|
"reward_std": 0.11778206378221512,
|
|
"rewards/accuracy_reward_conf_tag": 0.576171875,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 104
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 383.0,
|
|
"completions/max_terminated_length": 383.0,
|
|
"completions/mean_length": 126.595703125,
|
|
"completions/mean_terminated_length": 126.595703125,
|
|
"completions/min_length": 41.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.336,
|
|
"grad_norm": 0.24830111861228943,
|
|
"learning_rate": 7.437722419928826e-07,
|
|
"loss": -0.0003,
|
|
"num_tokens": 74005687.0,
|
|
"reward": 1.0390625,
|
|
"reward_std": 0.11961427330970764,
|
|
"rewards/accuracy_reward_conf_tag": 0.5390625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 105
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 458.0,
|
|
"completions/max_terminated_length": 458.0,
|
|
"completions/mean_length": 128.09375,
|
|
"completions/mean_terminated_length": 128.09375,
|
|
"completions/min_length": 49.0,
|
|
"completions/min_terminated_length": 49.0,
|
|
"epoch": 0.3392,
|
|
"grad_norm": 0.36403143405914307,
|
|
"learning_rate": 7.402135231316725e-07,
|
|
"loss": -0.0005,
|
|
"num_tokens": 74718207.0,
|
|
"reward": 0.970703125,
|
|
"reward_std": 0.10586786270141602,
|
|
"rewards/accuracy_reward_conf_tag": 0.470703125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 106
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 462.0,
|
|
"completions/max_terminated_length": 462.0,
|
|
"completions/mean_length": 127.37890625,
|
|
"completions/mean_terminated_length": 127.37890625,
|
|
"completions/min_length": 46.0,
|
|
"completions/min_terminated_length": 46.0,
|
|
"epoch": 0.3424,
|
|
"grad_norm": 0.3605183959007263,
|
|
"learning_rate": 7.366548042704625e-07,
|
|
"loss": 0.0076,
|
|
"num_tokens": 75440481.0,
|
|
"reward": 0.982421875,
|
|
"reward_std": 0.14683744311332703,
|
|
"rewards/accuracy_reward_conf_tag": 0.482421875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 107
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 483.0,
|
|
"completions/max_terminated_length": 483.0,
|
|
"completions/mean_length": 119.15625,
|
|
"completions/mean_terminated_length": 119.15625,
|
|
"completions/min_length": 44.0,
|
|
"completions/min_terminated_length": 44.0,
|
|
"epoch": 0.3456,
|
|
"grad_norm": 0.36166146397590637,
|
|
"learning_rate": 7.330960854092527e-07,
|
|
"loss": -0.0064,
|
|
"num_tokens": 76160809.0,
|
|
"reward": 0.9990234375,
|
|
"reward_std": 0.1018570065498352,
|
|
"rewards/accuracy_reward_conf_tag": 0.5,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 108
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 417.0,
|
|
"completions/max_terminated_length": 417.0,
|
|
"completions/mean_length": 128.896484375,
|
|
"completions/mean_terminated_length": 128.896484375,
|
|
"completions/min_length": 44.0,
|
|
"completions/min_terminated_length": 44.0,
|
|
"epoch": 0.3488,
|
|
"grad_norm": 0.1903688907623291,
|
|
"learning_rate": 7.295373665480427e-07,
|
|
"loss": 0.0003,
|
|
"num_tokens": 76886116.0,
|
|
"reward": 0.96484375,
|
|
"reward_std": 0.05852591618895531,
|
|
"rewards/accuracy_reward_conf_tag": 0.46484375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 109
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 525.0,
|
|
"completions/max_terminated_length": 525.0,
|
|
"completions/mean_length": 127.50390625,
|
|
"completions/mean_terminated_length": 127.50390625,
|
|
"completions/min_length": 48.0,
|
|
"completions/min_terminated_length": 48.0,
|
|
"epoch": 0.352,
|
|
"grad_norm": 0.2527593970298767,
|
|
"learning_rate": 7.259786476868327e-07,
|
|
"loss": 0.0003,
|
|
"num_tokens": 77611566.0,
|
|
"reward": 0.998046875,
|
|
"reward_std": 0.0958084836602211,
|
|
"rewards/accuracy_reward_conf_tag": 0.498046875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 110
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 396.0,
|
|
"completions/max_terminated_length": 396.0,
|
|
"completions/mean_length": 122.361328125,
|
|
"completions/mean_terminated_length": 122.361328125,
|
|
"completions/min_length": 50.0,
|
|
"completions/min_terminated_length": 50.0,
|
|
"epoch": 0.3552,
|
|
"grad_norm": 0.3307143449783325,
|
|
"learning_rate": 7.224199288256227e-07,
|
|
"loss": 0.0033,
|
|
"num_tokens": 78295487.0,
|
|
"reward": 1.076171875,
|
|
"reward_std": 0.09804637730121613,
|
|
"rewards/accuracy_reward_conf_tag": 0.576171875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 111
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 386.0,
|
|
"completions/max_terminated_length": 386.0,
|
|
"completions/mean_length": 120.083984375,
|
|
"completions/mean_terminated_length": 120.083984375,
|
|
"completions/min_length": 44.0,
|
|
"completions/min_terminated_length": 44.0,
|
|
"epoch": 0.3584,
|
|
"grad_norm": 0.27053558826446533,
|
|
"learning_rate": 7.188612099644128e-07,
|
|
"loss": 0.0036,
|
|
"num_tokens": 79002450.0,
|
|
"reward": 1.0,
|
|
"reward_std": 0.08529354631900787,
|
|
"rewards/accuracy_reward_conf_tag": 0.5,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 112
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 288.0,
|
|
"completions/max_terminated_length": 288.0,
|
|
"completions/mean_length": 113.77734375,
|
|
"completions/mean_terminated_length": 113.77734375,
|
|
"completions/min_length": 43.0,
|
|
"completions/min_terminated_length": 43.0,
|
|
"epoch": 0.3616,
|
|
"grad_norm": 0.3072631061077118,
|
|
"learning_rate": 7.153024911032028e-07,
|
|
"loss": 0.0008,
|
|
"num_tokens": 79721328.0,
|
|
"reward": 1.068359375,
|
|
"reward_std": 0.10514955222606659,
|
|
"rewards/accuracy_reward_conf_tag": 0.568359375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 113
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 571.0,
|
|
"completions/max_terminated_length": 571.0,
|
|
"completions/mean_length": 131.81640625,
|
|
"completions/mean_terminated_length": 131.81640625,
|
|
"completions/min_length": 46.0,
|
|
"completions/min_terminated_length": 46.0,
|
|
"epoch": 0.3648,
|
|
"grad_norm": 0.2604560852050781,
|
|
"learning_rate": 7.117437722419929e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 80445730.0,
|
|
"reward": 0.91796875,
|
|
"reward_std": 0.08462892472743988,
|
|
"rewards/accuracy_reward_conf_tag": 0.41796875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 114
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 556.0,
|
|
"completions/max_terminated_length": 556.0,
|
|
"completions/mean_length": 120.966796875,
|
|
"completions/mean_terminated_length": 120.966796875,
|
|
"completions/min_length": 55.0,
|
|
"completions/min_terminated_length": 55.0,
|
|
"epoch": 0.368,
|
|
"grad_norm": 0.2620708644390106,
|
|
"learning_rate": 7.08185053380783e-07,
|
|
"loss": -0.0027,
|
|
"num_tokens": 81157393.0,
|
|
"reward": 1.029296875,
|
|
"reward_std": 0.08475994318723679,
|
|
"rewards/accuracy_reward_conf_tag": 0.529296875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 115
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 351.0,
|
|
"completions/max_terminated_length": 351.0,
|
|
"completions/mean_length": 116.03125,
|
|
"completions/mean_terminated_length": 116.03125,
|
|
"completions/min_length": 53.0,
|
|
"completions/min_terminated_length": 53.0,
|
|
"epoch": 0.3712,
|
|
"grad_norm": 0.3374411165714264,
|
|
"learning_rate": 7.046263345195729e-07,
|
|
"loss": 0.0031,
|
|
"num_tokens": 81862697.0,
|
|
"reward": 1.0546875,
|
|
"reward_std": 0.11638721823692322,
|
|
"rewards/accuracy_reward_conf_tag": 0.5546875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 116
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 284.0,
|
|
"completions/max_terminated_length": 284.0,
|
|
"completions/mean_length": 115.44140625,
|
|
"completions/mean_terminated_length": 115.44140625,
|
|
"completions/min_length": 43.0,
|
|
"completions/min_terminated_length": 43.0,
|
|
"epoch": 0.3744,
|
|
"grad_norm": 0.2924691438674927,
|
|
"learning_rate": 7.010676156583629e-07,
|
|
"loss": -0.0026,
|
|
"num_tokens": 82538027.0,
|
|
"reward": 0.978515625,
|
|
"reward_std": 0.0666126236319542,
|
|
"rewards/accuracy_reward_conf_tag": 0.478515625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 117
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 719.0,
|
|
"completions/max_terminated_length": 719.0,
|
|
"completions/mean_length": 118.435546875,
|
|
"completions/mean_terminated_length": 118.435546875,
|
|
"completions/min_length": 58.0,
|
|
"completions/min_terminated_length": 58.0,
|
|
"epoch": 0.3776,
|
|
"grad_norm": 0.3892248868942261,
|
|
"learning_rate": 6.975088967971529e-07,
|
|
"loss": 0.0001,
|
|
"num_tokens": 83243658.0,
|
|
"reward": 1.001953125,
|
|
"reward_std": 0.09284786880016327,
|
|
"rewards/accuracy_reward_conf_tag": 0.501953125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 118
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 405.0,
|
|
"completions/max_terminated_length": 405.0,
|
|
"completions/mean_length": 126.115234375,
|
|
"completions/mean_terminated_length": 126.115234375,
|
|
"completions/min_length": 48.0,
|
|
"completions/min_terminated_length": 48.0,
|
|
"epoch": 0.3808,
|
|
"grad_norm": 0.34575212001800537,
|
|
"learning_rate": 6.93950177935943e-07,
|
|
"loss": -0.0026,
|
|
"num_tokens": 83942717.0,
|
|
"reward": 0.974609375,
|
|
"reward_std": 0.12388540059328079,
|
|
"rewards/accuracy_reward_conf_tag": 0.474609375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 119
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 371.0,
|
|
"completions/max_terminated_length": 371.0,
|
|
"completions/mean_length": 120.318359375,
|
|
"completions/mean_terminated_length": 120.318359375,
|
|
"completions/min_length": 44.0,
|
|
"completions/min_terminated_length": 44.0,
|
|
"epoch": 0.384,
|
|
"grad_norm": 0.3228378891944885,
|
|
"learning_rate": 6.903914590747331e-07,
|
|
"loss": -0.0019,
|
|
"num_tokens": 84644472.0,
|
|
"reward": 1.0615234375,
|
|
"reward_std": 0.11856082826852798,
|
|
"rewards/accuracy_reward_conf_tag": 0.5625,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 120
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 381.0,
|
|
"completions/max_terminated_length": 381.0,
|
|
"completions/mean_length": 115.396484375,
|
|
"completions/mean_terminated_length": 115.396484375,
|
|
"completions/min_length": 41.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.3872,
|
|
"grad_norm": 0.3080546259880066,
|
|
"learning_rate": 6.868327402135231e-07,
|
|
"loss": 0.0042,
|
|
"num_tokens": 85346659.0,
|
|
"reward": 1.0322265625,
|
|
"reward_std": 0.09166219830513,
|
|
"rewards/accuracy_reward_conf_tag": 0.533203125,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 121
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 557.0,
|
|
"completions/max_terminated_length": 557.0,
|
|
"completions/mean_length": 123.498046875,
|
|
"completions/mean_terminated_length": 123.498046875,
|
|
"completions/min_length": 51.0,
|
|
"completions/min_terminated_length": 51.0,
|
|
"epoch": 0.3904,
|
|
"grad_norm": 0.2807541489601135,
|
|
"learning_rate": 6.832740213523132e-07,
|
|
"loss": 0.0019,
|
|
"num_tokens": 86033194.0,
|
|
"reward": 1.119140625,
|
|
"reward_std": 0.11750739067792892,
|
|
"rewards/accuracy_reward_conf_tag": 0.619140625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 122
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 381.0,
|
|
"completions/max_terminated_length": 381.0,
|
|
"completions/mean_length": 119.154296875,
|
|
"completions/mean_terminated_length": 119.154296875,
|
|
"completions/min_length": 48.0,
|
|
"completions/min_terminated_length": 48.0,
|
|
"epoch": 0.3936,
|
|
"grad_norm": 0.2932218313217163,
|
|
"learning_rate": 6.797153024911032e-07,
|
|
"loss": -0.0024,
|
|
"num_tokens": 86743521.0,
|
|
"reward": 0.982421875,
|
|
"reward_std": 0.12789133191108704,
|
|
"rewards/accuracy_reward_conf_tag": 0.482421875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 123
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 451.0,
|
|
"completions/max_terminated_length": 451.0,
|
|
"completions/mean_length": 111.0,
|
|
"completions/mean_terminated_length": 111.0,
|
|
"completions/min_length": 47.0,
|
|
"completions/min_terminated_length": 47.0,
|
|
"epoch": 0.3968,
|
|
"grad_norm": 0.2224765568971634,
|
|
"learning_rate": 6.761565836298932e-07,
|
|
"loss": 0.0008,
|
|
"num_tokens": 87461897.0,
|
|
"reward": 0.962890625,
|
|
"reward_std": 0.09001900255680084,
|
|
"rewards/accuracy_reward_conf_tag": 0.462890625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 124
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 352.0,
|
|
"completions/max_terminated_length": 352.0,
|
|
"completions/mean_length": 118.009765625,
|
|
"completions/mean_terminated_length": 118.009765625,
|
|
"completions/min_length": 49.0,
|
|
"completions/min_terminated_length": 49.0,
|
|
"epoch": 0.4,
|
|
"grad_norm": 0.3143448233604431,
|
|
"learning_rate": 6.725978647686833e-07,
|
|
"loss": 0.0015,
|
|
"num_tokens": 88171806.0,
|
|
"reward": 1.1005859375,
|
|
"reward_std": 0.0854901671409607,
|
|
"rewards/accuracy_reward_conf_tag": 0.6015625,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 125
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 370.0,
|
|
"completions/max_terminated_length": 370.0,
|
|
"completions/mean_length": 127.052734375,
|
|
"completions/mean_terminated_length": 127.052734375,
|
|
"completions/min_length": 47.0,
|
|
"completions/min_terminated_length": 47.0,
|
|
"epoch": 0.4032,
|
|
"grad_norm": 0.2967285215854645,
|
|
"learning_rate": 6.690391459074733e-07,
|
|
"loss": -0.0034,
|
|
"num_tokens": 88882945.0,
|
|
"reward": 0.95703125,
|
|
"reward_std": 0.12855716049671173,
|
|
"rewards/accuracy_reward_conf_tag": 0.45703125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 126
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 361.0,
|
|
"completions/max_terminated_length": 361.0,
|
|
"completions/mean_length": 112.880859375,
|
|
"completions/mean_terminated_length": 112.880859375,
|
|
"completions/min_length": 38.0,
|
|
"completions/min_terminated_length": 38.0,
|
|
"epoch": 0.4064,
|
|
"grad_norm": 0.29222628474235535,
|
|
"learning_rate": 6.654804270462633e-07,
|
|
"loss": 0.0005,
|
|
"num_tokens": 89552300.0,
|
|
"reward": 1.01953125,
|
|
"reward_std": 0.10645762085914612,
|
|
"rewards/accuracy_reward_conf_tag": 0.51953125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 127
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 740.0,
|
|
"completions/max_terminated_length": 740.0,
|
|
"completions/mean_length": 110.9296875,
|
|
"completions/mean_terminated_length": 110.9296875,
|
|
"completions/min_length": 41.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.4096,
|
|
"grad_norm": 0.3010176420211792,
|
|
"learning_rate": 6.619217081850533e-07,
|
|
"loss": 0.003,
|
|
"num_tokens": 90253248.0,
|
|
"reward": 1.0439453125,
|
|
"reward_std": 0.08535781502723694,
|
|
"rewards/accuracy_reward_conf_tag": 0.544921875,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 128
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 396.0,
|
|
"completions/max_terminated_length": 396.0,
|
|
"completions/mean_length": 116.814453125,
|
|
"completions/mean_terminated_length": 116.814453125,
|
|
"completions/min_length": 50.0,
|
|
"completions/min_terminated_length": 50.0,
|
|
"epoch": 0.4128,
|
|
"grad_norm": 0.3324480950832367,
|
|
"learning_rate": 6.583629893238434e-07,
|
|
"loss": 0.0007,
|
|
"num_tokens": 90963753.0,
|
|
"reward": 0.994140625,
|
|
"reward_std": 0.14064979553222656,
|
|
"rewards/accuracy_reward_conf_tag": 0.494140625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 129
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 509.0,
|
|
"completions/max_terminated_length": 509.0,
|
|
"completions/mean_length": 120.736328125,
|
|
"completions/mean_terminated_length": 120.736328125,
|
|
"completions/min_length": 41.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.416,
|
|
"grad_norm": 0.3121265172958374,
|
|
"learning_rate": 6.548042704626334e-07,
|
|
"loss": 0.0041,
|
|
"num_tokens": 91660994.0,
|
|
"reward": 1.0234375,
|
|
"reward_std": 0.11435520648956299,
|
|
"rewards/accuracy_reward_conf_tag": 0.5234375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 130
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 444.0,
|
|
"completions/max_terminated_length": 444.0,
|
|
"completions/mean_length": 115.466796875,
|
|
"completions/mean_terminated_length": 115.466796875,
|
|
"completions/min_length": 41.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.4192,
|
|
"grad_norm": 0.20846891403198242,
|
|
"learning_rate": 6.512455516014234e-07,
|
|
"loss": -0.0025,
|
|
"num_tokens": 92371857.0,
|
|
"reward": 1.083984375,
|
|
"reward_std": 0.07213812321424484,
|
|
"rewards/accuracy_reward_conf_tag": 0.583984375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 131
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 483.0,
|
|
"completions/max_terminated_length": 483.0,
|
|
"completions/mean_length": 122.80859375,
|
|
"completions/mean_terminated_length": 122.80859375,
|
|
"completions/min_length": 51.0,
|
|
"completions/min_terminated_length": 51.0,
|
|
"epoch": 0.4224,
|
|
"grad_norm": 0.3403347432613373,
|
|
"learning_rate": 6.476868327402136e-07,
|
|
"loss": 0.0003,
|
|
"num_tokens": 93079431.0,
|
|
"reward": 0.892578125,
|
|
"reward_std": 0.12572510540485382,
|
|
"rewards/accuracy_reward_conf_tag": 0.392578125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 132
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 468.0,
|
|
"completions/max_terminated_length": 468.0,
|
|
"completions/mean_length": 118.029296875,
|
|
"completions/mean_terminated_length": 118.029296875,
|
|
"completions/min_length": 49.0,
|
|
"completions/min_terminated_length": 49.0,
|
|
"epoch": 0.4256,
|
|
"grad_norm": 0.32223713397979736,
|
|
"learning_rate": 6.441281138790036e-07,
|
|
"loss": 0.0029,
|
|
"num_tokens": 93788366.0,
|
|
"reward": 1.150390625,
|
|
"reward_std": 0.1244158148765564,
|
|
"rewards/accuracy_reward_conf_tag": 0.650390625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 133
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 333.0,
|
|
"completions/max_terminated_length": 333.0,
|
|
"completions/mean_length": 114.587890625,
|
|
"completions/mean_terminated_length": 114.587890625,
|
|
"completions/min_length": 39.0,
|
|
"completions/min_terminated_length": 39.0,
|
|
"epoch": 0.4288,
|
|
"grad_norm": 0.2855245769023895,
|
|
"learning_rate": 6.405693950177936e-07,
|
|
"loss": -0.0027,
|
|
"num_tokens": 94470443.0,
|
|
"reward": 0.9609375,
|
|
"reward_std": 0.08022482693195343,
|
|
"rewards/accuracy_reward_conf_tag": 0.4609375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 134
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 404.0,
|
|
"completions/max_terminated_length": 404.0,
|
|
"completions/mean_length": 115.17578125,
|
|
"completions/mean_terminated_length": 115.17578125,
|
|
"completions/min_length": 44.0,
|
|
"completions/min_terminated_length": 44.0,
|
|
"epoch": 0.432,
|
|
"grad_norm": 0.28281596302986145,
|
|
"learning_rate": 6.370106761565835e-07,
|
|
"loss": 0.0042,
|
|
"num_tokens": 95182293.0,
|
|
"reward": 1.037109375,
|
|
"reward_std": 0.11684277653694153,
|
|
"rewards/accuracy_reward_conf_tag": 0.537109375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 135
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 442.0,
|
|
"completions/max_terminated_length": 442.0,
|
|
"completions/mean_length": 119.787109375,
|
|
"completions/mean_terminated_length": 119.787109375,
|
|
"completions/min_length": 46.0,
|
|
"completions/min_terminated_length": 46.0,
|
|
"epoch": 0.4352,
|
|
"grad_norm": 0.2585252523422241,
|
|
"learning_rate": 6.334519572953736e-07,
|
|
"loss": 0.0017,
|
|
"num_tokens": 95877848.0,
|
|
"reward": 0.9501953125,
|
|
"reward_std": 0.0925920158624649,
|
|
"rewards/accuracy_reward_conf_tag": 0.451171875,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 136
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 667.0,
|
|
"completions/max_terminated_length": 667.0,
|
|
"completions/mean_length": 117.841796875,
|
|
"completions/mean_terminated_length": 117.841796875,
|
|
"completions/min_length": 46.0,
|
|
"completions/min_terminated_length": 46.0,
|
|
"epoch": 0.4384,
|
|
"grad_norm": 0.3071497976779938,
|
|
"learning_rate": 6.298932384341636e-07,
|
|
"loss": 0.0005,
|
|
"num_tokens": 96571727.0,
|
|
"reward": 1.009765625,
|
|
"reward_std": 0.09994859993457794,
|
|
"rewards/accuracy_reward_conf_tag": 0.509765625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 137
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 394.0,
|
|
"completions/max_terminated_length": 394.0,
|
|
"completions/mean_length": 120.087890625,
|
|
"completions/mean_terminated_length": 120.087890625,
|
|
"completions/min_length": 49.0,
|
|
"completions/min_terminated_length": 49.0,
|
|
"epoch": 0.4416,
|
|
"grad_norm": 0.30834370851516724,
|
|
"learning_rate": 6.263345195729537e-07,
|
|
"loss": -0.0003,
|
|
"num_tokens": 97261036.0,
|
|
"reward": 1.076171875,
|
|
"reward_std": 0.12316463887691498,
|
|
"rewards/accuracy_reward_conf_tag": 0.576171875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 138
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 566.0,
|
|
"completions/max_terminated_length": 566.0,
|
|
"completions/mean_length": 120.49609375,
|
|
"completions/mean_terminated_length": 120.49609375,
|
|
"completions/min_length": 38.0,
|
|
"completions/min_terminated_length": 38.0,
|
|
"epoch": 0.4448,
|
|
"grad_norm": 0.3317796289920807,
|
|
"learning_rate": 6.227758007117438e-07,
|
|
"loss": 0.0068,
|
|
"num_tokens": 97985682.0,
|
|
"reward": 1.0029296875,
|
|
"reward_std": 0.09929457306861877,
|
|
"rewards/accuracy_reward_conf_tag": 0.50390625,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 139
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 338.0,
|
|
"completions/max_terminated_length": 338.0,
|
|
"completions/mean_length": 115.91015625,
|
|
"completions/mean_terminated_length": 115.91015625,
|
|
"completions/min_length": 49.0,
|
|
"completions/min_terminated_length": 49.0,
|
|
"epoch": 0.448,
|
|
"grad_norm": 0.30667245388031006,
|
|
"learning_rate": 6.192170818505338e-07,
|
|
"loss": -0.0021,
|
|
"num_tokens": 98692340.0,
|
|
"reward": 1.064453125,
|
|
"reward_std": 0.11494496464729309,
|
|
"rewards/accuracy_reward_conf_tag": 0.564453125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 140
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 506.0,
|
|
"completions/max_terminated_length": 506.0,
|
|
"completions/mean_length": 120.390625,
|
|
"completions/mean_terminated_length": 120.390625,
|
|
"completions/min_length": 46.0,
|
|
"completions/min_terminated_length": 46.0,
|
|
"epoch": 0.4512,
|
|
"grad_norm": 0.2922412157058716,
|
|
"learning_rate": 6.156583629893238e-07,
|
|
"loss": 0.0067,
|
|
"num_tokens": 99419076.0,
|
|
"reward": 1.078125,
|
|
"reward_std": 0.115932896733284,
|
|
"rewards/accuracy_reward_conf_tag": 0.578125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 141
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1022.0,
|
|
"completions/max_terminated_length": 1022.0,
|
|
"completions/mean_length": 121.181640625,
|
|
"completions/mean_terminated_length": 121.181640625,
|
|
"completions/min_length": 41.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.4544,
|
|
"grad_norm": 0.2767346501350403,
|
|
"learning_rate": 6.120996441281139e-07,
|
|
"loss": 0.0063,
|
|
"num_tokens": 100136025.0,
|
|
"reward": 0.951171875,
|
|
"reward_std": 0.09436499327421188,
|
|
"rewards/accuracy_reward_conf_tag": 0.451171875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 142
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 978.0,
|
|
"completions/max_terminated_length": 978.0,
|
|
"completions/mean_length": 135.03515625,
|
|
"completions/mean_terminated_length": 135.03515625,
|
|
"completions/min_length": 50.0,
|
|
"completions/min_terminated_length": 50.0,
|
|
"epoch": 0.4576,
|
|
"grad_norm": 0.19604112207889557,
|
|
"learning_rate": 6.085409252669039e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 100862955.0,
|
|
"reward": 0.9580078125,
|
|
"reward_std": 0.06812161952257156,
|
|
"rewards/accuracy_reward_conf_tag": 0.458984375,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 143
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 589.0,
|
|
"completions/max_terminated_length": 589.0,
|
|
"completions/mean_length": 132.37890625,
|
|
"completions/mean_terminated_length": 132.37890625,
|
|
"completions/min_length": 41.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.4608,
|
|
"grad_norm": 0.2899148464202881,
|
|
"learning_rate": 6.04982206405694e-07,
|
|
"loss": 0.004,
|
|
"num_tokens": 101580701.0,
|
|
"reward": 0.837890625,
|
|
"reward_std": 0.10290726274251938,
|
|
"rewards/accuracy_reward_conf_tag": 0.337890625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 144
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 790.0,
|
|
"completions/max_terminated_length": 790.0,
|
|
"completions/mean_length": 129.275390625,
|
|
"completions/mean_terminated_length": 129.275390625,
|
|
"completions/min_length": 41.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.464,
|
|
"grad_norm": 0.19214464724063873,
|
|
"learning_rate": 6.014234875444839e-07,
|
|
"loss": 0.0004,
|
|
"num_tokens": 102279466.0,
|
|
"reward": 0.9375,
|
|
"reward_std": 0.05596347898244858,
|
|
"rewards/accuracy_reward_conf_tag": 0.4375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 145
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 396.0,
|
|
"completions/max_terminated_length": 396.0,
|
|
"completions/mean_length": 133.865234375,
|
|
"completions/mean_terminated_length": 133.865234375,
|
|
"completions/min_length": 51.0,
|
|
"completions/min_terminated_length": 51.0,
|
|
"epoch": 0.4672,
|
|
"grad_norm": 0.2788802683353424,
|
|
"learning_rate": 5.97864768683274e-07,
|
|
"loss": 0.0023,
|
|
"num_tokens": 102992461.0,
|
|
"reward": 1.013671875,
|
|
"reward_std": 0.08035779744386673,
|
|
"rewards/accuracy_reward_conf_tag": 0.513671875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 146
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 513.0,
|
|
"completions/max_terminated_length": 513.0,
|
|
"completions/mean_length": 134.048828125,
|
|
"completions/mean_terminated_length": 134.048828125,
|
|
"completions/min_length": 45.0,
|
|
"completions/min_terminated_length": 45.0,
|
|
"epoch": 0.4704,
|
|
"grad_norm": 0.3049945533275604,
|
|
"learning_rate": 5.94306049822064e-07,
|
|
"loss": -0.0014,
|
|
"num_tokens": 103722622.0,
|
|
"reward": 1.005859375,
|
|
"reward_std": 0.11737515777349472,
|
|
"rewards/accuracy_reward_conf_tag": 0.505859375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 147
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 427.0,
|
|
"completions/max_terminated_length": 427.0,
|
|
"completions/mean_length": 131.533203125,
|
|
"completions/mean_terminated_length": 131.533203125,
|
|
"completions/min_length": 64.0,
|
|
"completions/min_terminated_length": 64.0,
|
|
"epoch": 0.4736,
|
|
"grad_norm": 0.24999631941318512,
|
|
"learning_rate": 5.90747330960854e-07,
|
|
"loss": 0.0002,
|
|
"num_tokens": 104435783.0,
|
|
"reward": 0.958984375,
|
|
"reward_std": 0.08745656907558441,
|
|
"rewards/accuracy_reward_conf_tag": 0.458984375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 148
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 656.0,
|
|
"completions/max_terminated_length": 656.0,
|
|
"completions/mean_length": 134.099609375,
|
|
"completions/mean_terminated_length": 134.099609375,
|
|
"completions/min_length": 54.0,
|
|
"completions/min_terminated_length": 54.0,
|
|
"epoch": 0.4768,
|
|
"grad_norm": 0.2993578016757965,
|
|
"learning_rate": 5.871886120996441e-07,
|
|
"loss": 0.0044,
|
|
"num_tokens": 105146954.0,
|
|
"reward": 1.052734375,
|
|
"reward_std": 0.11027441918849945,
|
|
"rewards/accuracy_reward_conf_tag": 0.552734375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 149
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 577.0,
|
|
"completions/max_terminated_length": 577.0,
|
|
"completions/mean_length": 141.923828125,
|
|
"completions/mean_terminated_length": 141.923828125,
|
|
"completions/min_length": 43.0,
|
|
"completions/min_terminated_length": 43.0,
|
|
"epoch": 0.48,
|
|
"grad_norm": 0.29732728004455566,
|
|
"learning_rate": 5.836298932384342e-07,
|
|
"loss": -0.0001,
|
|
"num_tokens": 105854939.0,
|
|
"reward": 1.060546875,
|
|
"reward_std": 0.1720854938030243,
|
|
"rewards/accuracy_reward_conf_tag": 0.560546875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 150
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 495.0,
|
|
"completions/max_terminated_length": 495.0,
|
|
"completions/mean_length": 129.712890625,
|
|
"completions/mean_terminated_length": 129.712890625,
|
|
"completions/min_length": 47.0,
|
|
"completions/min_terminated_length": 47.0,
|
|
"epoch": 0.4832,
|
|
"grad_norm": 0.3430553674697876,
|
|
"learning_rate": 5.800711743772242e-07,
|
|
"loss": 0.001,
|
|
"num_tokens": 106583016.0,
|
|
"reward": 1.0087890625,
|
|
"reward_std": 0.14236661791801453,
|
|
"rewards/accuracy_reward_conf_tag": 0.509765625,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 151
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 526.0,
|
|
"completions/max_terminated_length": 526.0,
|
|
"completions/mean_length": 127.662109375,
|
|
"completions/mean_terminated_length": 127.662109375,
|
|
"completions/min_length": 61.0,
|
|
"completions/min_terminated_length": 61.0,
|
|
"epoch": 0.4864,
|
|
"grad_norm": 0.20804554224014282,
|
|
"learning_rate": 5.765124555160142e-07,
|
|
"loss": 0.0015,
|
|
"num_tokens": 107295859.0,
|
|
"reward": 1.0498046875,
|
|
"reward_std": 0.0739220678806305,
|
|
"rewards/accuracy_reward_conf_tag": 0.55078125,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 152
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 653.0,
|
|
"completions/max_terminated_length": 653.0,
|
|
"completions/mean_length": 133.904296875,
|
|
"completions/mean_terminated_length": 133.904296875,
|
|
"completions/min_length": 57.0,
|
|
"completions/min_terminated_length": 57.0,
|
|
"epoch": 0.4896,
|
|
"grad_norm": 0.4418400228023529,
|
|
"learning_rate": 5.729537366548043e-07,
|
|
"loss": -0.0003,
|
|
"num_tokens": 108020442.0,
|
|
"reward": 1.041015625,
|
|
"reward_std": 0.12079255282878876,
|
|
"rewards/accuracy_reward_conf_tag": 0.541015625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 153
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 492.0,
|
|
"completions/max_terminated_length": 492.0,
|
|
"completions/mean_length": 120.732421875,
|
|
"completions/mean_terminated_length": 120.732421875,
|
|
"completions/min_length": 50.0,
|
|
"completions/min_terminated_length": 50.0,
|
|
"epoch": 0.4928,
|
|
"grad_norm": 0.26306527853012085,
|
|
"learning_rate": 5.693950177935943e-07,
|
|
"loss": -0.0045,
|
|
"num_tokens": 108763561.0,
|
|
"reward": 1.0341796875,
|
|
"reward_std": 0.08357548713684082,
|
|
"rewards/accuracy_reward_conf_tag": 0.53515625,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 154
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 368.0,
|
|
"completions/max_terminated_length": 368.0,
|
|
"completions/mean_length": 122.984375,
|
|
"completions/mean_terminated_length": 122.984375,
|
|
"completions/min_length": 62.0,
|
|
"completions/min_terminated_length": 62.0,
|
|
"epoch": 0.496,
|
|
"grad_norm": 0.32889747619628906,
|
|
"learning_rate": 5.658362989323842e-07,
|
|
"loss": 0.0017,
|
|
"num_tokens": 109474705.0,
|
|
"reward": 1.013671875,
|
|
"reward_std": 0.12204372882843018,
|
|
"rewards/accuracy_reward_conf_tag": 0.513671875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 155
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1081.0,
|
|
"completions/max_terminated_length": 1081.0,
|
|
"completions/mean_length": 123.974609375,
|
|
"completions/mean_terminated_length": 123.974609375,
|
|
"completions/min_length": 34.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.4992,
|
|
"grad_norm": 0.37933510541915894,
|
|
"learning_rate": 5.622775800711744e-07,
|
|
"loss": 0.0018,
|
|
"num_tokens": 110193772.0,
|
|
"reward": 1.0966796875,
|
|
"reward_std": 0.10429303348064423,
|
|
"rewards/accuracy_reward_conf_tag": 0.59765625,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 156
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.001953125,
|
|
"completions/max_length": 570.0,
|
|
"completions/max_terminated_length": 570.0,
|
|
"completions/mean_length": 116.595703125,
|
|
"completions/mean_terminated_length": 116.8238754272461,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 60.0,
|
|
"epoch": 0.5024,
|
|
"grad_norm": 0.2643898129463196,
|
|
"learning_rate": 5.587188612099644e-07,
|
|
"loss": 0.001,
|
|
"num_tokens": 110892333.0,
|
|
"reward": 1.0615234375,
|
|
"reward_std": 0.12381990253925323,
|
|
"rewards/accuracy_reward_conf_tag": 0.5625,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 157
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 622.0,
|
|
"completions/max_terminated_length": 622.0,
|
|
"completions/mean_length": 123.662109375,
|
|
"completions/mean_terminated_length": 123.662109375,
|
|
"completions/min_length": 45.0,
|
|
"completions/min_terminated_length": 45.0,
|
|
"epoch": 0.5056,
|
|
"grad_norm": 0.32417362928390503,
|
|
"learning_rate": 5.551601423487544e-07,
|
|
"loss": -0.0011,
|
|
"num_tokens": 111618800.0,
|
|
"reward": 1.009765625,
|
|
"reward_std": 0.10362998396158218,
|
|
"rewards/accuracy_reward_conf_tag": 0.509765625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 158
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 482.0,
|
|
"completions/max_terminated_length": 482.0,
|
|
"completions/mean_length": 124.189453125,
|
|
"completions/mean_terminated_length": 124.189453125,
|
|
"completions/min_length": 47.0,
|
|
"completions/min_terminated_length": 47.0,
|
|
"epoch": 0.5088,
|
|
"grad_norm": 0.3062315881252289,
|
|
"learning_rate": 5.516014234875445e-07,
|
|
"loss": -0.0009,
|
|
"num_tokens": 112338761.0,
|
|
"reward": 1.046875,
|
|
"reward_std": 0.08956344425678253,
|
|
"rewards/accuracy_reward_conf_tag": 0.546875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 159
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 266.0,
|
|
"completions/max_terminated_length": 266.0,
|
|
"completions/mean_length": 112.80859375,
|
|
"completions/mean_terminated_length": 112.80859375,
|
|
"completions/min_length": 54.0,
|
|
"completions/min_terminated_length": 54.0,
|
|
"epoch": 0.512,
|
|
"grad_norm": 0.3602045178413391,
|
|
"learning_rate": 5.480427046263345e-07,
|
|
"loss": 0.0026,
|
|
"num_tokens": 113036599.0,
|
|
"reward": 1.0224609375,
|
|
"reward_std": 0.13203760981559753,
|
|
"rewards/accuracy_reward_conf_tag": 0.5234375,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 160
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 964.0,
|
|
"completions/max_terminated_length": 964.0,
|
|
"completions/mean_length": 121.591796875,
|
|
"completions/mean_terminated_length": 121.591796875,
|
|
"completions/min_length": 62.0,
|
|
"completions/min_terminated_length": 62.0,
|
|
"epoch": 0.5152,
|
|
"grad_norm": 0.36861321330070496,
|
|
"learning_rate": 5.444839857651245e-07,
|
|
"loss": -0.0005,
|
|
"num_tokens": 113745438.0,
|
|
"reward": 1.033203125,
|
|
"reward_std": 0.11691641062498093,
|
|
"rewards/accuracy_reward_conf_tag": 0.533203125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 161
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 345.0,
|
|
"completions/max_terminated_length": 345.0,
|
|
"completions/mean_length": 113.275390625,
|
|
"completions/mean_terminated_length": 113.275390625,
|
|
"completions/min_length": 41.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.5184,
|
|
"grad_norm": 0.21255213022232056,
|
|
"learning_rate": 5.409252669039146e-07,
|
|
"loss": 0.0005,
|
|
"num_tokens": 114444123.0,
|
|
"reward": 1.041015625,
|
|
"reward_std": 0.05102773755788803,
|
|
"rewards/accuracy_reward_conf_tag": 0.541015625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 162
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 823.0,
|
|
"completions/max_terminated_length": 823.0,
|
|
"completions/mean_length": 119.599609375,
|
|
"completions/mean_terminated_length": 119.599609375,
|
|
"completions/min_length": 46.0,
|
|
"completions/min_terminated_length": 46.0,
|
|
"epoch": 0.5216,
|
|
"grad_norm": 0.17109361290931702,
|
|
"learning_rate": 5.373665480427047e-07,
|
|
"loss": 0.0029,
|
|
"num_tokens": 115140918.0,
|
|
"reward": 0.923828125,
|
|
"reward_std": 0.05444513261318207,
|
|
"rewards/accuracy_reward_conf_tag": 0.423828125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 163
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 426.0,
|
|
"completions/max_terminated_length": 426.0,
|
|
"completions/mean_length": 114.478515625,
|
|
"completions/mean_terminated_length": 114.478515625,
|
|
"completions/min_length": 48.0,
|
|
"completions/min_terminated_length": 48.0,
|
|
"epoch": 0.5248,
|
|
"grad_norm": 0.2568250298500061,
|
|
"learning_rate": 5.338078291814946e-07,
|
|
"loss": -0.0014,
|
|
"num_tokens": 115846435.0,
|
|
"reward": 1.126953125,
|
|
"reward_std": 0.08982865512371063,
|
|
"rewards/accuracy_reward_conf_tag": 0.626953125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 164
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 764.0,
|
|
"completions/max_terminated_length": 764.0,
|
|
"completions/mean_length": 118.228515625,
|
|
"completions/mean_terminated_length": 118.228515625,
|
|
"completions/min_length": 59.0,
|
|
"completions/min_terminated_length": 59.0,
|
|
"epoch": 0.528,
|
|
"grad_norm": 0.29473572969436646,
|
|
"learning_rate": 5.302491103202846e-07,
|
|
"loss": -0.0012,
|
|
"num_tokens": 116562272.0,
|
|
"reward": 1.001953125,
|
|
"reward_std": 0.07752697169780731,
|
|
"rewards/accuracy_reward_conf_tag": 0.501953125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 165
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 466.0,
|
|
"completions/max_terminated_length": 466.0,
|
|
"completions/mean_length": 114.53125,
|
|
"completions/mean_terminated_length": 114.53125,
|
|
"completions/min_length": 60.0,
|
|
"completions/min_terminated_length": 60.0,
|
|
"epoch": 0.5312,
|
|
"grad_norm": 0.3796415328979492,
|
|
"learning_rate": 5.266903914590747e-07,
|
|
"loss": -0.0038,
|
|
"num_tokens": 117266864.0,
|
|
"reward": 1.06640625,
|
|
"reward_std": 0.15682196617126465,
|
|
"rewards/accuracy_reward_conf_tag": 0.56640625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 166
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 445.0,
|
|
"completions/max_terminated_length": 445.0,
|
|
"completions/mean_length": 115.1171875,
|
|
"completions/mean_terminated_length": 115.1171875,
|
|
"completions/min_length": 44.0,
|
|
"completions/min_terminated_length": 44.0,
|
|
"epoch": 0.5344,
|
|
"grad_norm": 0.3149721324443817,
|
|
"learning_rate": 5.231316725978647e-07,
|
|
"loss": 0.0031,
|
|
"num_tokens": 117977604.0,
|
|
"reward": 1.119140625,
|
|
"reward_std": 0.11040420830249786,
|
|
"rewards/accuracy_reward_conf_tag": 0.619140625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 167
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 409.0,
|
|
"completions/max_terminated_length": 409.0,
|
|
"completions/mean_length": 118.15625,
|
|
"completions/mean_terminated_length": 118.15625,
|
|
"completions/min_length": 60.0,
|
|
"completions/min_terminated_length": 60.0,
|
|
"epoch": 0.5376,
|
|
"grad_norm": 0.34061557054519653,
|
|
"learning_rate": 5.195729537366548e-07,
|
|
"loss": -0.0016,
|
|
"num_tokens": 118671964.0,
|
|
"reward": 1.048828125,
|
|
"reward_std": 0.12014345824718475,
|
|
"rewards/accuracy_reward_conf_tag": 0.548828125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 168
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 463.0,
|
|
"completions/max_terminated_length": 463.0,
|
|
"completions/mean_length": 116.755859375,
|
|
"completions/mean_terminated_length": 116.755859375,
|
|
"completions/min_length": 43.0,
|
|
"completions/min_terminated_length": 43.0,
|
|
"epoch": 0.5408,
|
|
"grad_norm": 0.2851979434490204,
|
|
"learning_rate": 5.160142348754448e-07,
|
|
"loss": -0.0004,
|
|
"num_tokens": 119399271.0,
|
|
"reward": 1.0703125,
|
|
"reward_std": 0.07483351975679398,
|
|
"rewards/accuracy_reward_conf_tag": 0.5703125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 169
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.001953125,
|
|
"completions/max_length": 594.0,
|
|
"completions/max_terminated_length": 594.0,
|
|
"completions/mean_length": 114.17578125,
|
|
"completions/mean_terminated_length": 114.39921569824219,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.544,
|
|
"grad_norm": 0.5101816058158875,
|
|
"learning_rate": 5.124555160142349e-07,
|
|
"loss": -0.0,
|
|
"num_tokens": 120117137.0,
|
|
"reward": 1.0439453125,
|
|
"reward_std": 0.07975868880748749,
|
|
"rewards/accuracy_reward_conf_tag": 0.544921875,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 170
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1287.0,
|
|
"completions/max_terminated_length": 1287.0,
|
|
"completions/mean_length": 118.01953125,
|
|
"completions/mean_terminated_length": 118.01953125,
|
|
"completions/min_length": 42.0,
|
|
"completions/min_terminated_length": 42.0,
|
|
"epoch": 0.5472,
|
|
"grad_norm": 0.3264475464820862,
|
|
"learning_rate": 5.088967971530249e-07,
|
|
"loss": -0.0012,
|
|
"num_tokens": 120812827.0,
|
|
"reward": 1.0087890625,
|
|
"reward_std": 0.1307452917098999,
|
|
"rewards/accuracy_reward_conf_tag": 0.51171875,
|
|
"rewards/format_reward_conf_tag": 0.994140625,
|
|
"step": 171
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.001953125,
|
|
"completions/max_length": 288.0,
|
|
"completions/max_terminated_length": 288.0,
|
|
"completions/mean_length": 111.177734375,
|
|
"completions/mean_terminated_length": 111.39530181884766,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 47.0,
|
|
"epoch": 0.5504,
|
|
"grad_norm": 0.2982167899608612,
|
|
"learning_rate": 5.053380782918149e-07,
|
|
"loss": 0.0012,
|
|
"num_tokens": 121500278.0,
|
|
"reward": 1.099609375,
|
|
"reward_std": 0.10055398941040039,
|
|
"rewards/accuracy_reward_conf_tag": 0.6015625,
|
|
"rewards/format_reward_conf_tag": 0.99609375,
|
|
"step": 172
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 606.0,
|
|
"completions/max_terminated_length": 606.0,
|
|
"completions/mean_length": 116.041015625,
|
|
"completions/mean_terminated_length": 116.041015625,
|
|
"completions/min_length": 47.0,
|
|
"completions/min_terminated_length": 47.0,
|
|
"epoch": 0.5536,
|
|
"grad_norm": 0.3056163787841797,
|
|
"learning_rate": 5.01779359430605e-07,
|
|
"loss": -0.0033,
|
|
"num_tokens": 122194275.0,
|
|
"reward": 1.0107421875,
|
|
"reward_std": 0.110807403922081,
|
|
"rewards/accuracy_reward_conf_tag": 0.513671875,
|
|
"rewards/format_reward_conf_tag": 0.994140625,
|
|
"step": 173
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.001953125,
|
|
"completions/max_length": 422.0,
|
|
"completions/max_terminated_length": 422.0,
|
|
"completions/mean_length": 116.109375,
|
|
"completions/mean_terminated_length": 116.33659362792969,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.5568,
|
|
"grad_norm": 0.3178585469722748,
|
|
"learning_rate": 4.98220640569395e-07,
|
|
"loss": 0.0029,
|
|
"num_tokens": 122884171.0,
|
|
"reward": 0.9658203125,
|
|
"reward_std": 0.12013532966375351,
|
|
"rewards/accuracy_reward_conf_tag": 0.466796875,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 174
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 390.0,
|
|
"completions/max_terminated_length": 390.0,
|
|
"completions/mean_length": 117.310546875,
|
|
"completions/mean_terminated_length": 117.310546875,
|
|
"completions/min_length": 43.0,
|
|
"completions/min_terminated_length": 43.0,
|
|
"epoch": 0.56,
|
|
"grad_norm": 0.30916622281074524,
|
|
"learning_rate": 4.94661921708185e-07,
|
|
"loss": 0.003,
|
|
"num_tokens": 123586378.0,
|
|
"reward": 0.994140625,
|
|
"reward_std": 0.07555306702852249,
|
|
"rewards/accuracy_reward_conf_tag": 0.494140625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 175
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 465.0,
|
|
"completions/max_terminated_length": 465.0,
|
|
"completions/mean_length": 124.25390625,
|
|
"completions/mean_terminated_length": 124.74118041992188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 55.0,
|
|
"epoch": 0.5632,
|
|
"grad_norm": 0.2795889377593994,
|
|
"learning_rate": 4.91103202846975e-07,
|
|
"loss": 0.0006,
|
|
"num_tokens": 124316028.0,
|
|
"reward": 0.91015625,
|
|
"reward_std": 0.13375455141067505,
|
|
"rewards/accuracy_reward_conf_tag": 0.412109375,
|
|
"rewards/format_reward_conf_tag": 0.99609375,
|
|
"step": 176
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 390.0,
|
|
"completions/max_terminated_length": 390.0,
|
|
"completions/mean_length": 112.29296875,
|
|
"completions/mean_terminated_length": 112.29296875,
|
|
"completions/min_length": 50.0,
|
|
"completions/min_terminated_length": 50.0,
|
|
"epoch": 0.5664,
|
|
"grad_norm": 0.24903085827827454,
|
|
"learning_rate": 4.875444839857651e-07,
|
|
"loss": 0.0034,
|
|
"num_tokens": 125036914.0,
|
|
"reward": 1.0546875,
|
|
"reward_std": 0.09239231050014496,
|
|
"rewards/accuracy_reward_conf_tag": 0.5546875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 177
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 528.0,
|
|
"completions/max_terminated_length": 528.0,
|
|
"completions/mean_length": 114.640625,
|
|
"completions/mean_terminated_length": 114.640625,
|
|
"completions/min_length": 53.0,
|
|
"completions/min_terminated_length": 53.0,
|
|
"epoch": 0.5696,
|
|
"grad_norm": 0.28691524267196655,
|
|
"learning_rate": 4.839857651245551e-07,
|
|
"loss": 0.0008,
|
|
"num_tokens": 125732938.0,
|
|
"reward": 1.060546875,
|
|
"reward_std": 0.1258593201637268,
|
|
"rewards/accuracy_reward_conf_tag": 0.560546875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 178
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 509.0,
|
|
"completions/max_terminated_length": 509.0,
|
|
"completions/mean_length": 125.01953125,
|
|
"completions/mean_terminated_length": 125.01953125,
|
|
"completions/min_length": 56.0,
|
|
"completions/min_terminated_length": 56.0,
|
|
"epoch": 0.5728,
|
|
"grad_norm": 0.27484962344169617,
|
|
"learning_rate": 4.804270462633451e-07,
|
|
"loss": 0.0006,
|
|
"num_tokens": 126441572.0,
|
|
"reward": 1.029296875,
|
|
"reward_std": 0.08219750225543976,
|
|
"rewards/accuracy_reward_conf_tag": 0.529296875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 179
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 334.0,
|
|
"completions/max_terminated_length": 334.0,
|
|
"completions/mean_length": 114.263671875,
|
|
"completions/mean_terminated_length": 114.263671875,
|
|
"completions/min_length": 62.0,
|
|
"completions/min_terminated_length": 62.0,
|
|
"epoch": 0.576,
|
|
"grad_norm": 0.2465706318616867,
|
|
"learning_rate": 4.768683274021353e-07,
|
|
"loss": 0.0014,
|
|
"num_tokens": 127153011.0,
|
|
"reward": 0.95703125,
|
|
"reward_std": 0.1047501489520073,
|
|
"rewards/accuracy_reward_conf_tag": 0.45703125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 180
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 468.0,
|
|
"completions/max_terminated_length": 468.0,
|
|
"completions/mean_length": 123.73046875,
|
|
"completions/mean_terminated_length": 123.73046875,
|
|
"completions/min_length": 51.0,
|
|
"completions/min_terminated_length": 51.0,
|
|
"epoch": 0.5792,
|
|
"grad_norm": 0.23253266513347626,
|
|
"learning_rate": 4.733096085409252e-07,
|
|
"loss": 0.0024,
|
|
"num_tokens": 127890969.0,
|
|
"reward": 1.0341796875,
|
|
"reward_std": 0.11296170204877853,
|
|
"rewards/accuracy_reward_conf_tag": 0.53515625,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 181
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 585.0,
|
|
"completions/max_terminated_length": 585.0,
|
|
"completions/mean_length": 117.541015625,
|
|
"completions/mean_terminated_length": 117.541015625,
|
|
"completions/min_length": 45.0,
|
|
"completions/min_terminated_length": 45.0,
|
|
"epoch": 0.5824,
|
|
"grad_norm": 0.22888119518756866,
|
|
"learning_rate": 4.697508896797153e-07,
|
|
"loss": 0.0041,
|
|
"num_tokens": 128599398.0,
|
|
"reward": 0.9697265625,
|
|
"reward_std": 0.07049369812011719,
|
|
"rewards/accuracy_reward_conf_tag": 0.470703125,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 182
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 571.0,
|
|
"completions/max_terminated_length": 571.0,
|
|
"completions/mean_length": 120.60546875,
|
|
"completions/mean_terminated_length": 120.60546875,
|
|
"completions/min_length": 42.0,
|
|
"completions/min_terminated_length": 42.0,
|
|
"epoch": 0.5856,
|
|
"grad_norm": 0.2716979682445526,
|
|
"learning_rate": 4.661921708185053e-07,
|
|
"loss": -0.0001,
|
|
"num_tokens": 129309604.0,
|
|
"reward": 0.982421875,
|
|
"reward_std": 0.10448494553565979,
|
|
"rewards/accuracy_reward_conf_tag": 0.482421875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 183
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 526.0,
|
|
"completions/max_terminated_length": 526.0,
|
|
"completions/mean_length": 120.044921875,
|
|
"completions/mean_terminated_length": 120.044921875,
|
|
"completions/min_length": 46.0,
|
|
"completions/min_terminated_length": 46.0,
|
|
"epoch": 0.5888,
|
|
"grad_norm": 0.22985929250717163,
|
|
"learning_rate": 4.626334519572954e-07,
|
|
"loss": -0.0002,
|
|
"num_tokens": 130010235.0,
|
|
"reward": 1.046875,
|
|
"reward_std": 0.07318221032619476,
|
|
"rewards/accuracy_reward_conf_tag": 0.546875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 184
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 489.0,
|
|
"completions/max_terminated_length": 489.0,
|
|
"completions/mean_length": 122.2890625,
|
|
"completions/mean_terminated_length": 122.2890625,
|
|
"completions/min_length": 49.0,
|
|
"completions/min_terminated_length": 49.0,
|
|
"epoch": 0.592,
|
|
"grad_norm": 0.3127327859401703,
|
|
"learning_rate": 4.590747330960854e-07,
|
|
"loss": -0.0014,
|
|
"num_tokens": 130721879.0,
|
|
"reward": 1.041015625,
|
|
"reward_std": 0.13762861490249634,
|
|
"rewards/accuracy_reward_conf_tag": 0.541015625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 185
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 410.0,
|
|
"completions/max_terminated_length": 410.0,
|
|
"completions/mean_length": 115.82421875,
|
|
"completions/mean_terminated_length": 115.82421875,
|
|
"completions/min_length": 41.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.5952,
|
|
"grad_norm": 0.2917990982532501,
|
|
"learning_rate": 4.555160142348754e-07,
|
|
"loss": 0.0026,
|
|
"num_tokens": 131429917.0,
|
|
"reward": 1.044921875,
|
|
"reward_std": 0.10915547609329224,
|
|
"rewards/accuracy_reward_conf_tag": 0.544921875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 186
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 401.0,
|
|
"completions/max_terminated_length": 401.0,
|
|
"completions/mean_length": 119.55078125,
|
|
"completions/mean_terminated_length": 119.55078125,
|
|
"completions/min_length": 56.0,
|
|
"completions/min_terminated_length": 56.0,
|
|
"epoch": 0.5984,
|
|
"grad_norm": 0.2931201457977295,
|
|
"learning_rate": 4.519572953736655e-07,
|
|
"loss": 0.0022,
|
|
"num_tokens": 132127159.0,
|
|
"reward": 0.94921875,
|
|
"reward_std": 0.1420920491218567,
|
|
"rewards/accuracy_reward_conf_tag": 0.44921875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 187
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 313.0,
|
|
"completions/max_terminated_length": 313.0,
|
|
"completions/mean_length": 117.220703125,
|
|
"completions/mean_terminated_length": 117.220703125,
|
|
"completions/min_length": 45.0,
|
|
"completions/min_terminated_length": 45.0,
|
|
"epoch": 0.6016,
|
|
"grad_norm": 0.31902194023132324,
|
|
"learning_rate": 4.483985765124555e-07,
|
|
"loss": 0.0057,
|
|
"num_tokens": 132821896.0,
|
|
"reward": 1.029296875,
|
|
"reward_std": 0.10403060913085938,
|
|
"rewards/accuracy_reward_conf_tag": 0.529296875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 188
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.001953125,
|
|
"completions/max_length": 577.0,
|
|
"completions/max_terminated_length": 577.0,
|
|
"completions/mean_length": 115.369140625,
|
|
"completions/mean_terminated_length": 115.59490966796875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 62.0,
|
|
"epoch": 0.6048,
|
|
"grad_norm": 0.27381059527397156,
|
|
"learning_rate": 4.4483985765124553e-07,
|
|
"loss": 0.0004,
|
|
"num_tokens": 133510541.0,
|
|
"reward": 1.080078125,
|
|
"reward_std": 0.09759338200092316,
|
|
"rewards/accuracy_reward_conf_tag": 0.58203125,
|
|
"rewards/format_reward_conf_tag": 0.99609375,
|
|
"step": 189
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 430.0,
|
|
"completions/max_terminated_length": 430.0,
|
|
"completions/mean_length": 118.484375,
|
|
"completions/mean_terminated_length": 118.484375,
|
|
"completions/min_length": 52.0,
|
|
"completions/min_terminated_length": 52.0,
|
|
"epoch": 0.608,
|
|
"grad_norm": 0.2547455132007599,
|
|
"learning_rate": 4.412811387900356e-07,
|
|
"loss": -0.0002,
|
|
"num_tokens": 134239669.0,
|
|
"reward": 0.994140625,
|
|
"reward_std": 0.09725197404623032,
|
|
"rewards/accuracy_reward_conf_tag": 0.494140625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 190
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 665.0,
|
|
"completions/max_terminated_length": 665.0,
|
|
"completions/mean_length": 123.349609375,
|
|
"completions/mean_terminated_length": 123.349609375,
|
|
"completions/min_length": 41.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.6112,
|
|
"grad_norm": 0.3871206045150757,
|
|
"learning_rate": 4.377224199288256e-07,
|
|
"loss": -0.0025,
|
|
"num_tokens": 134972328.0,
|
|
"reward": 1.0380859375,
|
|
"reward_std": 0.11173088103532791,
|
|
"rewards/accuracy_reward_conf_tag": 0.5390625,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 191
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 582.0,
|
|
"completions/max_terminated_length": 582.0,
|
|
"completions/mean_length": 124.984375,
|
|
"completions/mean_terminated_length": 124.984375,
|
|
"completions/min_length": 39.0,
|
|
"completions/min_terminated_length": 39.0,
|
|
"epoch": 0.6144,
|
|
"grad_norm": 0.3271653950214386,
|
|
"learning_rate": 4.341637010676156e-07,
|
|
"loss": 0.0061,
|
|
"num_tokens": 135704184.0,
|
|
"reward": 1.013671875,
|
|
"reward_std": 0.10304145514965057,
|
|
"rewards/accuracy_reward_conf_tag": 0.513671875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 192
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.001953125,
|
|
"completions/max_length": 498.0,
|
|
"completions/max_terminated_length": 498.0,
|
|
"completions/mean_length": 115.599609375,
|
|
"completions/mean_terminated_length": 115.8258285522461,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 42.0,
|
|
"epoch": 0.6176,
|
|
"grad_norm": 0.24646882712841034,
|
|
"learning_rate": 4.306049822064057e-07,
|
|
"loss": -0.0012,
|
|
"num_tokens": 136416035.0,
|
|
"reward": 1.0517578125,
|
|
"reward_std": 0.09889516979455948,
|
|
"rewards/accuracy_reward_conf_tag": 0.552734375,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 193
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 543.0,
|
|
"completions/max_terminated_length": 543.0,
|
|
"completions/mean_length": 119.814453125,
|
|
"completions/mean_terminated_length": 119.814453125,
|
|
"completions/min_length": 40.0,
|
|
"completions/min_terminated_length": 40.0,
|
|
"epoch": 0.6208,
|
|
"grad_norm": 0.2984526753425598,
|
|
"learning_rate": 4.2704626334519573e-07,
|
|
"loss": 0.0048,
|
|
"num_tokens": 137126172.0,
|
|
"reward": 1.025390625,
|
|
"reward_std": 0.10869672894477844,
|
|
"rewards/accuracy_reward_conf_tag": 0.525390625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 194
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 475.0,
|
|
"completions/max_terminated_length": 475.0,
|
|
"completions/mean_length": 127.044921875,
|
|
"completions/mean_terminated_length": 127.044921875,
|
|
"completions/min_length": 56.0,
|
|
"completions/min_terminated_length": 56.0,
|
|
"epoch": 0.624,
|
|
"grad_norm": 0.24040964245796204,
|
|
"learning_rate": 4.2348754448398576e-07,
|
|
"loss": -0.0021,
|
|
"num_tokens": 137855971.0,
|
|
"reward": 0.8984375,
|
|
"reward_std": 0.09356936812400818,
|
|
"rewards/accuracy_reward_conf_tag": 0.3984375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 195
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.001953125,
|
|
"completions/max_length": 512.0,
|
|
"completions/max_terminated_length": 512.0,
|
|
"completions/mean_length": 123.66796875,
|
|
"completions/mean_terminated_length": 123.90998077392578,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 48.0,
|
|
"epoch": 0.6272,
|
|
"grad_norm": 0.26657259464263916,
|
|
"learning_rate": 4.199288256227758e-07,
|
|
"loss": 0.005,
|
|
"num_tokens": 138587409.0,
|
|
"reward": 1.1162109375,
|
|
"reward_std": 0.1010020449757576,
|
|
"rewards/accuracy_reward_conf_tag": 0.6171875,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 196
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1132.0,
|
|
"completions/max_terminated_length": 1132.0,
|
|
"completions/mean_length": 124.146484375,
|
|
"completions/mean_terminated_length": 124.146484375,
|
|
"completions/min_length": 46.0,
|
|
"completions/min_terminated_length": 46.0,
|
|
"epoch": 0.6304,
|
|
"grad_norm": 0.3319939374923706,
|
|
"learning_rate": 4.163701067615658e-07,
|
|
"loss": 0.0031,
|
|
"num_tokens": 139315148.0,
|
|
"reward": 1.00390625,
|
|
"reward_std": 0.07259123027324677,
|
|
"rewards/accuracy_reward_conf_tag": 0.50390625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 197
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 563.0,
|
|
"completions/max_terminated_length": 563.0,
|
|
"completions/mean_length": 125.134765625,
|
|
"completions/mean_terminated_length": 125.134765625,
|
|
"completions/min_length": 51.0,
|
|
"completions/min_terminated_length": 51.0,
|
|
"epoch": 0.6336,
|
|
"grad_norm": 0.23367168009281158,
|
|
"learning_rate": 4.1281138790035585e-07,
|
|
"loss": 0.0038,
|
|
"num_tokens": 140026385.0,
|
|
"reward": 1.1259765625,
|
|
"reward_std": 0.08870036154985428,
|
|
"rewards/accuracy_reward_conf_tag": 0.626953125,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 198
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 438.0,
|
|
"completions/max_terminated_length": 438.0,
|
|
"completions/mean_length": 126.7265625,
|
|
"completions/mean_terminated_length": 126.7265625,
|
|
"completions/min_length": 44.0,
|
|
"completions/min_terminated_length": 44.0,
|
|
"epoch": 0.6368,
|
|
"grad_norm": 0.24098913371562958,
|
|
"learning_rate": 4.0925266903914593e-07,
|
|
"loss": -0.0017,
|
|
"num_tokens": 140748997.0,
|
|
"reward": 0.927734375,
|
|
"reward_std": 0.0949535220861435,
|
|
"rewards/accuracy_reward_conf_tag": 0.427734375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 199
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 717.0,
|
|
"completions/max_terminated_length": 717.0,
|
|
"completions/mean_length": 129.03515625,
|
|
"completions/mean_terminated_length": 129.03515625,
|
|
"completions/min_length": 48.0,
|
|
"completions/min_terminated_length": 48.0,
|
|
"epoch": 0.64,
|
|
"grad_norm": 0.1898925006389618,
|
|
"learning_rate": 4.0569395017793596e-07,
|
|
"loss": -0.0028,
|
|
"num_tokens": 141481199.0,
|
|
"reward": 1.0615234375,
|
|
"reward_std": 0.062272798269987106,
|
|
"rewards/accuracy_reward_conf_tag": 0.5625,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 200
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 337.0,
|
|
"completions/max_terminated_length": 337.0,
|
|
"completions/mean_length": 120.771484375,
|
|
"completions/mean_terminated_length": 120.771484375,
|
|
"completions/min_length": 46.0,
|
|
"completions/min_terminated_length": 46.0,
|
|
"epoch": 0.6432,
|
|
"grad_norm": 0.2958911061286926,
|
|
"learning_rate": 4.0213523131672593e-07,
|
|
"loss": 0.0044,
|
|
"num_tokens": 142180746.0,
|
|
"reward": 1.01953125,
|
|
"reward_std": 0.11316017061471939,
|
|
"rewards/accuracy_reward_conf_tag": 0.51953125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 201
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 388.0,
|
|
"completions/max_terminated_length": 388.0,
|
|
"completions/mean_length": 122.888671875,
|
|
"completions/mean_terminated_length": 122.888671875,
|
|
"completions/min_length": 60.0,
|
|
"completions/min_terminated_length": 60.0,
|
|
"epoch": 0.6464,
|
|
"grad_norm": 0.20609861612319946,
|
|
"learning_rate": 3.98576512455516e-07,
|
|
"loss": 0.0048,
|
|
"num_tokens": 142876849.0,
|
|
"reward": 1.021484375,
|
|
"reward_std": 0.07187168300151825,
|
|
"rewards/accuracy_reward_conf_tag": 0.521484375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 202
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 414.0,
|
|
"completions/max_terminated_length": 414.0,
|
|
"completions/mean_length": 124.310546875,
|
|
"completions/mean_terminated_length": 124.310546875,
|
|
"completions/min_length": 59.0,
|
|
"completions/min_terminated_length": 59.0,
|
|
"epoch": 0.6496,
|
|
"grad_norm": 0.28507986664772034,
|
|
"learning_rate": 3.9501779359430604e-07,
|
|
"loss": 0.0035,
|
|
"num_tokens": 143564056.0,
|
|
"reward": 1.080078125,
|
|
"reward_std": 0.11849214136600494,
|
|
"rewards/accuracy_reward_conf_tag": 0.580078125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 203
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 662.0,
|
|
"completions/max_terminated_length": 662.0,
|
|
"completions/mean_length": 138.58203125,
|
|
"completions/mean_terminated_length": 138.58203125,
|
|
"completions/min_length": 61.0,
|
|
"completions/min_terminated_length": 61.0,
|
|
"epoch": 0.6528,
|
|
"grad_norm": 0.216828852891922,
|
|
"learning_rate": 3.9145907473309607e-07,
|
|
"loss": 0.0001,
|
|
"num_tokens": 144275274.0,
|
|
"reward": 0.87890625,
|
|
"reward_std": 0.07358038425445557,
|
|
"rewards/accuracy_reward_conf_tag": 0.37890625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 204
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 362.0,
|
|
"completions/max_terminated_length": 362.0,
|
|
"completions/mean_length": 126.18359375,
|
|
"completions/mean_terminated_length": 126.18359375,
|
|
"completions/min_length": 61.0,
|
|
"completions/min_terminated_length": 61.0,
|
|
"epoch": 0.656,
|
|
"grad_norm": 0.22173930704593658,
|
|
"learning_rate": 3.879003558718861e-07,
|
|
"loss": 0.0017,
|
|
"num_tokens": 144986928.0,
|
|
"reward": 1.015625,
|
|
"reward_std": 0.06450574845075607,
|
|
"rewards/accuracy_reward_conf_tag": 0.515625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 205
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 788.0,
|
|
"completions/max_terminated_length": 788.0,
|
|
"completions/mean_length": 127.94140625,
|
|
"completions/mean_terminated_length": 127.94140625,
|
|
"completions/min_length": 44.0,
|
|
"completions/min_terminated_length": 44.0,
|
|
"epoch": 0.6592,
|
|
"grad_norm": 0.2896319329738617,
|
|
"learning_rate": 3.8434163701067613e-07,
|
|
"loss": 0.0021,
|
|
"num_tokens": 145675522.0,
|
|
"reward": 1.037109375,
|
|
"reward_std": 0.09876909852027893,
|
|
"rewards/accuracy_reward_conf_tag": 0.537109375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 206
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 377.0,
|
|
"completions/max_terminated_length": 377.0,
|
|
"completions/mean_length": 117.310546875,
|
|
"completions/mean_terminated_length": 117.310546875,
|
|
"completions/min_length": 54.0,
|
|
"completions/min_terminated_length": 54.0,
|
|
"epoch": 0.6624,
|
|
"grad_norm": 0.2767243981361389,
|
|
"learning_rate": 3.8078291814946616e-07,
|
|
"loss": -0.0037,
|
|
"num_tokens": 146384049.0,
|
|
"reward": 1.064453125,
|
|
"reward_std": 0.10981568694114685,
|
|
"rewards/accuracy_reward_conf_tag": 0.564453125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 207
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 861.0,
|
|
"completions/max_terminated_length": 861.0,
|
|
"completions/mean_length": 120.791015625,
|
|
"completions/mean_terminated_length": 120.791015625,
|
|
"completions/min_length": 57.0,
|
|
"completions/min_terminated_length": 57.0,
|
|
"epoch": 0.6656,
|
|
"grad_norm": 0.25350600481033325,
|
|
"learning_rate": 3.7722419928825624e-07,
|
|
"loss": 0.0069,
|
|
"num_tokens": 147092478.0,
|
|
"reward": 1.076171875,
|
|
"reward_std": 0.08705839514732361,
|
|
"rewards/accuracy_reward_conf_tag": 0.576171875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 208
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 368.0,
|
|
"completions/max_terminated_length": 368.0,
|
|
"completions/mean_length": 125.974609375,
|
|
"completions/mean_terminated_length": 125.974609375,
|
|
"completions/min_length": 50.0,
|
|
"completions/min_terminated_length": 50.0,
|
|
"epoch": 0.6688,
|
|
"grad_norm": 0.23264235258102417,
|
|
"learning_rate": 3.7366548042704627e-07,
|
|
"loss": -0.0007,
|
|
"num_tokens": 147802105.0,
|
|
"reward": 0.98828125,
|
|
"reward_std": 0.08193229138851166,
|
|
"rewards/accuracy_reward_conf_tag": 0.48828125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 209
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 473.0,
|
|
"completions/max_terminated_length": 473.0,
|
|
"completions/mean_length": 130.1171875,
|
|
"completions/mean_terminated_length": 130.1171875,
|
|
"completions/min_length": 62.0,
|
|
"completions/min_terminated_length": 62.0,
|
|
"epoch": 0.672,
|
|
"grad_norm": 0.2470727115869522,
|
|
"learning_rate": 3.7010676156583625e-07,
|
|
"loss": -0.0015,
|
|
"num_tokens": 148501501.0,
|
|
"reward": 0.9091796875,
|
|
"reward_std": 0.08568359166383743,
|
|
"rewards/accuracy_reward_conf_tag": 0.41015625,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 210
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 503.0,
|
|
"completions/max_terminated_length": 503.0,
|
|
"completions/mean_length": 127.25,
|
|
"completions/mean_terminated_length": 127.25,
|
|
"completions/min_length": 53.0,
|
|
"completions/min_terminated_length": 53.0,
|
|
"epoch": 0.6752,
|
|
"grad_norm": 0.28654050827026367,
|
|
"learning_rate": 3.6654804270462633e-07,
|
|
"loss": 0.002,
|
|
"num_tokens": 149191381.0,
|
|
"reward": 1.0615234375,
|
|
"reward_std": 0.11502020061016083,
|
|
"rewards/accuracy_reward_conf_tag": 0.5625,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 211
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.001953125,
|
|
"completions/max_length": 389.0,
|
|
"completions/max_terminated_length": 389.0,
|
|
"completions/mean_length": 130.220703125,
|
|
"completions/mean_terminated_length": 130.4755401611328,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 49.0,
|
|
"epoch": 0.6784,
|
|
"grad_norm": 0.26739853620529175,
|
|
"learning_rate": 3.6298932384341636e-07,
|
|
"loss": 0.0009,
|
|
"num_tokens": 149898590.0,
|
|
"reward": 0.8984375,
|
|
"reward_std": 0.1149473488330841,
|
|
"rewards/accuracy_reward_conf_tag": 0.400390625,
|
|
"rewards/format_reward_conf_tag": 0.99609375,
|
|
"step": 212
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.005859375,
|
|
"completions/max_length": 475.0,
|
|
"completions/max_terminated_length": 475.0,
|
|
"completions/mean_length": 127.45703125,
|
|
"completions/mean_terminated_length": 128.208251953125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 48.0,
|
|
"epoch": 0.6816,
|
|
"grad_norm": 0.2606695294380188,
|
|
"learning_rate": 3.594306049822064e-07,
|
|
"loss": -0.001,
|
|
"num_tokens": 150606176.0,
|
|
"reward": 1.0263671875,
|
|
"reward_std": 0.10803714394569397,
|
|
"rewards/accuracy_reward_conf_tag": 0.529296875,
|
|
"rewards/format_reward_conf_tag": 0.994140625,
|
|
"step": 213
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 373.0,
|
|
"completions/max_terminated_length": 373.0,
|
|
"completions/mean_length": 120.955078125,
|
|
"completions/mean_terminated_length": 120.955078125,
|
|
"completions/min_length": 49.0,
|
|
"completions/min_terminated_length": 49.0,
|
|
"epoch": 0.6848,
|
|
"grad_norm": 0.26082226634025574,
|
|
"learning_rate": 3.5587188612099647e-07,
|
|
"loss": -0.0026,
|
|
"num_tokens": 151324273.0,
|
|
"reward": 1.05078125,
|
|
"reward_std": 0.09935884922742844,
|
|
"rewards/accuracy_reward_conf_tag": 0.55078125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 214
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 375.0,
|
|
"completions/max_terminated_length": 375.0,
|
|
"completions/mean_length": 123.8515625,
|
|
"completions/mean_terminated_length": 123.8515625,
|
|
"completions/min_length": 46.0,
|
|
"completions/min_terminated_length": 46.0,
|
|
"epoch": 0.688,
|
|
"grad_norm": 0.3981391191482544,
|
|
"learning_rate": 3.5231316725978644e-07,
|
|
"loss": -0.0033,
|
|
"num_tokens": 152030045.0,
|
|
"reward": 1.1025390625,
|
|
"reward_std": 0.13599222898483276,
|
|
"rewards/accuracy_reward_conf_tag": 0.603515625,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 215
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 755.0,
|
|
"completions/max_terminated_length": 755.0,
|
|
"completions/mean_length": 126.810546875,
|
|
"completions/mean_terminated_length": 126.810546875,
|
|
"completions/min_length": 52.0,
|
|
"completions/min_terminated_length": 52.0,
|
|
"epoch": 0.6912,
|
|
"grad_norm": 0.3804955780506134,
|
|
"learning_rate": 3.4875444839857647e-07,
|
|
"loss": 0.004,
|
|
"num_tokens": 152735876.0,
|
|
"reward": 0.982421875,
|
|
"reward_std": 0.17557457089424133,
|
|
"rewards/accuracy_reward_conf_tag": 0.482421875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 216
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 624.0,
|
|
"completions/max_terminated_length": 624.0,
|
|
"completions/mean_length": 124.138671875,
|
|
"completions/mean_terminated_length": 124.138671875,
|
|
"completions/min_length": 49.0,
|
|
"completions/min_terminated_length": 49.0,
|
|
"epoch": 0.6944,
|
|
"grad_norm": 0.24549926817417145,
|
|
"learning_rate": 3.4519572953736656e-07,
|
|
"loss": 0.0022,
|
|
"num_tokens": 153428035.0,
|
|
"reward": 1.001953125,
|
|
"reward_std": 0.06338557600975037,
|
|
"rewards/accuracy_reward_conf_tag": 0.501953125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 217
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 374.0,
|
|
"completions/max_terminated_length": 374.0,
|
|
"completions/mean_length": 123.3359375,
|
|
"completions/mean_terminated_length": 123.3359375,
|
|
"completions/min_length": 39.0,
|
|
"completions/min_terminated_length": 39.0,
|
|
"epoch": 0.6976,
|
|
"grad_norm": 0.3150384724140167,
|
|
"learning_rate": 3.416370106761566e-07,
|
|
"loss": -0.0008,
|
|
"num_tokens": 154128247.0,
|
|
"reward": 1.001953125,
|
|
"reward_std": 0.11889031529426575,
|
|
"rewards/accuracy_reward_conf_tag": 0.501953125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 218
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.001953125,
|
|
"completions/max_length": 426.0,
|
|
"completions/max_terminated_length": 426.0,
|
|
"completions/mean_length": 114.775390625,
|
|
"completions/mean_terminated_length": 115.0,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 57.0,
|
|
"epoch": 0.7008,
|
|
"grad_norm": 0.2552780508995056,
|
|
"learning_rate": 3.380782918149466e-07,
|
|
"loss": 0.0039,
|
|
"num_tokens": 154825932.0,
|
|
"reward": 1.0927734375,
|
|
"reward_std": 0.10930263996124268,
|
|
"rewards/accuracy_reward_conf_tag": 0.59375,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 219
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 677.0,
|
|
"completions/max_terminated_length": 677.0,
|
|
"completions/mean_length": 125.26953125,
|
|
"completions/mean_terminated_length": 125.26953125,
|
|
"completions/min_length": 49.0,
|
|
"completions/min_terminated_length": 49.0,
|
|
"epoch": 0.704,
|
|
"grad_norm": 0.22730842232704163,
|
|
"learning_rate": 3.3451957295373664e-07,
|
|
"loss": 0.0065,
|
|
"num_tokens": 155528846.0,
|
|
"reward": 1.0859375,
|
|
"reward_std": 0.09271685779094696,
|
|
"rewards/accuracy_reward_conf_tag": 0.5859375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 220
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 624.0,
|
|
"completions/max_terminated_length": 624.0,
|
|
"completions/mean_length": 126.076171875,
|
|
"completions/mean_terminated_length": 126.076171875,
|
|
"completions/min_length": 60.0,
|
|
"completions/min_terminated_length": 60.0,
|
|
"epoch": 0.7072,
|
|
"grad_norm": 0.2676522433757782,
|
|
"learning_rate": 3.3096085409252667e-07,
|
|
"loss": 0.0046,
|
|
"num_tokens": 156256997.0,
|
|
"reward": 1.025390625,
|
|
"reward_std": 0.11579746752977371,
|
|
"rewards/accuracy_reward_conf_tag": 0.525390625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 221
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 311.0,
|
|
"completions/max_terminated_length": 311.0,
|
|
"completions/mean_length": 125.025390625,
|
|
"completions/mean_terminated_length": 125.025390625,
|
|
"completions/min_length": 61.0,
|
|
"completions/min_terminated_length": 61.0,
|
|
"epoch": 0.7104,
|
|
"grad_norm": 0.33278122544288635,
|
|
"learning_rate": 3.274021352313167e-07,
|
|
"loss": 0.0008,
|
|
"num_tokens": 156978754.0,
|
|
"reward": 1.01171875,
|
|
"reward_std": 0.1259922832250595,
|
|
"rewards/accuracy_reward_conf_tag": 0.51171875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 222
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 471.0,
|
|
"completions/max_terminated_length": 471.0,
|
|
"completions/mean_length": 126.0625,
|
|
"completions/mean_terminated_length": 126.0625,
|
|
"completions/min_length": 50.0,
|
|
"completions/min_terminated_length": 50.0,
|
|
"epoch": 0.7136,
|
|
"grad_norm": 0.32390889525413513,
|
|
"learning_rate": 3.238434163701068e-07,
|
|
"loss": 0.0037,
|
|
"num_tokens": 157693946.0,
|
|
"reward": 1.087890625,
|
|
"reward_std": 0.12954068183898926,
|
|
"rewards/accuracy_reward_conf_tag": 0.587890625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 223
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.001953125,
|
|
"completions/max_length": 463.0,
|
|
"completions/max_terminated_length": 463.0,
|
|
"completions/mean_length": 121.4765625,
|
|
"completions/mean_terminated_length": 121.71428680419922,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 48.0,
|
|
"epoch": 0.7168,
|
|
"grad_norm": 0.32255902886390686,
|
|
"learning_rate": 3.202846975088968e-07,
|
|
"loss": 0.0025,
|
|
"num_tokens": 158392110.0,
|
|
"reward": 1.0849609375,
|
|
"reward_std": 0.13625742495059967,
|
|
"rewards/accuracy_reward_conf_tag": 0.5859375,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 224
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 414.0,
|
|
"completions/max_terminated_length": 414.0,
|
|
"completions/mean_length": 128.439453125,
|
|
"completions/mean_terminated_length": 128.439453125,
|
|
"completions/min_length": 53.0,
|
|
"completions/min_terminated_length": 53.0,
|
|
"epoch": 0.72,
|
|
"grad_norm": 0.3401506841182709,
|
|
"learning_rate": 3.167259786476868e-07,
|
|
"loss": 0.0021,
|
|
"num_tokens": 159070015.0,
|
|
"reward": 1.00390625,
|
|
"reward_std": 0.14841194450855255,
|
|
"rewards/accuracy_reward_conf_tag": 0.50390625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 225
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 724.0,
|
|
"completions/max_terminated_length": 724.0,
|
|
"completions/mean_length": 124.62109375,
|
|
"completions/mean_terminated_length": 125.10980987548828,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 62.0,
|
|
"epoch": 0.7232,
|
|
"grad_norm": 0.21677790582180023,
|
|
"learning_rate": 3.1316725978647687e-07,
|
|
"loss": 0.0009,
|
|
"num_tokens": 159786877.0,
|
|
"reward": 1.056640625,
|
|
"reward_std": 0.08923016488552094,
|
|
"rewards/accuracy_reward_conf_tag": 0.55859375,
|
|
"rewards/format_reward_conf_tag": 0.99609375,
|
|
"step": 226
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 433.0,
|
|
"completions/max_terminated_length": 433.0,
|
|
"completions/mean_length": 121.728515625,
|
|
"completions/mean_terminated_length": 121.728515625,
|
|
"completions/min_length": 53.0,
|
|
"completions/min_terminated_length": 53.0,
|
|
"epoch": 0.7264,
|
|
"grad_norm": 0.29960089921951294,
|
|
"learning_rate": 3.096085409252669e-07,
|
|
"loss": 0.0044,
|
|
"num_tokens": 160472538.0,
|
|
"reward": 1.10546875,
|
|
"reward_std": 0.09370160102844238,
|
|
"rewards/accuracy_reward_conf_tag": 0.60546875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 227
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 387.0,
|
|
"completions/max_terminated_length": 387.0,
|
|
"completions/mean_length": 122.484375,
|
|
"completions/mean_terminated_length": 122.484375,
|
|
"completions/min_length": 54.0,
|
|
"completions/min_terminated_length": 54.0,
|
|
"epoch": 0.7296,
|
|
"grad_norm": 0.25875020027160645,
|
|
"learning_rate": 3.0604982206405693e-07,
|
|
"loss": 0.0031,
|
|
"num_tokens": 161170386.0,
|
|
"reward": 1.052734375,
|
|
"reward_std": 0.12433972954750061,
|
|
"rewards/accuracy_reward_conf_tag": 0.552734375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 228
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 503.0,
|
|
"completions/max_terminated_length": 503.0,
|
|
"completions/mean_length": 121.158203125,
|
|
"completions/mean_terminated_length": 121.158203125,
|
|
"completions/min_length": 42.0,
|
|
"completions/min_terminated_length": 42.0,
|
|
"epoch": 0.7328,
|
|
"grad_norm": 0.2586037814617157,
|
|
"learning_rate": 3.02491103202847e-07,
|
|
"loss": 0.0045,
|
|
"num_tokens": 161868827.0,
|
|
"reward": 1.013671875,
|
|
"reward_std": 0.11691886186599731,
|
|
"rewards/accuracy_reward_conf_tag": 0.513671875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 229
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 276.0,
|
|
"completions/max_terminated_length": 276.0,
|
|
"completions/mean_length": 115.7421875,
|
|
"completions/mean_terminated_length": 115.7421875,
|
|
"completions/min_length": 52.0,
|
|
"completions/min_terminated_length": 52.0,
|
|
"epoch": 0.736,
|
|
"grad_norm": 0.2556305229663849,
|
|
"learning_rate": 2.98932384341637e-07,
|
|
"loss": 0.0017,
|
|
"num_tokens": 162582639.0,
|
|
"reward": 1.076171875,
|
|
"reward_std": 0.07897046208381653,
|
|
"rewards/accuracy_reward_conf_tag": 0.576171875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 230
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 359.0,
|
|
"completions/max_terminated_length": 359.0,
|
|
"completions/mean_length": 121.33984375,
|
|
"completions/mean_terminated_length": 121.33984375,
|
|
"completions/min_length": 48.0,
|
|
"completions/min_terminated_length": 48.0,
|
|
"epoch": 0.7392,
|
|
"grad_norm": 0.2588680386543274,
|
|
"learning_rate": 2.95373665480427e-07,
|
|
"loss": 0.0029,
|
|
"num_tokens": 163315461.0,
|
|
"reward": 1.05859375,
|
|
"reward_std": 0.12796618044376373,
|
|
"rewards/accuracy_reward_conf_tag": 0.55859375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 231
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 292.0,
|
|
"completions/max_terminated_length": 292.0,
|
|
"completions/mean_length": 112.7734375,
|
|
"completions/mean_terminated_length": 112.7734375,
|
|
"completions/min_length": 46.0,
|
|
"completions/min_terminated_length": 46.0,
|
|
"epoch": 0.7424,
|
|
"grad_norm": 0.2550903260707855,
|
|
"learning_rate": 2.918149466192171e-07,
|
|
"loss": 0.0006,
|
|
"num_tokens": 164033505.0,
|
|
"reward": 1.08984375,
|
|
"reward_std": 0.09271685779094696,
|
|
"rewards/accuracy_reward_conf_tag": 0.58984375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 232
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 386.0,
|
|
"completions/max_terminated_length": 386.0,
|
|
"completions/mean_length": 128.5234375,
|
|
"completions/mean_terminated_length": 128.5234375,
|
|
"completions/min_length": 47.0,
|
|
"completions/min_terminated_length": 47.0,
|
|
"epoch": 0.7456,
|
|
"grad_norm": 0.296998530626297,
|
|
"learning_rate": 2.882562277580071e-07,
|
|
"loss": -0.0,
|
|
"num_tokens": 164768973.0,
|
|
"reward": 1.0546875,
|
|
"reward_std": 0.12999820709228516,
|
|
"rewards/accuracy_reward_conf_tag": 0.5546875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 233
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 497.0,
|
|
"completions/max_terminated_length": 497.0,
|
|
"completions/mean_length": 128.359375,
|
|
"completions/mean_terminated_length": 128.359375,
|
|
"completions/min_length": 49.0,
|
|
"completions/min_terminated_length": 49.0,
|
|
"epoch": 0.7488,
|
|
"grad_norm": 0.17279453575611115,
|
|
"learning_rate": 2.8469750889679715e-07,
|
|
"loss": -0.0005,
|
|
"num_tokens": 165477557.0,
|
|
"reward": 0.98046875,
|
|
"reward_std": 0.06378497928380966,
|
|
"rewards/accuracy_reward_conf_tag": 0.48046875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 234
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 383.0,
|
|
"completions/max_terminated_length": 383.0,
|
|
"completions/mean_length": 123.052734375,
|
|
"completions/mean_terminated_length": 123.052734375,
|
|
"completions/min_length": 56.0,
|
|
"completions/min_terminated_length": 56.0,
|
|
"epoch": 0.752,
|
|
"grad_norm": 0.1877635419368744,
|
|
"learning_rate": 2.811387900355872e-07,
|
|
"loss": 0.0019,
|
|
"num_tokens": 166171512.0,
|
|
"reward": 0.994140625,
|
|
"reward_std": 0.06207628548145294,
|
|
"rewards/accuracy_reward_conf_tag": 0.494140625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 235
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 490.0,
|
|
"completions/max_terminated_length": 490.0,
|
|
"completions/mean_length": 123.8984375,
|
|
"completions/mean_terminated_length": 123.8984375,
|
|
"completions/min_length": 48.0,
|
|
"completions/min_terminated_length": 48.0,
|
|
"epoch": 0.7552,
|
|
"grad_norm": 0.23798514902591705,
|
|
"learning_rate": 2.775800711743772e-07,
|
|
"loss": 0.0027,
|
|
"num_tokens": 166868556.0,
|
|
"reward": 0.974609375,
|
|
"reward_std": 0.09468954056501389,
|
|
"rewards/accuracy_reward_conf_tag": 0.474609375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 236
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 386.0,
|
|
"completions/max_terminated_length": 386.0,
|
|
"completions/mean_length": 125.880859375,
|
|
"completions/mean_terminated_length": 125.880859375,
|
|
"completions/min_length": 53.0,
|
|
"completions/min_terminated_length": 53.0,
|
|
"epoch": 0.7584,
|
|
"grad_norm": 0.2679816782474518,
|
|
"learning_rate": 2.7402135231316724e-07,
|
|
"loss": 0.0003,
|
|
"num_tokens": 167572671.0,
|
|
"reward": 0.9287109375,
|
|
"reward_std": 0.12495569884777069,
|
|
"rewards/accuracy_reward_conf_tag": 0.431640625,
|
|
"rewards/format_reward_conf_tag": 0.994140625,
|
|
"step": 237
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 444.0,
|
|
"completions/max_terminated_length": 444.0,
|
|
"completions/mean_length": 130.19140625,
|
|
"completions/mean_terminated_length": 130.19140625,
|
|
"completions/min_length": 52.0,
|
|
"completions/min_terminated_length": 52.0,
|
|
"epoch": 0.7616,
|
|
"grad_norm": 0.24259266257286072,
|
|
"learning_rate": 2.704626334519573e-07,
|
|
"loss": 0.0006,
|
|
"num_tokens": 168286737.0,
|
|
"reward": 1.064453125,
|
|
"reward_std": 0.10928526520729065,
|
|
"rewards/accuracy_reward_conf_tag": 0.564453125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 238
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 453.0,
|
|
"completions/max_terminated_length": 453.0,
|
|
"completions/mean_length": 122.994140625,
|
|
"completions/mean_terminated_length": 122.994140625,
|
|
"completions/min_length": 54.0,
|
|
"completions/min_terminated_length": 54.0,
|
|
"epoch": 0.7648,
|
|
"grad_norm": 0.2749550938606262,
|
|
"learning_rate": 2.669039145907473e-07,
|
|
"loss": 0.0019,
|
|
"num_tokens": 168984782.0,
|
|
"reward": 1.0888671875,
|
|
"reward_std": 0.08910098671913147,
|
|
"rewards/accuracy_reward_conf_tag": 0.58984375,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 239
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 351.0,
|
|
"completions/max_terminated_length": 351.0,
|
|
"completions/mean_length": 117.353515625,
|
|
"completions/mean_terminated_length": 117.353515625,
|
|
"completions/min_length": 42.0,
|
|
"completions/min_terminated_length": 42.0,
|
|
"epoch": 0.768,
|
|
"grad_norm": 0.3518067002296448,
|
|
"learning_rate": 2.6334519572953733e-07,
|
|
"loss": 0.002,
|
|
"num_tokens": 169689987.0,
|
|
"reward": 0.9658203125,
|
|
"reward_std": 0.09588323533535004,
|
|
"rewards/accuracy_reward_conf_tag": 0.466796875,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 240
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 438.0,
|
|
"completions/max_terminated_length": 438.0,
|
|
"completions/mean_length": 126.912109375,
|
|
"completions/mean_terminated_length": 126.912109375,
|
|
"completions/min_length": 57.0,
|
|
"completions/min_terminated_length": 57.0,
|
|
"epoch": 0.7712,
|
|
"grad_norm": 0.2472555935382843,
|
|
"learning_rate": 2.597864768683274e-07,
|
|
"loss": -0.0008,
|
|
"num_tokens": 170403422.0,
|
|
"reward": 1.0712890625,
|
|
"reward_std": 0.08325216919183731,
|
|
"rewards/accuracy_reward_conf_tag": 0.572265625,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 241
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1383.0,
|
|
"completions/max_terminated_length": 1383.0,
|
|
"completions/mean_length": 123.63671875,
|
|
"completions/mean_terminated_length": 123.63671875,
|
|
"completions/min_length": 50.0,
|
|
"completions/min_terminated_length": 50.0,
|
|
"epoch": 0.7744,
|
|
"grad_norm": 0.25808462500572205,
|
|
"learning_rate": 2.5622775800711744e-07,
|
|
"loss": 0.0017,
|
|
"num_tokens": 171111804.0,
|
|
"reward": 1.0966796875,
|
|
"reward_std": 0.12197823077440262,
|
|
"rewards/accuracy_reward_conf_tag": 0.59765625,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 242
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 336.0,
|
|
"completions/max_terminated_length": 336.0,
|
|
"completions/mean_length": 121.53515625,
|
|
"completions/mean_terminated_length": 121.53515625,
|
|
"completions/min_length": 47.0,
|
|
"completions/min_terminated_length": 47.0,
|
|
"epoch": 0.7776,
|
|
"grad_norm": 0.3100128769874573,
|
|
"learning_rate": 2.5266903914590747e-07,
|
|
"loss": 0.0053,
|
|
"num_tokens": 171818726.0,
|
|
"reward": 1.041015625,
|
|
"reward_std": 0.14216691255569458,
|
|
"rewards/accuracy_reward_conf_tag": 0.541015625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 243
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 417.0,
|
|
"completions/max_terminated_length": 417.0,
|
|
"completions/mean_length": 126.056640625,
|
|
"completions/mean_terminated_length": 126.056640625,
|
|
"completions/min_length": 61.0,
|
|
"completions/min_terminated_length": 61.0,
|
|
"epoch": 0.7808,
|
|
"grad_norm": 0.20280516147613525,
|
|
"learning_rate": 2.491103202846975e-07,
|
|
"loss": -0.0007,
|
|
"num_tokens": 172547843.0,
|
|
"reward": 1.09765625,
|
|
"reward_std": 0.07384681701660156,
|
|
"rewards/accuracy_reward_conf_tag": 0.59765625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 244
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 395.0,
|
|
"completions/max_terminated_length": 395.0,
|
|
"completions/mean_length": 128.76171875,
|
|
"completions/mean_terminated_length": 128.76171875,
|
|
"completions/min_length": 51.0,
|
|
"completions/min_terminated_length": 51.0,
|
|
"epoch": 0.784,
|
|
"grad_norm": 0.18640628457069397,
|
|
"learning_rate": 2.455516014234875e-07,
|
|
"loss": 0.0004,
|
|
"num_tokens": 173272233.0,
|
|
"reward": 0.923828125,
|
|
"reward_std": 0.050305016338825226,
|
|
"rewards/accuracy_reward_conf_tag": 0.423828125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 245
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 472.0,
|
|
"completions/max_terminated_length": 472.0,
|
|
"completions/mean_length": 124.55078125,
|
|
"completions/mean_terminated_length": 124.55078125,
|
|
"completions/min_length": 44.0,
|
|
"completions/min_terminated_length": 44.0,
|
|
"epoch": 0.7872,
|
|
"grad_norm": 0.305706262588501,
|
|
"learning_rate": 2.4199288256227755e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 173960371.0,
|
|
"reward": 1.037109375,
|
|
"reward_std": 0.10021258145570755,
|
|
"rewards/accuracy_reward_conf_tag": 0.537109375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 246
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 513.0,
|
|
"completions/max_terminated_length": 513.0,
|
|
"completions/mean_length": 123.5390625,
|
|
"completions/mean_terminated_length": 123.5390625,
|
|
"completions/min_length": 41.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.7904,
|
|
"grad_norm": 0.20943109691143036,
|
|
"learning_rate": 2.3843416370106764e-07,
|
|
"loss": -0.0013,
|
|
"num_tokens": 174670167.0,
|
|
"reward": 1.087890625,
|
|
"reward_std": 0.06983967125415802,
|
|
"rewards/accuracy_reward_conf_tag": 0.587890625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 247
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 590.0,
|
|
"completions/max_terminated_length": 590.0,
|
|
"completions/mean_length": 125.927734375,
|
|
"completions/mean_terminated_length": 125.927734375,
|
|
"completions/min_length": 46.0,
|
|
"completions/min_terminated_length": 46.0,
|
|
"epoch": 0.7936,
|
|
"grad_norm": 0.20170189440250397,
|
|
"learning_rate": 2.3487544483985764e-07,
|
|
"loss": 0.0037,
|
|
"num_tokens": 175383778.0,
|
|
"reward": 1.09765625,
|
|
"reward_std": 0.08179810643196106,
|
|
"rewards/accuracy_reward_conf_tag": 0.59765625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 248
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 718.0,
|
|
"completions/max_terminated_length": 718.0,
|
|
"completions/mean_length": 127.423828125,
|
|
"completions/mean_terminated_length": 127.423828125,
|
|
"completions/min_length": 47.0,
|
|
"completions/min_terminated_length": 47.0,
|
|
"epoch": 0.7968,
|
|
"grad_norm": 0.2695271372795105,
|
|
"learning_rate": 2.313167259786477e-07,
|
|
"loss": -0.0015,
|
|
"num_tokens": 176095803.0,
|
|
"reward": 1.080078125,
|
|
"reward_std": 0.10192251205444336,
|
|
"rewards/accuracy_reward_conf_tag": 0.580078125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 249
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 325.0,
|
|
"completions/max_terminated_length": 325.0,
|
|
"completions/mean_length": 129.509765625,
|
|
"completions/mean_terminated_length": 129.509765625,
|
|
"completions/min_length": 61.0,
|
|
"completions/min_terminated_length": 61.0,
|
|
"epoch": 0.8,
|
|
"grad_norm": 0.27595341205596924,
|
|
"learning_rate": 2.277580071174377e-07,
|
|
"loss": 0.0013,
|
|
"num_tokens": 176815544.0,
|
|
"reward": 1.095703125,
|
|
"reward_std": 0.09738617390394211,
|
|
"rewards/accuracy_reward_conf_tag": 0.595703125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 250
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.001953125,
|
|
"completions/max_length": 341.0,
|
|
"completions/max_terminated_length": 341.0,
|
|
"completions/mean_length": 119.283203125,
|
|
"completions/mean_terminated_length": 119.51663208007812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 54.0,
|
|
"epoch": 0.8032,
|
|
"grad_norm": 0.28123873472213745,
|
|
"learning_rate": 2.2419928825622775e-07,
|
|
"loss": -0.0004,
|
|
"num_tokens": 177533113.0,
|
|
"reward": 1.0654296875,
|
|
"reward_std": 0.11685336381196976,
|
|
"rewards/accuracy_reward_conf_tag": 0.56640625,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 251
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 280.0,
|
|
"completions/max_terminated_length": 280.0,
|
|
"completions/mean_length": 121.328125,
|
|
"completions/mean_terminated_length": 121.328125,
|
|
"completions/min_length": 53.0,
|
|
"completions/min_terminated_length": 53.0,
|
|
"epoch": 0.8064,
|
|
"grad_norm": 0.21899612247943878,
|
|
"learning_rate": 2.206405693950178e-07,
|
|
"loss": 0.0024,
|
|
"num_tokens": 178229321.0,
|
|
"reward": 1.06640625,
|
|
"reward_std": 0.0963982343673706,
|
|
"rewards/accuracy_reward_conf_tag": 0.56640625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 252
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 394.0,
|
|
"completions/max_terminated_length": 394.0,
|
|
"completions/mean_length": 130.419921875,
|
|
"completions/mean_terminated_length": 130.419921875,
|
|
"completions/min_length": 44.0,
|
|
"completions/min_terminated_length": 44.0,
|
|
"epoch": 0.8096,
|
|
"grad_norm": 0.271857887506485,
|
|
"learning_rate": 2.170818505338078e-07,
|
|
"loss": 0.004,
|
|
"num_tokens": 178942944.0,
|
|
"reward": 1.0498046875,
|
|
"reward_std": 0.11213028430938721,
|
|
"rewards/accuracy_reward_conf_tag": 0.55078125,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 253
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 686.0,
|
|
"completions/max_terminated_length": 686.0,
|
|
"completions/mean_length": 129.70703125,
|
|
"completions/mean_terminated_length": 129.70703125,
|
|
"completions/min_length": 48.0,
|
|
"completions/min_terminated_length": 48.0,
|
|
"epoch": 0.8128,
|
|
"grad_norm": 0.2682909369468689,
|
|
"learning_rate": 2.1352313167259786e-07,
|
|
"loss": 0.0043,
|
|
"num_tokens": 179658858.0,
|
|
"reward": 0.9814453125,
|
|
"reward_std": 0.13150840997695923,
|
|
"rewards/accuracy_reward_conf_tag": 0.482421875,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 254
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 331.0,
|
|
"completions/max_terminated_length": 331.0,
|
|
"completions/mean_length": 125.44140625,
|
|
"completions/mean_terminated_length": 125.44140625,
|
|
"completions/min_length": 46.0,
|
|
"completions/min_terminated_length": 46.0,
|
|
"epoch": 0.816,
|
|
"grad_norm": 0.3045589327812195,
|
|
"learning_rate": 2.099644128113879e-07,
|
|
"loss": -0.0032,
|
|
"num_tokens": 180378580.0,
|
|
"reward": 1.0703125,
|
|
"reward_std": 0.14795516431331635,
|
|
"rewards/accuracy_reward_conf_tag": 0.5703125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 255
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 423.0,
|
|
"completions/max_terminated_length": 423.0,
|
|
"completions/mean_length": 125.42578125,
|
|
"completions/mean_terminated_length": 125.42578125,
|
|
"completions/min_length": 49.0,
|
|
"completions/min_terminated_length": 49.0,
|
|
"epoch": 0.8192,
|
|
"grad_norm": 0.29352763295173645,
|
|
"learning_rate": 2.0640569395017792e-07,
|
|
"loss": -0.0021,
|
|
"num_tokens": 181081414.0,
|
|
"reward": 1.09375,
|
|
"reward_std": 0.12033502757549286,
|
|
"rewards/accuracy_reward_conf_tag": 0.59375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 256
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.001953125,
|
|
"completions/max_length": 1343.0,
|
|
"completions/max_terminated_length": 1343.0,
|
|
"completions/mean_length": 132.103515625,
|
|
"completions/mean_terminated_length": 132.36203002929688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 47.0,
|
|
"epoch": 0.8224,
|
|
"grad_norm": 0.20613841712474823,
|
|
"learning_rate": 2.0284697508896798e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 181804179.0,
|
|
"reward": 0.9306640625,
|
|
"reward_std": 0.08568236231803894,
|
|
"rewards/accuracy_reward_conf_tag": 0.43359375,
|
|
"rewards/format_reward_conf_tag": 0.994140625,
|
|
"step": 257
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 370.0,
|
|
"completions/max_terminated_length": 370.0,
|
|
"completions/mean_length": 124.921875,
|
|
"completions/mean_terminated_length": 124.921875,
|
|
"completions/min_length": 54.0,
|
|
"completions/min_terminated_length": 54.0,
|
|
"epoch": 0.8256,
|
|
"grad_norm": 0.24263106286525726,
|
|
"learning_rate": 1.99288256227758e-07,
|
|
"loss": -0.0019,
|
|
"num_tokens": 182538403.0,
|
|
"reward": 1.0625,
|
|
"reward_std": 0.08844450116157532,
|
|
"rewards/accuracy_reward_conf_tag": 0.5625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 258
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 363.0,
|
|
"completions/max_terminated_length": 363.0,
|
|
"completions/mean_length": 131.3359375,
|
|
"completions/mean_terminated_length": 131.3359375,
|
|
"completions/min_length": 58.0,
|
|
"completions/min_terminated_length": 58.0,
|
|
"epoch": 0.8288,
|
|
"grad_norm": 0.28023406863212585,
|
|
"learning_rate": 1.9572953736654804e-07,
|
|
"loss": 0.0016,
|
|
"num_tokens": 183238783.0,
|
|
"reward": 1.005859375,
|
|
"reward_std": 0.12033576518297195,
|
|
"rewards/accuracy_reward_conf_tag": 0.505859375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 259
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 347.0,
|
|
"completions/max_terminated_length": 347.0,
|
|
"completions/mean_length": 130.841796875,
|
|
"completions/mean_terminated_length": 130.841796875,
|
|
"completions/min_length": 66.0,
|
|
"completions/min_terminated_length": 66.0,
|
|
"epoch": 0.832,
|
|
"grad_norm": 0.23595190048217773,
|
|
"learning_rate": 1.9217081850533807e-07,
|
|
"loss": -0.0023,
|
|
"num_tokens": 183928390.0,
|
|
"reward": 1.0,
|
|
"reward_std": 0.07897168397903442,
|
|
"rewards/accuracy_reward_conf_tag": 0.5,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 260
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 451.0,
|
|
"completions/max_terminated_length": 451.0,
|
|
"completions/mean_length": 132.7421875,
|
|
"completions/mean_terminated_length": 132.7421875,
|
|
"completions/min_length": 54.0,
|
|
"completions/min_terminated_length": 54.0,
|
|
"epoch": 0.8352,
|
|
"grad_norm": 0.25339820981025696,
|
|
"learning_rate": 1.8861209964412812e-07,
|
|
"loss": 0.0073,
|
|
"num_tokens": 184639042.0,
|
|
"reward": 1.03515625,
|
|
"reward_std": 0.0850832611322403,
|
|
"rewards/accuracy_reward_conf_tag": 0.53515625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 261
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 537.0,
|
|
"completions/max_terminated_length": 537.0,
|
|
"completions/mean_length": 126.7734375,
|
|
"completions/mean_terminated_length": 126.7734375,
|
|
"completions/min_length": 63.0,
|
|
"completions/min_terminated_length": 63.0,
|
|
"epoch": 0.8384,
|
|
"grad_norm": 0.26170578598976135,
|
|
"learning_rate": 1.8505338078291812e-07,
|
|
"loss": 0.0058,
|
|
"num_tokens": 185352278.0,
|
|
"reward": 1.009765625,
|
|
"reward_std": 0.11980339139699936,
|
|
"rewards/accuracy_reward_conf_tag": 0.509765625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 262
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.001953125,
|
|
"completions/max_length": 388.0,
|
|
"completions/max_terminated_length": 388.0,
|
|
"completions/mean_length": 128.8515625,
|
|
"completions/mean_terminated_length": 129.1037139892578,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 55.0,
|
|
"epoch": 0.8416,
|
|
"grad_norm": 0.3050854206085205,
|
|
"learning_rate": 1.8149466192170818e-07,
|
|
"loss": -0.0018,
|
|
"num_tokens": 186054970.0,
|
|
"reward": 1.0478515625,
|
|
"reward_std": 0.1286795437335968,
|
|
"rewards/accuracy_reward_conf_tag": 0.548828125,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 263
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 510.0,
|
|
"completions/max_terminated_length": 510.0,
|
|
"completions/mean_length": 135.689453125,
|
|
"completions/mean_terminated_length": 135.689453125,
|
|
"completions/min_length": 48.0,
|
|
"completions/min_terminated_length": 48.0,
|
|
"epoch": 0.8448,
|
|
"grad_norm": 0.2786458730697632,
|
|
"learning_rate": 1.7793594306049823e-07,
|
|
"loss": 0.0002,
|
|
"num_tokens": 186763307.0,
|
|
"reward": 0.9853515625,
|
|
"reward_std": 0.12059161812067032,
|
|
"rewards/accuracy_reward_conf_tag": 0.486328125,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 264
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 855.0,
|
|
"completions/max_terminated_length": 855.0,
|
|
"completions/mean_length": 132.556640625,
|
|
"completions/mean_terminated_length": 132.556640625,
|
|
"completions/min_length": 62.0,
|
|
"completions/min_terminated_length": 62.0,
|
|
"epoch": 0.848,
|
|
"grad_norm": 0.2970069348812103,
|
|
"learning_rate": 1.7437722419928824e-07,
|
|
"loss": -0.0031,
|
|
"num_tokens": 187485848.0,
|
|
"reward": 1.037109375,
|
|
"reward_std": 0.08956026285886765,
|
|
"rewards/accuracy_reward_conf_tag": 0.537109375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 265
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 638.0,
|
|
"completions/max_terminated_length": 638.0,
|
|
"completions/mean_length": 128.80078125,
|
|
"completions/mean_terminated_length": 128.80078125,
|
|
"completions/min_length": 55.0,
|
|
"completions/min_terminated_length": 55.0,
|
|
"epoch": 0.8512,
|
|
"grad_norm": 0.260507196187973,
|
|
"learning_rate": 1.708185053380783e-07,
|
|
"loss": 0.001,
|
|
"num_tokens": 188195370.0,
|
|
"reward": 1.009765625,
|
|
"reward_std": 0.11849410086870193,
|
|
"rewards/accuracy_reward_conf_tag": 0.509765625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 266
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 532.0,
|
|
"completions/max_terminated_length": 532.0,
|
|
"completions/mean_length": 132.08984375,
|
|
"completions/mean_terminated_length": 132.08984375,
|
|
"completions/min_length": 49.0,
|
|
"completions/min_terminated_length": 49.0,
|
|
"epoch": 0.8544,
|
|
"grad_norm": 0.3098877966403961,
|
|
"learning_rate": 1.6725978647686832e-07,
|
|
"loss": 0.0018,
|
|
"num_tokens": 188898424.0,
|
|
"reward": 1.0029296875,
|
|
"reward_std": 0.10540418326854706,
|
|
"rewards/accuracy_reward_conf_tag": 0.50390625,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 267
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 451.0,
|
|
"completions/max_terminated_length": 451.0,
|
|
"completions/mean_length": 133.64453125,
|
|
"completions/mean_terminated_length": 133.64453125,
|
|
"completions/min_length": 56.0,
|
|
"completions/min_terminated_length": 56.0,
|
|
"epoch": 0.8576,
|
|
"grad_norm": 0.8424309492111206,
|
|
"learning_rate": 1.6370106761565835e-07,
|
|
"loss": -0.0005,
|
|
"num_tokens": 189603234.0,
|
|
"reward": 1.05078125,
|
|
"reward_std": 0.13835257291793823,
|
|
"rewards/accuracy_reward_conf_tag": 0.55078125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 268
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 665.0,
|
|
"completions/max_terminated_length": 665.0,
|
|
"completions/mean_length": 131.474609375,
|
|
"completions/mean_terminated_length": 131.474609375,
|
|
"completions/min_length": 44.0,
|
|
"completions/min_terminated_length": 44.0,
|
|
"epoch": 0.8608,
|
|
"grad_norm": 0.20501279830932617,
|
|
"learning_rate": 1.601423487544484e-07,
|
|
"loss": 0.0007,
|
|
"num_tokens": 190310901.0,
|
|
"reward": 1.1640625,
|
|
"reward_std": 0.08462892472743988,
|
|
"rewards/accuracy_reward_conf_tag": 0.6640625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 269
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 382.0,
|
|
"completions/max_terminated_length": 382.0,
|
|
"completions/mean_length": 132.26171875,
|
|
"completions/mean_terminated_length": 132.26171875,
|
|
"completions/min_length": 50.0,
|
|
"completions/min_terminated_length": 50.0,
|
|
"epoch": 0.864,
|
|
"grad_norm": 0.2276669442653656,
|
|
"learning_rate": 1.5658362989323843e-07,
|
|
"loss": 0.0008,
|
|
"num_tokens": 191037267.0,
|
|
"reward": 1.12109375,
|
|
"reward_std": 0.0862782895565033,
|
|
"rewards/accuracy_reward_conf_tag": 0.62109375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 270
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 426.0,
|
|
"completions/max_terminated_length": 426.0,
|
|
"completions/mean_length": 140.5859375,
|
|
"completions/mean_terminated_length": 140.5859375,
|
|
"completions/min_length": 62.0,
|
|
"completions/min_terminated_length": 62.0,
|
|
"epoch": 0.8672,
|
|
"grad_norm": 0.20950470864772797,
|
|
"learning_rate": 1.5302491103202846e-07,
|
|
"loss": 0.0059,
|
|
"num_tokens": 191780447.0,
|
|
"reward": 0.92578125,
|
|
"reward_std": 0.09186190366744995,
|
|
"rewards/accuracy_reward_conf_tag": 0.42578125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 271
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 512.0,
|
|
"completions/max_terminated_length": 512.0,
|
|
"completions/mean_length": 128.564453125,
|
|
"completions/mean_terminated_length": 128.564453125,
|
|
"completions/min_length": 45.0,
|
|
"completions/min_terminated_length": 45.0,
|
|
"epoch": 0.8704,
|
|
"grad_norm": 0.2311246246099472,
|
|
"learning_rate": 1.494661921708185e-07,
|
|
"loss": -0.0011,
|
|
"num_tokens": 192514872.0,
|
|
"reward": 1.0546875,
|
|
"reward_std": 0.10061199218034744,
|
|
"rewards/accuracy_reward_conf_tag": 0.5546875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 272
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 406.0,
|
|
"completions/max_terminated_length": 406.0,
|
|
"completions/mean_length": 137.228515625,
|
|
"completions/mean_terminated_length": 137.228515625,
|
|
"completions/min_length": 44.0,
|
|
"completions/min_terminated_length": 44.0,
|
|
"epoch": 0.8736,
|
|
"grad_norm": 0.2300596535205841,
|
|
"learning_rate": 1.4590747330960855e-07,
|
|
"loss": 0.0027,
|
|
"num_tokens": 193238885.0,
|
|
"reward": 0.984375,
|
|
"reward_std": 0.095276840031147,
|
|
"rewards/accuracy_reward_conf_tag": 0.484375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 273
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 500.0,
|
|
"completions/max_terminated_length": 500.0,
|
|
"completions/mean_length": 132.6796875,
|
|
"completions/mean_terminated_length": 132.6796875,
|
|
"completions/min_length": 55.0,
|
|
"completions/min_terminated_length": 55.0,
|
|
"epoch": 0.8768,
|
|
"grad_norm": 0.25587886571884155,
|
|
"learning_rate": 1.4234875444839858e-07,
|
|
"loss": -0.0016,
|
|
"num_tokens": 193957137.0,
|
|
"reward": 0.986328125,
|
|
"reward_std": 0.1152089387178421,
|
|
"rewards/accuracy_reward_conf_tag": 0.486328125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 274
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 405.0,
|
|
"completions/max_terminated_length": 405.0,
|
|
"completions/mean_length": 120.36328125,
|
|
"completions/mean_terminated_length": 120.36328125,
|
|
"completions/min_length": 52.0,
|
|
"completions/min_terminated_length": 52.0,
|
|
"epoch": 0.88,
|
|
"grad_norm": 0.21815632283687592,
|
|
"learning_rate": 1.387900355871886e-07,
|
|
"loss": 0.0057,
|
|
"num_tokens": 194629323.0,
|
|
"reward": 1.060546875,
|
|
"reward_std": 0.08364100009202957,
|
|
"rewards/accuracy_reward_conf_tag": 0.560546875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 275
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 540.0,
|
|
"completions/max_terminated_length": 540.0,
|
|
"completions/mean_length": 130.677734375,
|
|
"completions/mean_terminated_length": 130.677734375,
|
|
"completions/min_length": 46.0,
|
|
"completions/min_terminated_length": 46.0,
|
|
"epoch": 0.8832,
|
|
"grad_norm": 0.20837943255901337,
|
|
"learning_rate": 1.3523131672597866e-07,
|
|
"loss": -0.002,
|
|
"num_tokens": 195334222.0,
|
|
"reward": 0.9306640625,
|
|
"reward_std": 0.09304757416248322,
|
|
"rewards/accuracy_reward_conf_tag": 0.431640625,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 276
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 399.0,
|
|
"completions/max_terminated_length": 399.0,
|
|
"completions/mean_length": 137.671875,
|
|
"completions/mean_terminated_length": 137.671875,
|
|
"completions/min_length": 46.0,
|
|
"completions/min_terminated_length": 46.0,
|
|
"epoch": 0.8864,
|
|
"grad_norm": 0.2613351345062256,
|
|
"learning_rate": 1.3167259786476866e-07,
|
|
"loss": 0.0003,
|
|
"num_tokens": 196065062.0,
|
|
"reward": 1.08203125,
|
|
"reward_std": 0.10106877237558365,
|
|
"rewards/accuracy_reward_conf_tag": 0.58203125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 277
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 407.0,
|
|
"completions/max_terminated_length": 407.0,
|
|
"completions/mean_length": 137.93359375,
|
|
"completions/mean_terminated_length": 137.93359375,
|
|
"completions/min_length": 61.0,
|
|
"completions/min_terminated_length": 61.0,
|
|
"epoch": 0.8896,
|
|
"grad_norm": 0.3071613013744354,
|
|
"learning_rate": 1.2811387900355872e-07,
|
|
"loss": 0.0031,
|
|
"num_tokens": 196782428.0,
|
|
"reward": 1.015625,
|
|
"reward_std": 0.16346396505832672,
|
|
"rewards/accuracy_reward_conf_tag": 0.515625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 278
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 607.0,
|
|
"completions/max_terminated_length": 607.0,
|
|
"completions/mean_length": 137.916015625,
|
|
"completions/mean_terminated_length": 137.916015625,
|
|
"completions/min_length": 47.0,
|
|
"completions/min_terminated_length": 47.0,
|
|
"epoch": 0.8928,
|
|
"grad_norm": 0.21012061834335327,
|
|
"learning_rate": 1.2455516014234875e-07,
|
|
"loss": 0.0021,
|
|
"num_tokens": 197517377.0,
|
|
"reward": 1.0166015625,
|
|
"reward_std": 0.1000141054391861,
|
|
"rewards/accuracy_reward_conf_tag": 0.517578125,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 279
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 524.0,
|
|
"completions/max_terminated_length": 524.0,
|
|
"completions/mean_length": 135.2890625,
|
|
"completions/mean_terminated_length": 135.2890625,
|
|
"completions/min_length": 57.0,
|
|
"completions/min_terminated_length": 57.0,
|
|
"epoch": 0.896,
|
|
"grad_norm": 0.2502855062484741,
|
|
"learning_rate": 1.2099644128113878e-07,
|
|
"loss": -0.0002,
|
|
"num_tokens": 198222629.0,
|
|
"reward": 1.017578125,
|
|
"reward_std": 0.11737515777349472,
|
|
"rewards/accuracy_reward_conf_tag": 0.517578125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 280
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 758.0,
|
|
"completions/max_terminated_length": 758.0,
|
|
"completions/mean_length": 134.283203125,
|
|
"completions/mean_terminated_length": 134.283203125,
|
|
"completions/min_length": 64.0,
|
|
"completions/min_terminated_length": 64.0,
|
|
"epoch": 0.8992,
|
|
"grad_norm": 0.27686265110969543,
|
|
"learning_rate": 1.1743772241992882e-07,
|
|
"loss": -0.0033,
|
|
"num_tokens": 198921430.0,
|
|
"reward": 0.966796875,
|
|
"reward_std": 0.11224833130836487,
|
|
"rewards/accuracy_reward_conf_tag": 0.466796875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 281
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 378.0,
|
|
"completions/max_terminated_length": 378.0,
|
|
"completions/mean_length": 136.4140625,
|
|
"completions/mean_terminated_length": 136.4140625,
|
|
"completions/min_length": 40.0,
|
|
"completions/min_terminated_length": 40.0,
|
|
"epoch": 0.9024,
|
|
"grad_norm": 0.2457011491060257,
|
|
"learning_rate": 1.1387900355871885e-07,
|
|
"loss": 0.0056,
|
|
"num_tokens": 199636650.0,
|
|
"reward": 0.9921875,
|
|
"reward_std": 0.11264772713184357,
|
|
"rewards/accuracy_reward_conf_tag": 0.4921875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 282
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 504.0,
|
|
"completions/max_terminated_length": 504.0,
|
|
"completions/mean_length": 138.45703125,
|
|
"completions/mean_terminated_length": 138.45703125,
|
|
"completions/min_length": 45.0,
|
|
"completions/min_terminated_length": 45.0,
|
|
"epoch": 0.9056,
|
|
"grad_norm": 0.2855152189731598,
|
|
"learning_rate": 1.103202846975089e-07,
|
|
"loss": 0.0032,
|
|
"num_tokens": 200351876.0,
|
|
"reward": 1.1083984375,
|
|
"reward_std": 0.15104307234287262,
|
|
"rewards/accuracy_reward_conf_tag": 0.609375,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 283
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 729.0,
|
|
"completions/max_terminated_length": 729.0,
|
|
"completions/mean_length": 143.796875,
|
|
"completions/mean_terminated_length": 143.796875,
|
|
"completions/min_length": 47.0,
|
|
"completions/min_terminated_length": 47.0,
|
|
"epoch": 0.9088,
|
|
"grad_norm": 0.28002673387527466,
|
|
"learning_rate": 1.0676156583629893e-07,
|
|
"loss": 0.0082,
|
|
"num_tokens": 201082116.0,
|
|
"reward": 0.9921875,
|
|
"reward_std": 0.151961088180542,
|
|
"rewards/accuracy_reward_conf_tag": 0.4921875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 284
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 337.0,
|
|
"completions/max_terminated_length": 337.0,
|
|
"completions/mean_length": 130.859375,
|
|
"completions/mean_terminated_length": 130.859375,
|
|
"completions/min_length": 62.0,
|
|
"completions/min_terminated_length": 62.0,
|
|
"epoch": 0.912,
|
|
"grad_norm": 0.22129687666893005,
|
|
"learning_rate": 1.0320284697508896e-07,
|
|
"loss": -0.0044,
|
|
"num_tokens": 201803196.0,
|
|
"reward": 0.9873046875,
|
|
"reward_std": 0.10054770857095718,
|
|
"rewards/accuracy_reward_conf_tag": 0.48828125,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 285
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 450.0,
|
|
"completions/max_terminated_length": 450.0,
|
|
"completions/mean_length": 138.8359375,
|
|
"completions/mean_terminated_length": 138.8359375,
|
|
"completions/min_length": 57.0,
|
|
"completions/min_terminated_length": 57.0,
|
|
"epoch": 0.9152,
|
|
"grad_norm": 0.2536018490791321,
|
|
"learning_rate": 9.9644128113879e-08,
|
|
"loss": -0.0031,
|
|
"num_tokens": 202523944.0,
|
|
"reward": 0.994140625,
|
|
"reward_std": 0.1057380810379982,
|
|
"rewards/accuracy_reward_conf_tag": 0.494140625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 286
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 570.0,
|
|
"completions/max_terminated_length": 570.0,
|
|
"completions/mean_length": 139.84375,
|
|
"completions/mean_terminated_length": 139.84375,
|
|
"completions/min_length": 60.0,
|
|
"completions/min_terminated_length": 60.0,
|
|
"epoch": 0.9184,
|
|
"grad_norm": 0.20366545021533966,
|
|
"learning_rate": 9.608540925266903e-08,
|
|
"loss": -0.0026,
|
|
"num_tokens": 203252656.0,
|
|
"reward": 1.080078125,
|
|
"reward_std": 0.060823142528533936,
|
|
"rewards/accuracy_reward_conf_tag": 0.580078125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 287
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 532.0,
|
|
"completions/max_terminated_length": 532.0,
|
|
"completions/mean_length": 137.982421875,
|
|
"completions/mean_terminated_length": 137.982421875,
|
|
"completions/min_length": 62.0,
|
|
"completions/min_terminated_length": 62.0,
|
|
"epoch": 0.9216,
|
|
"grad_norm": 0.2818716764450073,
|
|
"learning_rate": 9.252669039145906e-08,
|
|
"loss": 0.0007,
|
|
"num_tokens": 203976887.0,
|
|
"reward": 1.0224609375,
|
|
"reward_std": 0.10212098807096481,
|
|
"rewards/accuracy_reward_conf_tag": 0.5234375,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 288
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 409.0,
|
|
"completions/max_terminated_length": 409.0,
|
|
"completions/mean_length": 139.19921875,
|
|
"completions/mean_terminated_length": 139.19921875,
|
|
"completions/min_length": 62.0,
|
|
"completions/min_terminated_length": 62.0,
|
|
"epoch": 0.9248,
|
|
"grad_norm": 0.24427424371242523,
|
|
"learning_rate": 8.896797153024912e-08,
|
|
"loss": 0.0003,
|
|
"num_tokens": 204663757.0,
|
|
"reward": 0.96875,
|
|
"reward_std": 0.07878133654594421,
|
|
"rewards/accuracy_reward_conf_tag": 0.46875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 289
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 573.0,
|
|
"completions/max_terminated_length": 573.0,
|
|
"completions/mean_length": 137.80859375,
|
|
"completions/mean_terminated_length": 137.80859375,
|
|
"completions/min_length": 54.0,
|
|
"completions/min_terminated_length": 54.0,
|
|
"epoch": 0.928,
|
|
"grad_norm": 0.2052888423204422,
|
|
"learning_rate": 8.540925266903915e-08,
|
|
"loss": 0.002,
|
|
"num_tokens": 205382699.0,
|
|
"reward": 1.029296875,
|
|
"reward_std": 0.07995961606502533,
|
|
"rewards/accuracy_reward_conf_tag": 0.529296875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 290
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 479.0,
|
|
"completions/max_terminated_length": 479.0,
|
|
"completions/mean_length": 131.904296875,
|
|
"completions/mean_terminated_length": 131.904296875,
|
|
"completions/min_length": 46.0,
|
|
"completions/min_terminated_length": 46.0,
|
|
"epoch": 0.9312,
|
|
"grad_norm": 0.31603914499282837,
|
|
"learning_rate": 8.185053380782917e-08,
|
|
"loss": 0.0013,
|
|
"num_tokens": 206098186.0,
|
|
"reward": 0.93359375,
|
|
"reward_std": 0.14637748897075653,
|
|
"rewards/accuracy_reward_conf_tag": 0.43359375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 291
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 355.0,
|
|
"completions/max_terminated_length": 355.0,
|
|
"completions/mean_length": 137.095703125,
|
|
"completions/mean_terminated_length": 137.095703125,
|
|
"completions/min_length": 69.0,
|
|
"completions/min_terminated_length": 69.0,
|
|
"epoch": 0.9344,
|
|
"grad_norm": 0.24164921045303345,
|
|
"learning_rate": 7.829181494661922e-08,
|
|
"loss": 0.0022,
|
|
"num_tokens": 206839555.0,
|
|
"reward": 1.033203125,
|
|
"reward_std": 0.09001900255680084,
|
|
"rewards/accuracy_reward_conf_tag": 0.533203125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 292
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 461.0,
|
|
"completions/max_terminated_length": 461.0,
|
|
"completions/mean_length": 135.4296875,
|
|
"completions/mean_terminated_length": 135.4296875,
|
|
"completions/min_length": 69.0,
|
|
"completions/min_terminated_length": 69.0,
|
|
"epoch": 0.9376,
|
|
"grad_norm": 0.23846471309661865,
|
|
"learning_rate": 7.473309608540925e-08,
|
|
"loss": 0.0,
|
|
"num_tokens": 207519159.0,
|
|
"reward": 0.9951171875,
|
|
"reward_std": 0.09475381672382355,
|
|
"rewards/accuracy_reward_conf_tag": 0.49609375,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 293
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 530.0,
|
|
"completions/max_terminated_length": 530.0,
|
|
"completions/mean_length": 142.353515625,
|
|
"completions/mean_terminated_length": 142.353515625,
|
|
"completions/min_length": 65.0,
|
|
"completions/min_terminated_length": 65.0,
|
|
"epoch": 0.9408,
|
|
"grad_norm": 0.27026620507240295,
|
|
"learning_rate": 7.117437722419929e-08,
|
|
"loss": 0.0068,
|
|
"num_tokens": 208250244.0,
|
|
"reward": 1.068359375,
|
|
"reward_std": 0.16071240603923798,
|
|
"rewards/accuracy_reward_conf_tag": 0.568359375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 294
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 576.0,
|
|
"completions/max_terminated_length": 576.0,
|
|
"completions/mean_length": 135.0390625,
|
|
"completions/mean_terminated_length": 135.0390625,
|
|
"completions/min_length": 60.0,
|
|
"completions/min_terminated_length": 60.0,
|
|
"epoch": 0.944,
|
|
"grad_norm": 0.3178434669971466,
|
|
"learning_rate": 6.761565836298933e-08,
|
|
"loss": -0.0002,
|
|
"num_tokens": 208943288.0,
|
|
"reward": 1.068359375,
|
|
"reward_std": 0.11987947672605515,
|
|
"rewards/accuracy_reward_conf_tag": 0.568359375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 295
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 464.0,
|
|
"completions/max_terminated_length": 464.0,
|
|
"completions/mean_length": 142.169921875,
|
|
"completions/mean_terminated_length": 142.169921875,
|
|
"completions/min_length": 54.0,
|
|
"completions/min_terminated_length": 54.0,
|
|
"epoch": 0.9472,
|
|
"grad_norm": 0.2595904469490051,
|
|
"learning_rate": 6.405693950177936e-08,
|
|
"loss": -0.0022,
|
|
"num_tokens": 209643231.0,
|
|
"reward": 1.0078125,
|
|
"reward_std": 0.07411079853773117,
|
|
"rewards/accuracy_reward_conf_tag": 0.5078125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 296
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 434.0,
|
|
"completions/max_terminated_length": 434.0,
|
|
"completions/mean_length": 132.931640625,
|
|
"completions/mean_terminated_length": 132.931640625,
|
|
"completions/min_length": 47.0,
|
|
"completions/min_terminated_length": 47.0,
|
|
"epoch": 0.9504,
|
|
"grad_norm": 0.33172011375427246,
|
|
"learning_rate": 6.049822064056939e-08,
|
|
"loss": -0.0007,
|
|
"num_tokens": 210365900.0,
|
|
"reward": 1.041015625,
|
|
"reward_std": 0.11961549520492554,
|
|
"rewards/accuracy_reward_conf_tag": 0.541015625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 297
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 465.0,
|
|
"completions/max_terminated_length": 465.0,
|
|
"completions/mean_length": 138.744140625,
|
|
"completions/mean_terminated_length": 138.744140625,
|
|
"completions/min_length": 57.0,
|
|
"completions/min_terminated_length": 57.0,
|
|
"epoch": 0.9536,
|
|
"grad_norm": 0.43559926748275757,
|
|
"learning_rate": 5.6939501779359424e-08,
|
|
"loss": 0.0,
|
|
"num_tokens": 211094905.0,
|
|
"reward": 1.033203125,
|
|
"reward_std": 0.11225028336048126,
|
|
"rewards/accuracy_reward_conf_tag": 0.533203125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 298
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 377.0,
|
|
"completions/max_terminated_length": 377.0,
|
|
"completions/mean_length": 141.1171875,
|
|
"completions/mean_terminated_length": 141.1171875,
|
|
"completions/min_length": 56.0,
|
|
"completions/min_terminated_length": 56.0,
|
|
"epoch": 0.9568,
|
|
"grad_norm": 0.2028070092201233,
|
|
"learning_rate": 5.3380782918149466e-08,
|
|
"loss": -0.0035,
|
|
"num_tokens": 211801477.0,
|
|
"reward": 1.021484375,
|
|
"reward_std": 0.06832009553909302,
|
|
"rewards/accuracy_reward_conf_tag": 0.521484375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 299
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 518.0,
|
|
"completions/max_terminated_length": 518.0,
|
|
"completions/mean_length": 141.8984375,
|
|
"completions/mean_terminated_length": 141.8984375,
|
|
"completions/min_length": 57.0,
|
|
"completions/min_terminated_length": 57.0,
|
|
"epoch": 0.96,
|
|
"grad_norm": 0.21485182642936707,
|
|
"learning_rate": 4.98220640569395e-08,
|
|
"loss": -0.0005,
|
|
"num_tokens": 212502841.0,
|
|
"reward": 1.001953125,
|
|
"reward_std": 0.07397978752851486,
|
|
"rewards/accuracy_reward_conf_tag": 0.501953125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 300
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 559.0,
|
|
"completions/max_terminated_length": 559.0,
|
|
"completions/mean_length": 144.912109375,
|
|
"completions/mean_terminated_length": 144.912109375,
|
|
"completions/min_length": 54.0,
|
|
"completions/min_terminated_length": 54.0,
|
|
"epoch": 0.9632,
|
|
"grad_norm": 0.26152896881103516,
|
|
"learning_rate": 4.626334519572953e-08,
|
|
"loss": 0.0046,
|
|
"num_tokens": 213212836.0,
|
|
"reward": 0.978515625,
|
|
"reward_std": 0.13164877891540527,
|
|
"rewards/accuracy_reward_conf_tag": 0.478515625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 301
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 438.0,
|
|
"completions/max_terminated_length": 438.0,
|
|
"completions/mean_length": 130.357421875,
|
|
"completions/mean_terminated_length": 130.357421875,
|
|
"completions/min_length": 53.0,
|
|
"completions/min_terminated_length": 53.0,
|
|
"epoch": 0.9664,
|
|
"grad_norm": 0.24077603220939636,
|
|
"learning_rate": 4.270462633451957e-08,
|
|
"loss": 0.0002,
|
|
"num_tokens": 213912603.0,
|
|
"reward": 1.0341796875,
|
|
"reward_std": 0.08963698148727417,
|
|
"rewards/accuracy_reward_conf_tag": 0.53515625,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 302
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1101.0,
|
|
"completions/max_terminated_length": 1101.0,
|
|
"completions/mean_length": 142.625,
|
|
"completions/mean_terminated_length": 142.625,
|
|
"completions/min_length": 66.0,
|
|
"completions/min_terminated_length": 66.0,
|
|
"epoch": 0.9696,
|
|
"grad_norm": 0.2666417062282562,
|
|
"learning_rate": 3.914590747330961e-08,
|
|
"loss": -0.0014,
|
|
"num_tokens": 214628307.0,
|
|
"reward": 1.056640625,
|
|
"reward_std": 0.13808491826057434,
|
|
"rewards/accuracy_reward_conf_tag": 0.556640625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 303
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.001953125,
|
|
"completions/max_length": 1176.0,
|
|
"completions/max_terminated_length": 1176.0,
|
|
"completions/mean_length": 143.8515625,
|
|
"completions/mean_terminated_length": 144.13307189941406,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 62.0,
|
|
"epoch": 0.9728,
|
|
"grad_norm": 0.26070520281791687,
|
|
"learning_rate": 3.5587188612099644e-08,
|
|
"loss": -0.0014,
|
|
"num_tokens": 215345047.0,
|
|
"reward": 1.0087890625,
|
|
"reward_std": 0.07727016508579254,
|
|
"rewards/accuracy_reward_conf_tag": 0.51171875,
|
|
"rewards/format_reward_conf_tag": 0.994140625,
|
|
"step": 304
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.001953125,
|
|
"completions/max_length": 451.0,
|
|
"completions/max_terminated_length": 451.0,
|
|
"completions/mean_length": 139.166015625,
|
|
"completions/mean_terminated_length": 139.4383544921875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 65.0,
|
|
"epoch": 0.976,
|
|
"grad_norm": 0.30899903178215027,
|
|
"learning_rate": 3.202846975088968e-08,
|
|
"loss": 0.0039,
|
|
"num_tokens": 216044764.0,
|
|
"reward": 1.0908203125,
|
|
"reward_std": 0.14591380953788757,
|
|
"rewards/accuracy_reward_conf_tag": 0.591796875,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 305
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 438.0,
|
|
"completions/max_terminated_length": 438.0,
|
|
"completions/mean_length": 131.318359375,
|
|
"completions/mean_terminated_length": 131.318359375,
|
|
"completions/min_length": 46.0,
|
|
"completions/min_terminated_length": 46.0,
|
|
"epoch": 0.9792,
|
|
"grad_norm": 0.37437206506729126,
|
|
"learning_rate": 2.8469750889679712e-08,
|
|
"loss": -0.0001,
|
|
"num_tokens": 216762751.0,
|
|
"reward": 0.95703125,
|
|
"reward_std": 0.11014340817928314,
|
|
"rewards/accuracy_reward_conf_tag": 0.45703125,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 306
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 462.0,
|
|
"completions/max_terminated_length": 462.0,
|
|
"completions/mean_length": 135.419921875,
|
|
"completions/mean_terminated_length": 135.419921875,
|
|
"completions/min_length": 51.0,
|
|
"completions/min_terminated_length": 51.0,
|
|
"epoch": 0.9824,
|
|
"grad_norm": 0.19477730989456177,
|
|
"learning_rate": 2.491103202846975e-08,
|
|
"loss": -0.0008,
|
|
"num_tokens": 217495214.0,
|
|
"reward": 1.072265625,
|
|
"reward_std": 0.06549368053674698,
|
|
"rewards/accuracy_reward_conf_tag": 0.572265625,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 307
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 538.0,
|
|
"completions/max_terminated_length": 538.0,
|
|
"completions/mean_length": 152.345703125,
|
|
"completions/mean_terminated_length": 152.345703125,
|
|
"completions/min_length": 43.0,
|
|
"completions/min_terminated_length": 43.0,
|
|
"epoch": 0.9856,
|
|
"grad_norm": 0.2829309105873108,
|
|
"learning_rate": 2.1352313167259786e-08,
|
|
"loss": 0.003,
|
|
"num_tokens": 218226655.0,
|
|
"reward": 0.990234375,
|
|
"reward_std": 0.15097317099571228,
|
|
"rewards/accuracy_reward_conf_tag": 0.490234375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 308
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 413.0,
|
|
"completions/max_terminated_length": 413.0,
|
|
"completions/mean_length": 138.583984375,
|
|
"completions/mean_terminated_length": 138.583984375,
|
|
"completions/min_length": 63.0,
|
|
"completions/min_terminated_length": 63.0,
|
|
"epoch": 0.9888,
|
|
"grad_norm": 0.19521859288215637,
|
|
"learning_rate": 1.7793594306049822e-08,
|
|
"loss": -0.0003,
|
|
"num_tokens": 218934202.0,
|
|
"reward": 0.982421875,
|
|
"reward_std": 0.08219750225543976,
|
|
"rewards/accuracy_reward_conf_tag": 0.482421875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 309
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 418.0,
|
|
"completions/max_terminated_length": 418.0,
|
|
"completions/mean_length": 137.216796875,
|
|
"completions/mean_terminated_length": 137.216796875,
|
|
"completions/min_length": 43.0,
|
|
"completions/min_terminated_length": 43.0,
|
|
"epoch": 0.992,
|
|
"grad_norm": 0.2574136555194855,
|
|
"learning_rate": 1.4234875444839856e-08,
|
|
"loss": 0.0034,
|
|
"num_tokens": 219650329.0,
|
|
"reward": 1.0517578125,
|
|
"reward_std": 0.10580358654260635,
|
|
"rewards/accuracy_reward_conf_tag": 0.552734375,
|
|
"rewards/format_reward_conf_tag": 0.998046875,
|
|
"step": 310
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 431.0,
|
|
"completions/max_terminated_length": 431.0,
|
|
"completions/mean_length": 134.279296875,
|
|
"completions/mean_terminated_length": 134.279296875,
|
|
"completions/min_length": 65.0,
|
|
"completions/min_terminated_length": 65.0,
|
|
"epoch": 0.9952,
|
|
"grad_norm": 0.2528088390827179,
|
|
"learning_rate": 1.0676156583629893e-08,
|
|
"loss": 0.0028,
|
|
"num_tokens": 220359216.0,
|
|
"reward": 1.02734375,
|
|
"reward_std": 0.1255941092967987,
|
|
"rewards/accuracy_reward_conf_tag": 0.52734375,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 311
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 239.0,
|
|
"completions/max_terminated_length": 239.0,
|
|
"completions/mean_length": 120.03125,
|
|
"completions/mean_terminated_length": 120.03125,
|
|
"completions/min_length": 70.0,
|
|
"completions/min_terminated_length": 70.0,
|
|
"epoch": 0.9984,
|
|
"grad_norm": 0.2555108964443207,
|
|
"learning_rate": 7.117437722419928e-09,
|
|
"loss": 0.0029,
|
|
"num_tokens": 221078914.0,
|
|
"reward": 1.013671875,
|
|
"reward_std": 0.1146785318851471,
|
|
"rewards/accuracy_reward_conf_tag": 0.513671875,
|
|
"rewards/format_reward_conf_tag": 1.0,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.9984,
|
|
"step": 312,
|
|
"total_flos": 0.0,
|
|
"train_loss": 0.001158178178882689,
|
|
"train_runtime": 16424.5215,
|
|
"train_samples_per_second": 1.218,
|
|
"train_steps_per_second": 0.019
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 313,
|
|
"num_input_tokens_seen": 221078914,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 60,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 4,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|