7043 lines
251 KiB
JSON
7043 lines
251 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.32,
|
|
"eval_steps": 500,
|
|
"global_step": 200,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.26171875,
|
|
"completions/max_length": 1504.0,
|
|
"completions/max_terminated_length": 1504.0,
|
|
"completions/mean_length": 303.0,
|
|
"completions/mean_terminated_length": 410.4126892089844,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0016,
|
|
"grad_norm": 0.005753066390752792,
|
|
"learning_rate": 1e-07,
|
|
"loss": -0.0217,
|
|
"num_tokens": 392512.0,
|
|
"reward": 0.04026263207197189,
|
|
"reward_std": 0.09501844644546509,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.018024610355496407,
|
|
"rewards/confidence_one_or_zero": 0.0625,
|
|
"rewards/format_reward": 0.0625,
|
|
"rewards/mean_confidence_reward": 0.26346302032470703,
|
|
"step": 1
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.30859375,
|
|
"completions/max_length": 1514.0,
|
|
"completions/max_terminated_length": 1514.0,
|
|
"completions/mean_length": 341.23828125,
|
|
"completions/mean_terminated_length": 493.5423889160156,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0032,
|
|
"grad_norm": 0.006315574515610933,
|
|
"learning_rate": 2e-07,
|
|
"loss": -0.0254,
|
|
"num_tokens": 816933.0,
|
|
"reward": 0.07134318351745605,
|
|
"reward_std": 0.16403131186962128,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.029404297471046448,
|
|
"rewards/confidence_one_or_zero": 0.07421875,
|
|
"rewards/format_reward": 0.11328125,
|
|
"rewards/mean_confidence_reward": 0.3325389623641968,
|
|
"step": 2
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.2421875,
|
|
"completions/max_length": 1514.0,
|
|
"completions/max_terminated_length": 1514.0,
|
|
"completions/mean_length": 321.2421875,
|
|
"completions/mean_terminated_length": 423.9071960449219,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0048,
|
|
"grad_norm": 0.005065033212304115,
|
|
"learning_rate": 3e-07,
|
|
"loss": -0.0145,
|
|
"num_tokens": 1229651.0,
|
|
"reward": 0.047541044652462006,
|
|
"reward_std": 0.10478618741035461,
|
|
"rewards/accuracy_reward": 0.00390625,
|
|
"rewards/brier_reward": 0.02086273394525051,
|
|
"rewards/confidence_one_or_zero": 0.06640625,
|
|
"rewards/format_reward": 0.0703125,
|
|
"rewards/mean_confidence_reward": 0.23426908254623413,
|
|
"step": 3
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.27734375,
|
|
"completions/max_length": 1514.0,
|
|
"completions/max_terminated_length": 1514.0,
|
|
"completions/mean_length": 366.34765625,
|
|
"completions/mean_terminated_length": 506.9459533691406,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0064,
|
|
"grad_norm": 0.006735912058502436,
|
|
"learning_rate": 4e-07,
|
|
"loss": -0.0488,
|
|
"num_tokens": 1666212.0,
|
|
"reward": 0.04722738265991211,
|
|
"reward_std": 0.10730428993701935,
|
|
"rewards/accuracy_reward": 0.00390625,
|
|
"rewards/brier_reward": 0.020235449075698853,
|
|
"rewards/confidence_one_or_zero": 0.04296875,
|
|
"rewards/format_reward": 0.0703125,
|
|
"rewards/mean_confidence_reward": 0.23549126088619232,
|
|
"step": 4
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.25390625,
|
|
"completions/max_length": 1522.0,
|
|
"completions/max_terminated_length": 1522.0,
|
|
"completions/mean_length": 337.83203125,
|
|
"completions/mean_terminated_length": 452.8010559082031,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.008,
|
|
"grad_norm": 0.005995773710310459,
|
|
"learning_rate": 5e-07,
|
|
"loss": -0.0337,
|
|
"num_tokens": 2104033.0,
|
|
"reward": 0.05744408816099167,
|
|
"reward_std": 0.13524429500102997,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.017231373116374016,
|
|
"rewards/confidence_one_or_zero": 0.0546875,
|
|
"rewards/format_reward": 0.09765625,
|
|
"rewards/mean_confidence_reward": 0.22458529472351074,
|
|
"step": 5
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.30078125,
|
|
"completions/max_length": 1535.0,
|
|
"completions/max_terminated_length": 1535.0,
|
|
"completions/mean_length": 308.78515625,
|
|
"completions/mean_terminated_length": 441.614501953125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0096,
|
|
"grad_norm": 0.005563064943999052,
|
|
"learning_rate": 6e-07,
|
|
"loss": -0.0162,
|
|
"num_tokens": 2526522.0,
|
|
"reward": 0.03998662531375885,
|
|
"reward_std": 0.10862339287996292,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.017472656443715096,
|
|
"rewards/confidence_one_or_zero": 0.078125,
|
|
"rewards/format_reward": 0.0625,
|
|
"rewards/mean_confidence_reward": 0.21755936741828918,
|
|
"step": 6
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.26953125,
|
|
"completions/max_length": 1533.0,
|
|
"completions/max_terminated_length": 1533.0,
|
|
"completions/mean_length": 319.87890625,
|
|
"completions/mean_terminated_length": 437.9090881347656,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0112,
|
|
"grad_norm": 0.005541341844946146,
|
|
"learning_rate": 7e-07,
|
|
"loss": -0.0218,
|
|
"num_tokens": 2943667.0,
|
|
"reward": 0.04273135960102081,
|
|
"reward_std": 0.09829111397266388,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.01905578374862671,
|
|
"rewards/confidence_one_or_zero": 0.0625,
|
|
"rewards/format_reward": 0.06640625,
|
|
"rewards/mean_confidence_reward": 0.2791077792644501,
|
|
"step": 7
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.28515625,
|
|
"completions/max_length": 1501.0,
|
|
"completions/max_terminated_length": 1501.0,
|
|
"completions/mean_length": 291.1328125,
|
|
"completions/mean_terminated_length": 407.26776123046875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0128,
|
|
"grad_norm": 0.00616063317283988,
|
|
"learning_rate": 8e-07,
|
|
"loss": -0.0247,
|
|
"num_tokens": 3338573.0,
|
|
"reward": 0.05297835171222687,
|
|
"reward_std": 0.12713265419006348,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.020018475130200386,
|
|
"rewards/confidence_one_or_zero": 0.07421875,
|
|
"rewards/format_reward": 0.0859375,
|
|
"rewards/mean_confidence_reward": 0.2880028486251831,
|
|
"step": 8
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.30078125,
|
|
"completions/max_length": 1511.0,
|
|
"completions/max_terminated_length": 1511.0,
|
|
"completions/mean_length": 331.2890625,
|
|
"completions/mean_terminated_length": 473.7988586425781,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0144,
|
|
"grad_norm": 0.004324339330196381,
|
|
"learning_rate": 9e-07,
|
|
"loss": -0.0157,
|
|
"num_tokens": 3772351.0,
|
|
"reward": 0.03322942554950714,
|
|
"reward_std": 0.08382241427898407,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.01567695289850235,
|
|
"rewards/confidence_one_or_zero": 0.078125,
|
|
"rewards/format_reward": 0.05078125,
|
|
"rewards/mean_confidence_reward": 0.24392913281917572,
|
|
"step": 9
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.27734375,
|
|
"completions/max_length": 1512.0,
|
|
"completions/max_terminated_length": 1512.0,
|
|
"completions/mean_length": 323.51953125,
|
|
"completions/mean_terminated_length": 447.68109130859375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.016,
|
|
"grad_norm": 0.008059307001531124,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0296,
|
|
"num_tokens": 4190820.0,
|
|
"reward": 0.04703688248991966,
|
|
"reward_std": 0.11515301465988159,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.01594802923500538,
|
|
"rewards/confidence_one_or_zero": 0.07421875,
|
|
"rewards/format_reward": 0.078125,
|
|
"rewards/mean_confidence_reward": 0.29039186239242554,
|
|
"step": 10
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.30078125,
|
|
"completions/max_length": 1532.0,
|
|
"completions/max_terminated_length": 1532.0,
|
|
"completions/mean_length": 283.08203125,
|
|
"completions/mean_terminated_length": 404.854736328125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0176,
|
|
"grad_norm": 0.0061668287962675095,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0361,
|
|
"num_tokens": 4610681.0,
|
|
"reward": 0.0702010989189148,
|
|
"reward_std": 0.14318351447582245,
|
|
"rewards/accuracy_reward": 0.00390625,
|
|
"rewards/brier_reward": 0.031026437878608704,
|
|
"rewards/confidence_one_or_zero": 0.0625,
|
|
"rewards/format_reward": 0.10546875,
|
|
"rewards/mean_confidence_reward": 0.3190605640411377,
|
|
"step": 11
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.29296875,
|
|
"completions/max_length": 1497.0,
|
|
"completions/max_terminated_length": 1497.0,
|
|
"completions/mean_length": 319.8203125,
|
|
"completions/mean_terminated_length": 452.3425598144531,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0192,
|
|
"grad_norm": 0.00830077100545168,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0344,
|
|
"num_tokens": 5030875.0,
|
|
"reward": 0.06890039145946503,
|
|
"reward_std": 0.16091418266296387,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.03233125060796738,
|
|
"rewards/confidence_one_or_zero": 0.0859375,
|
|
"rewards/format_reward": 0.10546875,
|
|
"rewards/mean_confidence_reward": 0.30535900592803955,
|
|
"step": 12
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.2890625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1536.0,
|
|
"completions/mean_length": 342.921875,
|
|
"completions/mean_terminated_length": 482.3516540527344,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0208,
|
|
"grad_norm": 0.006423098035156727,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0332,
|
|
"num_tokens": 5467367.0,
|
|
"reward": 0.06870196759700775,
|
|
"reward_std": 0.16580435633659363,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.03193442523479462,
|
|
"rewards/confidence_one_or_zero": 0.07421875,
|
|
"rewards/format_reward": 0.10546875,
|
|
"rewards/mean_confidence_reward": 0.30217790603637695,
|
|
"step": 13
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.2890625,
|
|
"completions/max_length": 1525.0,
|
|
"completions/max_terminated_length": 1525.0,
|
|
"completions/mean_length": 292.16015625,
|
|
"completions/mean_terminated_length": 410.9505615234375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0224,
|
|
"grad_norm": 0.0063196225091814995,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0062,
|
|
"num_tokens": 5889400.0,
|
|
"reward": 0.06891889870166779,
|
|
"reward_std": 0.1583699733018875,
|
|
"rewards/accuracy_reward": 0.0078125,
|
|
"rewards/brier_reward": 0.032368291169404984,
|
|
"rewards/confidence_one_or_zero": 0.08203125,
|
|
"rewards/format_reward": 0.09765625,
|
|
"rewards/mean_confidence_reward": 0.29947715997695923,
|
|
"step": 14
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.26953125,
|
|
"completions/max_length": 1534.0,
|
|
"completions/max_terminated_length": 1534.0,
|
|
"completions/mean_length": 370.01171875,
|
|
"completions/mean_terminated_length": 506.5401306152344,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.024,
|
|
"grad_norm": 0.007168356329202652,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0267,
|
|
"num_tokens": 6327707.0,
|
|
"reward": 0.09601341187953949,
|
|
"reward_std": 0.19943499565124512,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.03968217968940735,
|
|
"rewards/confidence_one_or_zero": 0.078125,
|
|
"rewards/format_reward": 0.15234375,
|
|
"rewards/mean_confidence_reward": 0.3757632374763489,
|
|
"step": 15
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.234375,
|
|
"completions/max_length": 1535.0,
|
|
"completions/max_terminated_length": 1535.0,
|
|
"completions/mean_length": 409.3203125,
|
|
"completions/mean_terminated_length": 534.6224365234375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0256,
|
|
"grad_norm": 0.007660416420549154,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0294,
|
|
"num_tokens": 6791245.0,
|
|
"reward": 0.09660420566797256,
|
|
"reward_std": 0.19251175224781036,
|
|
"rewards/accuracy_reward": 0.0078125,
|
|
"rewards/brier_reward": 0.040863730013370514,
|
|
"rewards/confidence_one_or_zero": 0.09375,
|
|
"rewards/format_reward": 0.14453125,
|
|
"rewards/mean_confidence_reward": 0.37647533416748047,
|
|
"step": 16
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.00390625,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.00390625,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.28125,
|
|
"completions/max_length": 1452.0,
|
|
"completions/max_terminated_length": 1452.0,
|
|
"completions/mean_length": 287.19140625,
|
|
"completions/mean_terminated_length": 399.5706481933594,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0272,
|
|
"grad_norm": 0.009069916792213917,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0313,
|
|
"num_tokens": 7177862.0,
|
|
"reward": 0.08393032848834991,
|
|
"reward_std": 0.1837606132030487,
|
|
"rewards/accuracy_reward": 0.00390625,
|
|
"rewards/brier_reward": 0.038953568786382675,
|
|
"rewards/confidence_one_or_zero": 0.07421875,
|
|
"rewards/format_reward": 0.125,
|
|
"rewards/mean_confidence_reward": 0.35352301597595215,
|
|
"step": 17
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.2734375,
|
|
"completions/max_length": 1489.0,
|
|
"completions/max_terminated_length": 1489.0,
|
|
"completions/mean_length": 332.26171875,
|
|
"completions/mean_terminated_length": 457.30645751953125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0288,
|
|
"grad_norm": 0.007633299566805363,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0315,
|
|
"num_tokens": 7603041.0,
|
|
"reward": 0.11554718762636185,
|
|
"reward_std": 0.23108074069023132,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.047499630600214005,
|
|
"rewards/confidence_one_or_zero": 0.08984375,
|
|
"rewards/format_reward": 0.18359375,
|
|
"rewards/mean_confidence_reward": 0.4107900559902191,
|
|
"step": 18
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.265625,
|
|
"completions/max_length": 1534.0,
|
|
"completions/max_terminated_length": 1534.0,
|
|
"completions/mean_length": 359.8671875,
|
|
"completions/mean_terminated_length": 490.0318908691406,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0304,
|
|
"grad_norm": 0.009029170498251915,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.028,
|
|
"num_tokens": 8038847.0,
|
|
"reward": 0.11090146005153656,
|
|
"reward_std": 0.21172785758972168,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.04211445152759552,
|
|
"rewards/confidence_one_or_zero": 0.09375,
|
|
"rewards/format_reward": 0.1796875,
|
|
"rewards/mean_confidence_reward": 0.3890570402145386,
|
|
"step": 19
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.29296875,
|
|
"completions/max_length": 1494.0,
|
|
"completions/max_terminated_length": 1494.0,
|
|
"completions/mean_length": 312.59375,
|
|
"completions/mean_terminated_length": 442.1215515136719,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.032,
|
|
"grad_norm": 0.00870381947606802,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0515,
|
|
"num_tokens": 8462351.0,
|
|
"reward": 0.13207530975341797,
|
|
"reward_std": 0.2408745288848877,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.06102453172206879,
|
|
"rewards/confidence_one_or_zero": 0.09765625,
|
|
"rewards/format_reward": 0.203125,
|
|
"rewards/mean_confidence_reward": 0.44412317872047424,
|
|
"step": 20
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.25,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1536.0,
|
|
"completions/mean_length": 327.46484375,
|
|
"completions/mean_terminated_length": 436.61981201171875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0336,
|
|
"grad_norm": 0.008368119597434998,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0343,
|
|
"num_tokens": 8889134.0,
|
|
"reward": 0.15713617205619812,
|
|
"reward_std": 0.26373666524887085,
|
|
"rewards/accuracy_reward": 0.00390625,
|
|
"rewards/brier_reward": 0.07208360731601715,
|
|
"rewards/confidence_one_or_zero": 0.1328125,
|
|
"rewards/format_reward": 0.23828125,
|
|
"rewards/mean_confidence_reward": 0.47515082359313965,
|
|
"step": 21
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.234375,
|
|
"completions/max_length": 1532.0,
|
|
"completions/max_terminated_length": 1532.0,
|
|
"completions/mean_length": 321.921875,
|
|
"completions/mean_terminated_length": 420.4693908691406,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0352,
|
|
"grad_norm": 0.009171784855425358,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0399,
|
|
"num_tokens": 9311170.0,
|
|
"reward": 0.15757590532302856,
|
|
"reward_std": 0.2669645845890045,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.07296313345432281,
|
|
"rewards/confidence_one_or_zero": 0.12890625,
|
|
"rewards/format_reward": 0.2421875,
|
|
"rewards/mean_confidence_reward": 0.465215802192688,
|
|
"step": 22
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.00390625,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.00390625,
|
|
"calib/nonempty_step_conf_rate": 0.00390625,
|
|
"calib/step_conf_rate": 0.00390625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.25,
|
|
"completions/max_length": 1511.0,
|
|
"completions/max_terminated_length": 1511.0,
|
|
"completions/mean_length": 338.9375,
|
|
"completions/mean_terminated_length": 451.91668701171875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0368,
|
|
"grad_norm": 0.009062502533197403,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0839,
|
|
"num_tokens": 9743634.0,
|
|
"reward": 0.1568487286567688,
|
|
"reward_std": 0.2625208795070648,
|
|
"rewards/accuracy_reward": 0.00390625,
|
|
"rewards/brier_reward": 0.0558837354183197,
|
|
"rewards/confidence_one_or_zero": 0.10546875,
|
|
"rewards/format_reward": 0.25390625,
|
|
"rewards/mean_confidence_reward": 0.5000218152999878,
|
|
"step": 23
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.17578125,
|
|
"completions/max_length": 1535.0,
|
|
"completions/max_terminated_length": 1535.0,
|
|
"completions/mean_length": 404.09765625,
|
|
"completions/mean_terminated_length": 490.2796325683594,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0384,
|
|
"grad_norm": 0.010168269276618958,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0685,
|
|
"num_tokens": 10181659.0,
|
|
"reward": 0.2268688827753067,
|
|
"reward_std": 0.33130136132240295,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.12170526385307312,
|
|
"rewards/confidence_one_or_zero": 0.1171875,
|
|
"rewards/format_reward": 0.33203125,
|
|
"rewards/mean_confidence_reward": 0.5028989911079407,
|
|
"step": 24
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.1875,
|
|
"completions/max_length": 1467.0,
|
|
"completions/max_terminated_length": 1467.0,
|
|
"completions/mean_length": 333.47265625,
|
|
"completions/mean_terminated_length": 410.4278869628906,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.04,
|
|
"grad_norm": 0.009031484834849834,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0465,
|
|
"num_tokens": 10615460.0,
|
|
"reward": 0.22537299990653992,
|
|
"reward_std": 0.2971491515636444,
|
|
"rewards/accuracy_reward": 0.00390625,
|
|
"rewards/brier_reward": 0.09527580440044403,
|
|
"rewards/confidence_one_or_zero": 0.14453125,
|
|
"rewards/format_reward": 0.3515625,
|
|
"rewards/mean_confidence_reward": 0.5729029178619385,
|
|
"step": 25
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.15234375,
|
|
"completions/max_length": 1519.0,
|
|
"completions/max_terminated_length": 1519.0,
|
|
"completions/mean_length": 378.61328125,
|
|
"completions/mean_terminated_length": 446.65899658203125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0416,
|
|
"grad_norm": 0.008382085710763931,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0364,
|
|
"num_tokens": 11065961.0,
|
|
"reward": 0.2754327058792114,
|
|
"reward_std": 0.3278188109397888,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.12898893654346466,
|
|
"rewards/confidence_one_or_zero": 0.1171875,
|
|
"rewards/format_reward": 0.421875,
|
|
"rewards/mean_confidence_reward": 0.5870228409767151,
|
|
"step": 26
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.21484375,
|
|
"completions/max_length": 1437.0,
|
|
"completions/max_terminated_length": 1437.0,
|
|
"completions/mean_length": 353.63671875,
|
|
"completions/mean_terminated_length": 450.4029846191406,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0432,
|
|
"grad_norm": 0.008282415568828583,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0669,
|
|
"num_tokens": 11489188.0,
|
|
"reward": 0.2620762586593628,
|
|
"reward_std": 0.3158631920814514,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.11008860170841217,
|
|
"rewards/confidence_one_or_zero": 0.11328125,
|
|
"rewards/format_reward": 0.4140625,
|
|
"rewards/mean_confidence_reward": 0.6116750836372375,
|
|
"step": 27
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.171875,
|
|
"completions/max_length": 1495.0,
|
|
"completions/max_terminated_length": 1495.0,
|
|
"completions/mean_length": 368.2109375,
|
|
"completions/mean_terminated_length": 444.632080078125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0448,
|
|
"grad_norm": 0.010257712565362453,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0683,
|
|
"num_tokens": 11918138.0,
|
|
"reward": 0.30954355001449585,
|
|
"reward_std": 0.33774739503860474,
|
|
"rewards/accuracy_reward": 0.0078125,
|
|
"rewards/brier_reward": 0.13471046090126038,
|
|
"rewards/confidence_one_or_zero": 0.171875,
|
|
"rewards/format_reward": 0.4765625,
|
|
"rewards/mean_confidence_reward": 0.654219388961792,
|
|
"step": 28
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.14453125,
|
|
"completions/max_length": 1456.0,
|
|
"completions/max_terminated_length": 1456.0,
|
|
"completions/mean_length": 322.359375,
|
|
"completions/mean_terminated_length": 376.8218994140625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0464,
|
|
"grad_norm": 0.00957749504595995,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0394,
|
|
"num_tokens": 12349814.0,
|
|
"reward": 0.29460030794143677,
|
|
"reward_std": 0.32603347301483154,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.13607406616210938,
|
|
"rewards/confidence_one_or_zero": 0.15234375,
|
|
"rewards/format_reward": 0.453125,
|
|
"rewards/mean_confidence_reward": 0.5973583459854126,
|
|
"step": 29
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.10546875,
|
|
"completions/max_length": 1522.0,
|
|
"completions/max_terminated_length": 1522.0,
|
|
"completions/mean_length": 389.828125,
|
|
"completions/mean_terminated_length": 435.7904052734375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.048,
|
|
"grad_norm": 0.007783412467688322,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0552,
|
|
"num_tokens": 12779682.0,
|
|
"reward": 0.3716452419757843,
|
|
"reward_std": 0.3282296657562256,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.1612575352191925,
|
|
"rewards/confidence_one_or_zero": 0.140625,
|
|
"rewards/format_reward": 0.58203125,
|
|
"rewards/mean_confidence_reward": 0.7023523449897766,
|
|
"step": 30
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.1328125,
|
|
"completions/max_length": 1486.0,
|
|
"completions/max_terminated_length": 1486.0,
|
|
"completions/mean_length": 348.17578125,
|
|
"completions/mean_terminated_length": 401.5,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0496,
|
|
"grad_norm": 0.009112906642258167,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0457,
|
|
"num_tokens": 13204127.0,
|
|
"reward": 0.3648914098739624,
|
|
"reward_std": 0.3676462769508362,
|
|
"rewards/accuracy_reward": 0.015625,
|
|
"rewards/brier_reward": 0.17900002002716064,
|
|
"rewards/confidence_one_or_zero": 0.12890625,
|
|
"rewards/format_reward": 0.53515625,
|
|
"rewards/mean_confidence_reward": 0.6544030904769897,
|
|
"step": 31
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.140625,
|
|
"completions/max_length": 1477.0,
|
|
"completions/max_terminated_length": 1477.0,
|
|
"completions/mean_length": 291.328125,
|
|
"completions/mean_terminated_length": 339.0,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 1.0,
|
|
"epoch": 0.0512,
|
|
"grad_norm": 0.010887404903769493,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0865,
|
|
"num_tokens": 13605243.0,
|
|
"reward": 0.413626492023468,
|
|
"reward_std": 0.35561421513557434,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.2256888449192047,
|
|
"rewards/confidence_one_or_zero": 0.14453125,
|
|
"rewards/format_reward": 0.6015625,
|
|
"rewards/mean_confidence_reward": 0.6688516139984131,
|
|
"step": 32
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.08984375,
|
|
"completions/max_length": 1473.0,
|
|
"completions/max_terminated_length": 1473.0,
|
|
"completions/mean_length": 350.7734375,
|
|
"completions/mean_terminated_length": 385.3991394042969,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0528,
|
|
"grad_norm": 0.008478617295622826,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0285,
|
|
"num_tokens": 14036193.0,
|
|
"reward": 0.4472588896751404,
|
|
"reward_std": 0.32160085439682007,
|
|
"rewards/accuracy_reward": 0.0078125,
|
|
"rewards/brier_reward": 0.21873486042022705,
|
|
"rewards/confidence_one_or_zero": 0.1484375,
|
|
"rewards/format_reward": 0.66796875,
|
|
"rewards/mean_confidence_reward": 0.6860401630401611,
|
|
"step": 33
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0859375,
|
|
"completions/max_length": 1349.0,
|
|
"completions/max_terminated_length": 1349.0,
|
|
"completions/mean_length": 311.640625,
|
|
"completions/mean_terminated_length": 340.940185546875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 6.0,
|
|
"epoch": 0.0544,
|
|
"grad_norm": 0.009975390508770943,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0722,
|
|
"num_tokens": 14457565.0,
|
|
"reward": 0.483206570148468,
|
|
"reward_std": 0.3413535952568054,
|
|
"rewards/accuracy_reward": 0.00390625,
|
|
"rewards/brier_reward": 0.25547391176223755,
|
|
"rewards/confidence_one_or_zero": 0.171875,
|
|
"rewards/format_reward": 0.70703125,
|
|
"rewards/mean_confidence_reward": 0.6938413381576538,
|
|
"step": 34
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.05859375,
|
|
"completions/max_length": 1353.0,
|
|
"completions/max_terminated_length": 1353.0,
|
|
"completions/mean_length": 342.3984375,
|
|
"completions/mean_terminated_length": 363.7095642089844,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 4.0,
|
|
"epoch": 0.056,
|
|
"grad_norm": 0.010337012819945812,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0628,
|
|
"num_tokens": 14890075.0,
|
|
"reward": 0.5229313373565674,
|
|
"reward_std": 0.34047645330429077,
|
|
"rewards/accuracy_reward": 0.01953125,
|
|
"rewards/brier_reward": 0.28414231538772583,
|
|
"rewards/confidence_one_or_zero": 0.14453125,
|
|
"rewards/format_reward": 0.7421875,
|
|
"rewards/mean_confidence_reward": 0.6841410398483276,
|
|
"step": 35
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0703125,
|
|
"completions/max_length": 1427.0,
|
|
"completions/max_terminated_length": 1427.0,
|
|
"completions/mean_length": 345.375,
|
|
"completions/mean_terminated_length": 371.4958190917969,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 4.0,
|
|
"epoch": 0.0576,
|
|
"grad_norm": 0.008786818943917751,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.041,
|
|
"num_tokens": 15312395.0,
|
|
"reward": 0.5578696727752686,
|
|
"reward_std": 0.35204410552978516,
|
|
"rewards/accuracy_reward": 0.0234375,
|
|
"rewards/brier_reward": 0.3149563670158386,
|
|
"rewards/confidence_one_or_zero": 0.15234375,
|
|
"rewards/format_reward": 0.77734375,
|
|
"rewards/mean_confidence_reward": 0.6901431083679199,
|
|
"step": 36
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0546875,
|
|
"completions/max_length": 1487.0,
|
|
"completions/max_terminated_length": 1487.0,
|
|
"completions/mean_length": 319.609375,
|
|
"completions/mean_terminated_length": 338.0991516113281,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0592,
|
|
"grad_norm": 0.009227329865098,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0323,
|
|
"num_tokens": 15736471.0,
|
|
"reward": 0.5536789894104004,
|
|
"reward_std": 0.33590322732925415,
|
|
"rewards/accuracy_reward": 0.00390625,
|
|
"rewards/brier_reward": 0.33391907811164856,
|
|
"rewards/confidence_one_or_zero": 0.10546875,
|
|
"rewards/format_reward": 0.76953125,
|
|
"rewards/mean_confidence_reward": 0.6370420455932617,
|
|
"step": 37
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0625,
|
|
"completions/max_length": 1482.0,
|
|
"completions/max_terminated_length": 1482.0,
|
|
"completions/mean_length": 318.7578125,
|
|
"completions/mean_terminated_length": 340.00836181640625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0608,
|
|
"grad_norm": 0.008682608604431152,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0358,
|
|
"num_tokens": 16163457.0,
|
|
"reward": 0.5562059879302979,
|
|
"reward_std": 0.3175504505634308,
|
|
"rewards/accuracy_reward": 0.01171875,
|
|
"rewards/brier_reward": 0.307722806930542,
|
|
"rewards/confidence_one_or_zero": 0.171875,
|
|
"rewards/format_reward": 0.79296875,
|
|
"rewards/mean_confidence_reward": 0.6644600629806519,
|
|
"step": 38
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.05859375,
|
|
"completions/max_length": 1495.0,
|
|
"completions/max_terminated_length": 1495.0,
|
|
"completions/mean_length": 292.265625,
|
|
"completions/mean_terminated_length": 310.4564514160156,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 18.0,
|
|
"epoch": 0.0624,
|
|
"grad_norm": 0.010437216609716415,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0107,
|
|
"num_tokens": 16574525.0,
|
|
"reward": 0.5814734697341919,
|
|
"reward_std": 0.33318889141082764,
|
|
"rewards/accuracy_reward": 0.015625,
|
|
"rewards/brier_reward": 0.35825785994529724,
|
|
"rewards/confidence_one_or_zero": 0.12109375,
|
|
"rewards/format_reward": 0.7890625,
|
|
"rewards/mean_confidence_reward": 0.651659369468689,
|
|
"step": 39
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.02734375,
|
|
"completions/max_length": 1528.0,
|
|
"completions/max_terminated_length": 1528.0,
|
|
"completions/mean_length": 282.6953125,
|
|
"completions/mean_terminated_length": 290.6425476074219,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.064,
|
|
"grad_norm": 0.009016141295433044,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0014,
|
|
"num_tokens": 16978535.0,
|
|
"reward": 0.6290339231491089,
|
|
"reward_std": 0.29086869955062866,
|
|
"rewards/accuracy_reward": 0.0078125,
|
|
"rewards/brier_reward": 0.3947851061820984,
|
|
"rewards/confidence_one_or_zero": 0.14453125,
|
|
"rewards/format_reward": 0.85546875,
|
|
"rewards/mean_confidence_reward": 0.6435203552246094,
|
|
"step": 40
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.00390625,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.00390625,
|
|
"calib/nonempty_step_conf_rate": 0.00390625,
|
|
"calib/step_conf_rate": 0.00390625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0234375,
|
|
"completions/max_length": 1474.0,
|
|
"completions/max_terminated_length": 1474.0,
|
|
"completions/mean_length": 301.30859375,
|
|
"completions/mean_terminated_length": 308.5400085449219,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.0656,
|
|
"grad_norm": 0.008498923853039742,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0508,
|
|
"num_tokens": 17405094.0,
|
|
"reward": 0.6749566197395325,
|
|
"reward_std": 0.2964378893375397,
|
|
"rewards/accuracy_reward": 0.01953125,
|
|
"rewards/brier_reward": 0.451474130153656,
|
|
"rewards/confidence_one_or_zero": 0.1875,
|
|
"rewards/format_reward": 0.87890625,
|
|
"rewards/mean_confidence_reward": 0.6043362617492676,
|
|
"step": 41
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0703125,
|
|
"completions/max_length": 1510.0,
|
|
"completions/max_terminated_length": 1510.0,
|
|
"completions/mean_length": 239.7265625,
|
|
"completions/mean_terminated_length": 257.8571472167969,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 24.0,
|
|
"epoch": 0.0672,
|
|
"grad_norm": 0.009880481287837029,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0041,
|
|
"num_tokens": 17799256.0,
|
|
"reward": 0.6685827970504761,
|
|
"reward_std": 0.3361000120639801,
|
|
"rewards/accuracy_reward": 0.046875,
|
|
"rewards/brier_reward": 0.4426327645778656,
|
|
"rewards/confidence_one_or_zero": 0.15234375,
|
|
"rewards/format_reward": 0.84765625,
|
|
"rewards/mean_confidence_reward": 0.6032766699790955,
|
|
"step": 42
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01953125,
|
|
"completions/max_length": 1410.0,
|
|
"completions/max_terminated_length": 1410.0,
|
|
"completions/mean_length": 289.0703125,
|
|
"completions/mean_terminated_length": 294.8287048339844,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 4.0,
|
|
"epoch": 0.0688,
|
|
"grad_norm": 0.0102525120601058,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.009,
|
|
"num_tokens": 18222346.0,
|
|
"reward": 0.6848199367523193,
|
|
"reward_std": 0.3227686882019043,
|
|
"rewards/accuracy_reward": 0.01953125,
|
|
"rewards/brier_reward": 0.49073219299316406,
|
|
"rewards/confidence_one_or_zero": 0.16015625,
|
|
"rewards/format_reward": 0.859375,
|
|
"rewards/mean_confidence_reward": 0.5575064420700073,
|
|
"step": 43
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0390625,
|
|
"completions/max_length": 1517.0,
|
|
"completions/max_terminated_length": 1517.0,
|
|
"completions/mean_length": 265.3046875,
|
|
"completions/mean_terminated_length": 276.08941650390625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 30.0,
|
|
"epoch": 0.0704,
|
|
"grad_norm": 0.008614201098680496,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0023,
|
|
"num_tokens": 18641896.0,
|
|
"reward": 0.7085627317428589,
|
|
"reward_std": 0.2768844962120056,
|
|
"rewards/accuracy_reward": 0.015625,
|
|
"rewards/brier_reward": 0.5069676637649536,
|
|
"rewards/confidence_one_or_zero": 0.17578125,
|
|
"rewards/format_reward": 0.89453125,
|
|
"rewards/mean_confidence_reward": 0.5584971308708191,
|
|
"step": 44
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01953125,
|
|
"completions/max_length": 1428.0,
|
|
"completions/max_terminated_length": 1428.0,
|
|
"completions/mean_length": 221.6640625,
|
|
"completions/mean_terminated_length": 226.07968139648438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.072,
|
|
"grad_norm": 0.010863802395761013,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0058,
|
|
"num_tokens": 19036490.0,
|
|
"reward": 0.7233068943023682,
|
|
"reward_std": 0.29288819432258606,
|
|
"rewards/accuracy_reward": 0.02734375,
|
|
"rewards/brier_reward": 0.5286435484886169,
|
|
"rewards/confidence_one_or_zero": 0.19140625,
|
|
"rewards/format_reward": 0.890625,
|
|
"rewards/mean_confidence_reward": 0.5390398502349854,
|
|
"step": 45
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.02734375,
|
|
"completions/max_length": 1287.0,
|
|
"completions/max_terminated_length": 1287.0,
|
|
"completions/mean_length": 217.9453125,
|
|
"completions/mean_terminated_length": 224.07228088378906,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 7.0,
|
|
"epoch": 0.0736,
|
|
"grad_norm": 0.0098550571128726,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0099,
|
|
"num_tokens": 19410388.0,
|
|
"reward": 0.7251278162002563,
|
|
"reward_std": 0.2924645245075226,
|
|
"rewards/accuracy_reward": 0.0234375,
|
|
"rewards/brier_reward": 0.5518166422843933,
|
|
"rewards/confidence_one_or_zero": 0.2109375,
|
|
"rewards/format_reward": 0.875,
|
|
"rewards/mean_confidence_reward": 0.49754637479782104,
|
|
"step": 46
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 1231.0,
|
|
"completions/max_terminated_length": 1231.0,
|
|
"completions/mean_length": 214.70703125,
|
|
"completions/mean_terminated_length": 217.2529754638672,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 9.0,
|
|
"epoch": 0.0752,
|
|
"grad_norm": 0.010724488645792007,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0084,
|
|
"num_tokens": 19791513.0,
|
|
"reward": 0.7865052223205566,
|
|
"reward_std": 0.25504371523857117,
|
|
"rewards/accuracy_reward": 0.01171875,
|
|
"rewards/brier_reward": 0.639415442943573,
|
|
"rewards/confidence_one_or_zero": 0.22265625,
|
|
"rewards/format_reward": 0.921875,
|
|
"rewards/mean_confidence_reward": 0.43946534395217896,
|
|
"step": 47
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 1502.0,
|
|
"completions/max_terminated_length": 1502.0,
|
|
"completions/mean_length": 226.703125,
|
|
"completions/mean_terminated_length": 228.48818969726562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 36.0,
|
|
"epoch": 0.0768,
|
|
"grad_norm": 0.008438576012849808,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0097,
|
|
"num_tokens": 20175853.0,
|
|
"reward": 0.8347652554512024,
|
|
"reward_std": 0.22036978602409363,
|
|
"rewards/accuracy_reward": 0.03125,
|
|
"rewards/brier_reward": 0.681247889995575,
|
|
"rewards/confidence_one_or_zero": 0.2578125,
|
|
"rewards/format_reward": 0.95703125,
|
|
"rewards/mean_confidence_reward": 0.4102952182292938,
|
|
"step": 48
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 1461.0,
|
|
"completions/max_terminated_length": 1461.0,
|
|
"completions/mean_length": 223.40625,
|
|
"completions/mean_terminated_length": 225.1653594970703,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 14.0,
|
|
"epoch": 0.0784,
|
|
"grad_norm": 0.009187168441712856,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0115,
|
|
"num_tokens": 20584133.0,
|
|
"reward": 0.8064360022544861,
|
|
"reward_std": 0.25242382287979126,
|
|
"rewards/accuracy_reward": 0.01953125,
|
|
"rewards/brier_reward": 0.6714644432067871,
|
|
"rewards/confidence_one_or_zero": 0.296875,
|
|
"rewards/format_reward": 0.921875,
|
|
"rewards/mean_confidence_reward": 0.39295095205307007,
|
|
"step": 49
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 1376.0,
|
|
"completions/max_terminated_length": 1376.0,
|
|
"completions/mean_length": 172.87109375,
|
|
"completions/mean_terminated_length": 174.23228454589844,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 31.0,
|
|
"epoch": 0.08,
|
|
"grad_norm": 0.008574232459068298,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0015,
|
|
"num_tokens": 20968516.0,
|
|
"reward": 0.8943137526512146,
|
|
"reward_std": 0.19495204091072083,
|
|
"rewards/accuracy_reward": 0.046875,
|
|
"rewards/brier_reward": 0.7730011940002441,
|
|
"rewards/confidence_one_or_zero": 0.375,
|
|
"rewards/format_reward": 0.96875,
|
|
"rewards/mean_confidence_reward": 0.30389389395713806,
|
|
"step": 50
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 1496.0,
|
|
"completions/max_terminated_length": 1496.0,
|
|
"completions/mean_length": 168.61328125,
|
|
"completions/mean_terminated_length": 171.28968811035156,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.0816,
|
|
"grad_norm": 0.0096592977643013,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0246,
|
|
"num_tokens": 21351529.0,
|
|
"reward": 0.8908511400222778,
|
|
"reward_std": 0.2035958170890808,
|
|
"rewards/accuracy_reward": 0.04296875,
|
|
"rewards/brier_reward": 0.7856073379516602,
|
|
"rewards/confidence_one_or_zero": 0.44921875,
|
|
"rewards/format_reward": 0.953125,
|
|
"rewards/mean_confidence_reward": 0.24615687131881714,
|
|
"step": 51
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 1532.0,
|
|
"completions/max_terminated_length": 1532.0,
|
|
"completions/mean_length": 151.8984375,
|
|
"completions/mean_terminated_length": 153.094482421875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 31.0,
|
|
"epoch": 0.0832,
|
|
"grad_norm": 0.008558180183172226,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.001,
|
|
"num_tokens": 21737799.0,
|
|
"reward": 0.9212564826011658,
|
|
"reward_std": 0.15918582677841187,
|
|
"rewards/accuracy_reward": 0.03125,
|
|
"rewards/brier_reward": 0.8503240346908569,
|
|
"rewards/confidence_one_or_zero": 0.578125,
|
|
"rewards/format_reward": 0.9609375,
|
|
"rewards/mean_confidence_reward": 0.18343669176101685,
|
|
"step": 52
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1047.0,
|
|
"completions/max_terminated_length": 1047.0,
|
|
"completions/mean_length": 131.4921875,
|
|
"completions/mean_terminated_length": 132.0078582763672,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 27.0,
|
|
"epoch": 0.0848,
|
|
"grad_norm": 0.008157819509506226,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0157,
|
|
"num_tokens": 22110085.0,
|
|
"reward": 0.9251226186752319,
|
|
"reward_std": 0.1277318149805069,
|
|
"rewards/accuracy_reward": 0.0234375,
|
|
"rewards/brier_reward": 0.8502437472343445,
|
|
"rewards/confidence_one_or_zero": 0.58984375,
|
|
"rewards/format_reward": 0.9765625,
|
|
"rewards/mean_confidence_reward": 0.18238280713558197,
|
|
"step": 53
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 1391.0,
|
|
"completions/max_terminated_length": 1391.0,
|
|
"completions/mean_length": 129.55078125,
|
|
"completions/mean_terminated_length": 130.57086181640625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 36.0,
|
|
"epoch": 0.0864,
|
|
"grad_norm": 0.006626965943723917,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0099,
|
|
"num_tokens": 22478362.0,
|
|
"reward": 0.9469143152236938,
|
|
"reward_std": 0.11642387509346008,
|
|
"rewards/accuracy_reward": 0.01953125,
|
|
"rewards/brier_reward": 0.897733211517334,
|
|
"rewards/confidence_one_or_zero": 0.69921875,
|
|
"rewards/format_reward": 0.9765625,
|
|
"rewards/mean_confidence_reward": 0.11613567173480988,
|
|
"step": 54
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 795.0,
|
|
"completions/max_terminated_length": 795.0,
|
|
"completions/mean_length": 120.4140625,
|
|
"completions/mean_terminated_length": 121.3622055053711,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 23.0,
|
|
"epoch": 0.088,
|
|
"grad_norm": 0.005671264138072729,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0109,
|
|
"num_tokens": 22861796.0,
|
|
"reward": 0.96063232421875,
|
|
"reward_std": 0.09957661479711533,
|
|
"rewards/accuracy_reward": 0.03125,
|
|
"rewards/brier_reward": 0.9134503602981567,
|
|
"rewards/confidence_one_or_zero": 0.8203125,
|
|
"rewards/format_reward": 0.9765625,
|
|
"rewards/mean_confidence_reward": 0.07218749821186066,
|
|
"step": 55
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.00390625,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.00390625,
|
|
"calib/nonempty_step_conf_rate": 0.00390625,
|
|
"calib/step_conf_rate": 0.00390625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 712.0,
|
|
"completions/max_terminated_length": 712.0,
|
|
"completions/mean_length": 107.37109375,
|
|
"completions/mean_terminated_length": 107.37109375,
|
|
"completions/min_length": 30.0,
|
|
"completions/min_terminated_length": 30.0,
|
|
"epoch": 0.0896,
|
|
"grad_norm": 0.005763660185039043,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0026,
|
|
"num_tokens": 23236467.0,
|
|
"reward": 0.9770891666412354,
|
|
"reward_std": 0.06000019609928131,
|
|
"rewards/accuracy_reward": 0.02734375,
|
|
"rewards/brier_reward": 0.9385515451431274,
|
|
"rewards/confidence_one_or_zero": 0.8984375,
|
|
"rewards/format_reward": 0.98828125,
|
|
"rewards/mean_confidence_reward": 0.04484374821186066,
|
|
"step": 56
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1508.0,
|
|
"completions/max_terminated_length": 1508.0,
|
|
"completions/mean_length": 107.41796875,
|
|
"completions/mean_terminated_length": 107.83922576904297,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 26.0,
|
|
"epoch": 0.0912,
|
|
"grad_norm": 0.005554537288844585,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 23593294.0,
|
|
"reward": 0.974513053894043,
|
|
"reward_std": 0.07732105255126953,
|
|
"rewards/accuracy_reward": 0.03515625,
|
|
"rewards/brier_reward": 0.9333992004394531,
|
|
"rewards/confidence_one_or_zero": 0.91015625,
|
|
"rewards/format_reward": 0.98046875,
|
|
"rewards/mean_confidence_reward": 0.03757812827825546,
|
|
"step": 57
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1242.0,
|
|
"completions/max_terminated_length": 1242.0,
|
|
"completions/mean_length": 121.54296875,
|
|
"completions/mean_terminated_length": 121.54296875,
|
|
"completions/min_length": 32.0,
|
|
"completions/min_terminated_length": 32.0,
|
|
"epoch": 0.0928,
|
|
"grad_norm": 0.005623976234346628,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0206,
|
|
"num_tokens": 23965777.0,
|
|
"reward": 0.9896925091743469,
|
|
"reward_std": 0.02897283062338829,
|
|
"rewards/accuracy_reward": 0.08984375,
|
|
"rewards/brier_reward": 0.8973519802093506,
|
|
"rewards/confidence_one_or_zero": 0.9453125,
|
|
"rewards/format_reward": 0.9921875,
|
|
"rewards/mean_confidence_reward": 0.013710937462747097,
|
|
"step": 58
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 725.0,
|
|
"completions/max_terminated_length": 725.0,
|
|
"completions/mean_length": 101.3359375,
|
|
"completions/mean_terminated_length": 101.3359375,
|
|
"completions/min_length": 33.0,
|
|
"completions/min_terminated_length": 33.0,
|
|
"epoch": 0.0944,
|
|
"grad_norm": 0.005760283675044775,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0012,
|
|
"num_tokens": 24336647.0,
|
|
"reward": 0.980745255947113,
|
|
"reward_std": 0.05597168207168579,
|
|
"rewards/accuracy_reward": 0.02734375,
|
|
"rewards/brier_reward": 0.949769914150238,
|
|
"rewards/confidence_one_or_zero": 0.96875,
|
|
"rewards/format_reward": 0.984375,
|
|
"rewards/mean_confidence_reward": 0.016249999403953552,
|
|
"step": 59
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 654.0,
|
|
"completions/max_terminated_length": 654.0,
|
|
"completions/mean_length": 90.69140625,
|
|
"completions/mean_terminated_length": 90.69140625,
|
|
"completions/min_length": 34.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.096,
|
|
"grad_norm": 0.007192954421043396,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0036,
|
|
"num_tokens": 24693912.0,
|
|
"reward": 0.9823489785194397,
|
|
"reward_std": 0.050364814698696136,
|
|
"rewards/accuracy_reward": 0.078125,
|
|
"rewards/brier_reward": 0.9021960496902466,
|
|
"rewards/confidence_one_or_zero": 0.97265625,
|
|
"rewards/format_reward": 0.984375,
|
|
"rewards/mean_confidence_reward": 0.009453125298023224,
|
|
"step": 60
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 791.0,
|
|
"completions/max_terminated_length": 791.0,
|
|
"completions/mean_length": 97.859375,
|
|
"completions/mean_terminated_length": 97.859375,
|
|
"completions/min_length": 34.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.0976,
|
|
"grad_norm": 0.0014389591524377465,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0009,
|
|
"num_tokens": 25065868.0,
|
|
"reward": 0.9979599714279175,
|
|
"reward_std": 0.005772889591753483,
|
|
"rewards/accuracy_reward": 0.078125,
|
|
"rewards/brier_reward": 0.9177929759025574,
|
|
"rewards/confidence_one_or_zero": 0.984375,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.005703125149011612,
|
|
"step": 61
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1016.0,
|
|
"completions/max_terminated_length": 1016.0,
|
|
"completions/mean_length": 92.25,
|
|
"completions/mean_terminated_length": 92.25,
|
|
"completions/min_length": 27.0,
|
|
"completions/min_terminated_length": 27.0,
|
|
"epoch": 0.0992,
|
|
"grad_norm": 0.00016888575919438154,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0001,
|
|
"num_tokens": 25424396.0,
|
|
"reward": 0.9998132586479187,
|
|
"reward_std": 0.0005308896070346236,
|
|
"rewards/accuracy_reward": 0.0625,
|
|
"rewards/brier_reward": 0.9371246099472046,
|
|
"rewards/confidence_one_or_zero": 0.99609375,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0012109375093132257,
|
|
"step": 62
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 801.0,
|
|
"completions/max_terminated_length": 801.0,
|
|
"completions/mean_length": 87.5859375,
|
|
"completions/mean_terminated_length": 87.5859375,
|
|
"completions/min_length": 31.0,
|
|
"completions/min_terminated_length": 31.0,
|
|
"epoch": 0.1008,
|
|
"grad_norm": 0.002231016056612134,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0017,
|
|
"num_tokens": 25789338.0,
|
|
"reward": 0.995339035987854,
|
|
"reward_std": 0.013185895048081875,
|
|
"rewards/accuracy_reward": 0.0390625,
|
|
"rewards/brier_reward": 0.9555199146270752,
|
|
"rewards/confidence_one_or_zero": 0.9921875,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0033984375186264515,
|
|
"step": 63
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 490.0,
|
|
"completions/max_terminated_length": 490.0,
|
|
"completions/mean_length": 76.48046875,
|
|
"completions/mean_terminated_length": 76.48046875,
|
|
"completions/min_length": 33.0,
|
|
"completions/min_terminated_length": 33.0,
|
|
"epoch": 0.1024,
|
|
"grad_norm": 0.004412069451063871,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0008,
|
|
"num_tokens": 26162277.0,
|
|
"reward": 0.9921835660934448,
|
|
"reward_std": 0.02211090549826622,
|
|
"rewards/accuracy_reward": 0.05078125,
|
|
"rewards/brier_reward": 0.9413964748382568,
|
|
"rewards/confidence_one_or_zero": 0.99609375,
|
|
"rewards/format_reward": 0.9921875,
|
|
"rewards/mean_confidence_reward": 0.00019531250291038305,
|
|
"step": 64
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 616.0,
|
|
"completions/max_terminated_length": 616.0,
|
|
"completions/mean_length": 79.14453125,
|
|
"completions/mean_terminated_length": 79.14453125,
|
|
"completions/min_length": 30.0,
|
|
"completions/min_terminated_length": 30.0,
|
|
"epoch": 0.104,
|
|
"grad_norm": 1.28965211843024e-06,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 26526962.0,
|
|
"reward": 0.9999991655349731,
|
|
"reward_std": 4.981606707588071e-06,
|
|
"rewards/accuracy_reward": 0.0390625,
|
|
"rewards/brier_reward": 0.9609339833259583,
|
|
"rewards/confidence_one_or_zero": 0.99609375,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.00011718749738065526,
|
|
"step": 65
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 742.0,
|
|
"completions/max_terminated_length": 742.0,
|
|
"completions/mean_length": 89.03515625,
|
|
"completions/mean_terminated_length": 89.38431549072266,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 35.0,
|
|
"epoch": 0.1056,
|
|
"grad_norm": 0.0006708145374432206,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.002,
|
|
"num_tokens": 26899483.0,
|
|
"reward": 0.9960939288139343,
|
|
"reward_std": 0.011050763539969921,
|
|
"rewards/accuracy_reward": 0.04296875,
|
|
"rewards/brier_reward": 0.9531234502792358,
|
|
"rewards/confidence_one_or_zero": 0.99609375,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 7.812499825377017e-05,
|
|
"step": 66
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 912.0,
|
|
"completions/max_terminated_length": 912.0,
|
|
"completions/mean_length": 90.73046875,
|
|
"completions/mean_terminated_length": 90.73046875,
|
|
"completions/min_length": 36.0,
|
|
"completions/min_terminated_length": 36.0,
|
|
"epoch": 0.1072,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 27273822.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.02734375,
|
|
"rewards/brier_reward": 0.97265625,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 67
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 531.0,
|
|
"completions/max_terminated_length": 531.0,
|
|
"completions/mean_length": 75.40234375,
|
|
"completions/mean_terminated_length": 75.40234375,
|
|
"completions/min_length": 29.0,
|
|
"completions/min_terminated_length": 29.0,
|
|
"epoch": 0.1088,
|
|
"grad_norm": 0.0036680991761386395,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0013,
|
|
"num_tokens": 27633941.0,
|
|
"reward": 0.9960947036743164,
|
|
"reward_std": 0.011048543266952038,
|
|
"rewards/accuracy_reward": 0.046875,
|
|
"rewards/brier_reward": 0.94921875,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 68
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 933.0,
|
|
"completions/max_terminated_length": 933.0,
|
|
"completions/mean_length": 88.01953125,
|
|
"completions/mean_terminated_length": 88.01953125,
|
|
"completions/min_length": 35.0,
|
|
"completions/min_terminated_length": 35.0,
|
|
"epoch": 0.1104,
|
|
"grad_norm": 0.006830692756921053,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0181,
|
|
"num_tokens": 27976130.0,
|
|
"reward": 0.9921884536743164,
|
|
"reward_std": 0.022097086533904076,
|
|
"rewards/accuracy_reward": 0.09765625,
|
|
"rewards/brier_reward": 0.89453125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.9921875,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 69
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 702.0,
|
|
"completions/max_terminated_length": 702.0,
|
|
"completions/mean_length": 79.59375,
|
|
"completions/mean_terminated_length": 79.59375,
|
|
"completions/min_length": 33.0,
|
|
"completions/min_terminated_length": 33.0,
|
|
"epoch": 0.112,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 28339722.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.08984375,
|
|
"rewards/brier_reward": 0.91015625,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 70
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 820.0,
|
|
"completions/max_terminated_length": 820.0,
|
|
"completions/mean_length": 72.95703125,
|
|
"completions/mean_terminated_length": 72.95703125,
|
|
"completions/min_length": 33.0,
|
|
"completions/min_terminated_length": 33.0,
|
|
"epoch": 0.1136,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 28692735.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.078125,
|
|
"rewards/brier_reward": 0.921875,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 71
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 580.0,
|
|
"completions/max_terminated_length": 580.0,
|
|
"completions/mean_length": 77.0,
|
|
"completions/mean_terminated_length": 77.0,
|
|
"completions/min_length": 33.0,
|
|
"completions/min_terminated_length": 33.0,
|
|
"epoch": 0.1152,
|
|
"grad_norm": 0.004060762468725443,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0092,
|
|
"num_tokens": 29041839.0,
|
|
"reward": 0.9960947036743164,
|
|
"reward_std": 0.011048543266952038,
|
|
"rewards/accuracy_reward": 0.015625,
|
|
"rewards/brier_reward": 0.98046875,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 72
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 769.0,
|
|
"completions/max_terminated_length": 769.0,
|
|
"completions/mean_length": 82.875,
|
|
"completions/mean_terminated_length": 82.875,
|
|
"completions/min_length": 32.0,
|
|
"completions/min_terminated_length": 32.0,
|
|
"epoch": 0.1168,
|
|
"grad_norm": 0.002366835018619895,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0003,
|
|
"num_tokens": 29406799.0,
|
|
"reward": 0.9960944652557373,
|
|
"reward_std": 0.01104910671710968,
|
|
"rewards/accuracy_reward": 0.03515625,
|
|
"rewards/brier_reward": 0.9609371423721313,
|
|
"rewards/confidence_one_or_zero": 0.99609375,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 3.9062499126885086e-05,
|
|
"step": 73
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 545.0,
|
|
"completions/max_terminated_length": 545.0,
|
|
"completions/mean_length": 73.09765625,
|
|
"completions/mean_terminated_length": 73.09765625,
|
|
"completions/min_length": 28.0,
|
|
"completions/min_terminated_length": 28.0,
|
|
"epoch": 0.1184,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 29751056.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0546875,
|
|
"rewards/brier_reward": 0.9453125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 74
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 463.0,
|
|
"completions/max_terminated_length": 463.0,
|
|
"completions/mean_length": 80.96484375,
|
|
"completions/mean_terminated_length": 80.96484375,
|
|
"completions/min_length": 33.0,
|
|
"completions/min_terminated_length": 33.0,
|
|
"epoch": 0.12,
|
|
"grad_norm": 0.00017594861856196076,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0002,
|
|
"num_tokens": 30121591.0,
|
|
"reward": 0.9998788833618164,
|
|
"reward_std": 0.0003452748933341354,
|
|
"rewards/accuracy_reward": 0.02734375,
|
|
"rewards/brier_reward": 0.972412109375,
|
|
"rewards/confidence_one_or_zero": 0.99609375,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0009765625,
|
|
"step": 75
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 510.0,
|
|
"completions/max_terminated_length": 510.0,
|
|
"completions/mean_length": 70.1875,
|
|
"completions/mean_terminated_length": 70.1875,
|
|
"completions/min_length": 34.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.1216,
|
|
"grad_norm": 0.004419372417032719,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0021,
|
|
"num_tokens": 30475583.0,
|
|
"reward": 0.9960947036743164,
|
|
"reward_std": 0.011048554442822933,
|
|
"rewards/accuracy_reward": 0.08203125,
|
|
"rewards/brier_reward": 0.9140625,
|
|
"rewards/confidence_one_or_zero": 0.99609375,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 76
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 454.0,
|
|
"completions/max_terminated_length": 454.0,
|
|
"completions/mean_length": 70.890625,
|
|
"completions/mean_terminated_length": 70.890625,
|
|
"completions/min_length": 9.0,
|
|
"completions/min_terminated_length": 9.0,
|
|
"epoch": 0.1232,
|
|
"grad_norm": 0.0018329236190766096,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0025,
|
|
"num_tokens": 30834539.0,
|
|
"reward": 0.9960947036743164,
|
|
"reward_std": 0.011048554442822933,
|
|
"rewards/accuracy_reward": 0.1015625,
|
|
"rewards/brier_reward": 0.89453125,
|
|
"rewards/confidence_one_or_zero": 0.99609375,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 77
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 559.0,
|
|
"completions/max_terminated_length": 559.0,
|
|
"completions/mean_length": 72.89453125,
|
|
"completions/mean_terminated_length": 72.89453125,
|
|
"completions/min_length": 31.0,
|
|
"completions/min_terminated_length": 31.0,
|
|
"epoch": 0.1248,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 31189880.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.05859375,
|
|
"rewards/brier_reward": 0.94140625,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 78
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 476.0,
|
|
"completions/max_terminated_length": 476.0,
|
|
"completions/mean_length": 69.28515625,
|
|
"completions/mean_terminated_length": 69.28515625,
|
|
"completions/min_length": 30.0,
|
|
"completions/min_terminated_length": 30.0,
|
|
"epoch": 0.1264,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 31547609.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0390625,
|
|
"rewards/brier_reward": 0.9609375,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 79
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 546.0,
|
|
"completions/max_terminated_length": 546.0,
|
|
"completions/mean_length": 77.7265625,
|
|
"completions/mean_terminated_length": 77.7265625,
|
|
"completions/min_length": 30.0,
|
|
"completions/min_terminated_length": 30.0,
|
|
"epoch": 0.128,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 31909627.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.04296875,
|
|
"rewards/brier_reward": 0.95703125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 80
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 347.0,
|
|
"completions/max_terminated_length": 347.0,
|
|
"completions/mean_length": 69.625,
|
|
"completions/mean_terminated_length": 69.625,
|
|
"completions/min_length": 31.0,
|
|
"completions/min_terminated_length": 31.0,
|
|
"epoch": 0.1296,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 32267299.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.05078125,
|
|
"rewards/brier_reward": 0.94921875,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 81
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 565.0,
|
|
"completions/max_terminated_length": 565.0,
|
|
"completions/mean_length": 66.53125,
|
|
"completions/mean_terminated_length": 66.53125,
|
|
"completions/min_length": 34.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.1312,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 32627651.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.05078125,
|
|
"rewards/brier_reward": 0.94921875,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 82
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 654.0,
|
|
"completions/max_terminated_length": 654.0,
|
|
"completions/mean_length": 71.55859375,
|
|
"completions/mean_terminated_length": 71.55859375,
|
|
"completions/min_length": 34.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.1328,
|
|
"grad_norm": 0.004377719480544329,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0005,
|
|
"num_tokens": 32983826.0,
|
|
"reward": 0.9921884536743164,
|
|
"reward_std": 0.022097086533904076,
|
|
"rewards/accuracy_reward": 0.046875,
|
|
"rewards/brier_reward": 0.9453125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.9921875,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 83
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1187.0,
|
|
"completions/max_terminated_length": 1187.0,
|
|
"completions/mean_length": 73.47265625,
|
|
"completions/mean_terminated_length": 73.47265625,
|
|
"completions/min_length": 30.0,
|
|
"completions/min_terminated_length": 30.0,
|
|
"epoch": 0.1344,
|
|
"grad_norm": 0.004862726666033268,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.018,
|
|
"num_tokens": 33352691.0,
|
|
"reward": 0.9921884536743164,
|
|
"reward_std": 0.022097086533904076,
|
|
"rewards/accuracy_reward": 0.0625,
|
|
"rewards/brier_reward": 0.9296875,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.9921875,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 84
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 338.0,
|
|
"completions/max_terminated_length": 338.0,
|
|
"completions/mean_length": 69.0,
|
|
"completions/mean_terminated_length": 69.0,
|
|
"completions/min_length": 34.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.136,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 33705051.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0859375,
|
|
"rewards/brier_reward": 0.9140625,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 85
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 636.0,
|
|
"completions/max_terminated_length": 636.0,
|
|
"completions/mean_length": 69.3828125,
|
|
"completions/mean_terminated_length": 69.3828125,
|
|
"completions/min_length": 31.0,
|
|
"completions/min_terminated_length": 31.0,
|
|
"epoch": 0.1376,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 34053397.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.04296875,
|
|
"rewards/brier_reward": 0.95703125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 86
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 596.0,
|
|
"completions/max_terminated_length": 596.0,
|
|
"completions/mean_length": 78.84375,
|
|
"completions/mean_terminated_length": 78.84375,
|
|
"completions/min_length": 32.0,
|
|
"completions/min_terminated_length": 32.0,
|
|
"epoch": 0.1392,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 34412237.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.08984375,
|
|
"rewards/brier_reward": 0.91015625,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 87
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 415.0,
|
|
"completions/max_terminated_length": 415.0,
|
|
"completions/mean_length": 65.546875,
|
|
"completions/mean_terminated_length": 65.546875,
|
|
"completions/min_length": 31.0,
|
|
"completions/min_terminated_length": 31.0,
|
|
"epoch": 0.1408,
|
|
"grad_norm": 0.007279723882675171,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0011,
|
|
"num_tokens": 34761065.0,
|
|
"reward": 0.9960947036743164,
|
|
"reward_std": 0.011048543266952038,
|
|
"rewards/accuracy_reward": 0.0625,
|
|
"rewards/brier_reward": 0.93359375,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 88
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 752.0,
|
|
"completions/max_terminated_length": 752.0,
|
|
"completions/mean_length": 69.90625,
|
|
"completions/mean_terminated_length": 69.90625,
|
|
"completions/min_length": 32.0,
|
|
"completions/min_terminated_length": 32.0,
|
|
"epoch": 0.1424,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 35123865.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.04296875,
|
|
"rewards/brier_reward": 0.95703125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 89
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 886.0,
|
|
"completions/max_terminated_length": 886.0,
|
|
"completions/mean_length": 71.12890625,
|
|
"completions/mean_terminated_length": 71.12890625,
|
|
"completions/min_length": 33.0,
|
|
"completions/min_terminated_length": 33.0,
|
|
"epoch": 0.144,
|
|
"grad_norm": 0.0048187910579144955,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 35476994.0,
|
|
"reward": 0.9921884536743164,
|
|
"reward_std": 0.022097086533904076,
|
|
"rewards/accuracy_reward": 0.05859375,
|
|
"rewards/brier_reward": 0.93359375,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.9921875,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 90
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 375.0,
|
|
"completions/max_terminated_length": 375.0,
|
|
"completions/mean_length": 65.71484375,
|
|
"completions/mean_terminated_length": 65.71484375,
|
|
"completions/min_length": 30.0,
|
|
"completions/min_terminated_length": 30.0,
|
|
"epoch": 0.1456,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 35820049.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.09375,
|
|
"rewards/brier_reward": 0.90625,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 91
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 528.0,
|
|
"completions/max_terminated_length": 528.0,
|
|
"completions/mean_length": 71.38671875,
|
|
"completions/mean_terminated_length": 71.38671875,
|
|
"completions/min_length": 29.0,
|
|
"completions/min_terminated_length": 29.0,
|
|
"epoch": 0.1472,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 36176772.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.04296875,
|
|
"rewards/brier_reward": 0.95703125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 92
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 395.0,
|
|
"completions/max_terminated_length": 395.0,
|
|
"completions/mean_length": 69.9609375,
|
|
"completions/mean_terminated_length": 69.9609375,
|
|
"completions/min_length": 34.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.1488,
|
|
"grad_norm": 0.005049441009759903,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0016,
|
|
"num_tokens": 36533554.0,
|
|
"reward": 0.9960947036743164,
|
|
"reward_std": 0.011048543266952038,
|
|
"rewards/accuracy_reward": 0.0625,
|
|
"rewards/brier_reward": 0.93359375,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 93
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 674.0,
|
|
"completions/max_terminated_length": 674.0,
|
|
"completions/mean_length": 66.8515625,
|
|
"completions/mean_terminated_length": 66.8515625,
|
|
"completions/min_length": 31.0,
|
|
"completions/min_terminated_length": 31.0,
|
|
"epoch": 0.1504,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 36884772.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0859375,
|
|
"rewards/brier_reward": 0.9140625,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 94
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 693.0,
|
|
"completions/max_terminated_length": 693.0,
|
|
"completions/mean_length": 70.703125,
|
|
"completions/mean_terminated_length": 70.703125,
|
|
"completions/min_length": 32.0,
|
|
"completions/min_terminated_length": 32.0,
|
|
"epoch": 0.152,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 37250480.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.03515625,
|
|
"rewards/brier_reward": 0.96484375,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 95
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 612.0,
|
|
"completions/max_terminated_length": 612.0,
|
|
"completions/mean_length": 78.703125,
|
|
"completions/mean_terminated_length": 78.703125,
|
|
"completions/min_length": 32.0,
|
|
"completions/min_terminated_length": 32.0,
|
|
"epoch": 0.1536,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 37624020.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.05078125,
|
|
"rewards/brier_reward": 0.94921875,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 96
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 428.0,
|
|
"completions/max_terminated_length": 428.0,
|
|
"completions/mean_length": 75.44140625,
|
|
"completions/mean_terminated_length": 75.44140625,
|
|
"completions/min_length": 34.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.1552,
|
|
"grad_norm": 0.006449607666581869,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 37976149.0,
|
|
"reward": 0.9921884536743164,
|
|
"reward_std": 0.02209709770977497,
|
|
"rewards/accuracy_reward": 0.0625,
|
|
"rewards/brier_reward": 0.9296875,
|
|
"rewards/confidence_one_or_zero": 0.99609375,
|
|
"rewards/format_reward": 0.9921875,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 97
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 650.0,
|
|
"completions/max_terminated_length": 650.0,
|
|
"completions/mean_length": 80.921875,
|
|
"completions/mean_terminated_length": 80.921875,
|
|
"completions/min_length": 31.0,
|
|
"completions/min_terminated_length": 31.0,
|
|
"epoch": 0.1568,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 38323969.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.04296875,
|
|
"rewards/brier_reward": 0.95703125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 98
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 559.0,
|
|
"completions/max_terminated_length": 559.0,
|
|
"completions/mean_length": 88.71875,
|
|
"completions/mean_terminated_length": 88.71875,
|
|
"completions/min_length": 30.0,
|
|
"completions/min_terminated_length": 30.0,
|
|
"epoch": 0.1584,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 38693705.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0390625,
|
|
"rewards/brier_reward": 0.9609375,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 99
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 723.0,
|
|
"completions/max_terminated_length": 723.0,
|
|
"completions/mean_length": 106.94140625,
|
|
"completions/mean_terminated_length": 106.94140625,
|
|
"completions/min_length": 34.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.16,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 39079994.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.05859375,
|
|
"rewards/brier_reward": 0.94140625,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 100
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 583.0,
|
|
"completions/max_terminated_length": 583.0,
|
|
"completions/mean_length": 118.9921875,
|
|
"completions/mean_terminated_length": 118.9921875,
|
|
"completions/min_length": 32.0,
|
|
"completions/min_terminated_length": 32.0,
|
|
"epoch": 0.1616,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 39460704.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.03515625,
|
|
"rewards/brier_reward": 0.96484375,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 101
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 685.0,
|
|
"completions/max_terminated_length": 685.0,
|
|
"completions/mean_length": 120.30078125,
|
|
"completions/mean_terminated_length": 120.30078125,
|
|
"completions/min_length": 32.0,
|
|
"completions/min_terminated_length": 32.0,
|
|
"epoch": 0.1632,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 39830373.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.078125,
|
|
"rewards/brier_reward": 0.921875,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 102
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 646.0,
|
|
"completions/max_terminated_length": 646.0,
|
|
"completions/mean_length": 141.27734375,
|
|
"completions/mean_terminated_length": 141.27734375,
|
|
"completions/min_length": 34.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.1648,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 40221460.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.046875,
|
|
"rewards/brier_reward": 0.953125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 103
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 710.0,
|
|
"completions/max_terminated_length": 710.0,
|
|
"completions/mean_length": 156.98828125,
|
|
"completions/mean_terminated_length": 156.98828125,
|
|
"completions/min_length": 31.0,
|
|
"completions/min_terminated_length": 31.0,
|
|
"epoch": 0.1664,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 40598393.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.03125,
|
|
"rewards/brier_reward": 0.96875,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 104
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1091.0,
|
|
"completions/max_terminated_length": 1091.0,
|
|
"completions/mean_length": 171.81640625,
|
|
"completions/mean_terminated_length": 171.81640625,
|
|
"completions/min_length": 30.0,
|
|
"completions/min_terminated_length": 30.0,
|
|
"epoch": 0.168,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 40992818.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.02734375,
|
|
"rewards/brier_reward": 0.97265625,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 105
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 818.0,
|
|
"completions/max_terminated_length": 818.0,
|
|
"completions/mean_length": 199.01953125,
|
|
"completions/mean_terminated_length": 199.01953125,
|
|
"completions/min_length": 32.0,
|
|
"completions/min_terminated_length": 32.0,
|
|
"epoch": 0.1696,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 41400047.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.04296875,
|
|
"rewards/brier_reward": 0.95703125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 106
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 819.0,
|
|
"completions/max_terminated_length": 819.0,
|
|
"completions/mean_length": 205.16796875,
|
|
"completions/mean_terminated_length": 205.16796875,
|
|
"completions/min_length": 33.0,
|
|
"completions/min_terminated_length": 33.0,
|
|
"epoch": 0.1712,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 41803914.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0546875,
|
|
"rewards/brier_reward": 0.9453125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 107
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 827.0,
|
|
"completions/max_terminated_length": 827.0,
|
|
"completions/mean_length": 200.7578125,
|
|
"completions/mean_terminated_length": 200.7578125,
|
|
"completions/min_length": 35.0,
|
|
"completions/min_terminated_length": 35.0,
|
|
"epoch": 0.1728,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 42197876.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.05078125,
|
|
"rewards/brier_reward": 0.94921875,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 108
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 910.0,
|
|
"completions/max_terminated_length": 910.0,
|
|
"completions/mean_length": 207.0859375,
|
|
"completions/mean_terminated_length": 208.71653747558594,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 8.0,
|
|
"epoch": 0.1744,
|
|
"grad_norm": 0.0013323465827852488,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0121,
|
|
"num_tokens": 42583978.0,
|
|
"reward": 0.9882822036743164,
|
|
"reward_std": 0.033145640045404434,
|
|
"rewards/accuracy_reward": 0.04296875,
|
|
"rewards/brier_reward": 0.9453125,
|
|
"rewards/confidence_one_or_zero": 0.99609375,
|
|
"rewards/format_reward": 0.98828125,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 109
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 887.0,
|
|
"completions/max_terminated_length": 887.0,
|
|
"completions/mean_length": 229.625,
|
|
"completions/mean_terminated_length": 229.625,
|
|
"completions/min_length": 34.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.176,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 42986818.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.05078125,
|
|
"rewards/brier_reward": 0.94921875,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 110
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1155.0,
|
|
"completions/max_terminated_length": 1155.0,
|
|
"completions/mean_length": 238.19140625,
|
|
"completions/mean_terminated_length": 238.19140625,
|
|
"completions/min_length": 30.0,
|
|
"completions/min_terminated_length": 30.0,
|
|
"epoch": 0.1776,
|
|
"grad_norm": 0.001869988744147122,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0008,
|
|
"num_tokens": 43389203.0,
|
|
"reward": 0.9960947036743164,
|
|
"reward_std": 0.011048554442822933,
|
|
"rewards/accuracy_reward": 0.046875,
|
|
"rewards/brier_reward": 0.94921875,
|
|
"rewards/confidence_one_or_zero": 0.99609375,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 111
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 700.0,
|
|
"completions/max_terminated_length": 700.0,
|
|
"completions/mean_length": 220.421875,
|
|
"completions/mean_terminated_length": 220.421875,
|
|
"completions/min_length": 31.0,
|
|
"completions/min_terminated_length": 31.0,
|
|
"epoch": 0.1792,
|
|
"grad_norm": 0.0012250031577423215,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0002,
|
|
"num_tokens": 43793759.0,
|
|
"reward": 0.9960165619850159,
|
|
"reward_std": 0.01126952189952135,
|
|
"rewards/accuracy_reward": 0.01953125,
|
|
"rewards/brier_reward": 0.9764062166213989,
|
|
"rewards/confidence_one_or_zero": 0.99609375,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0007812500116415322,
|
|
"step": 112
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 750.0,
|
|
"completions/max_terminated_length": 750.0,
|
|
"completions/mean_length": 242.1171875,
|
|
"completions/mean_terminated_length": 242.1171875,
|
|
"completions/min_length": 35.0,
|
|
"completions/min_terminated_length": 35.0,
|
|
"epoch": 0.1808,
|
|
"grad_norm": 0.0009556888253428042,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0008,
|
|
"num_tokens": 44178789.0,
|
|
"reward": 0.9980478286743164,
|
|
"reward_std": 0.005524259991943836,
|
|
"rewards/accuracy_reward": 0.02734375,
|
|
"rewards/brier_reward": 0.96875,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.00390625,
|
|
"step": 113
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 616.0,
|
|
"completions/max_terminated_length": 616.0,
|
|
"completions/mean_length": 216.3515625,
|
|
"completions/mean_terminated_length": 216.3515625,
|
|
"completions/min_length": 33.0,
|
|
"completions/min_terminated_length": 33.0,
|
|
"epoch": 0.1824,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 44567215.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0390625,
|
|
"rewards/brier_reward": 0.9609375,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 114
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 833.0,
|
|
"completions/max_terminated_length": 833.0,
|
|
"completions/mean_length": 206.95703125,
|
|
"completions/mean_terminated_length": 207.76864624023438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.184,
|
|
"grad_norm": 0.0012391918571665883,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0049,
|
|
"num_tokens": 44962508.0,
|
|
"reward": 0.9921884536743164,
|
|
"reward_std": 0.022097062319517136,
|
|
"rewards/accuracy_reward": 0.0390625,
|
|
"rewards/brier_reward": 0.94921875,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0078125,
|
|
"step": 115
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 760.0,
|
|
"completions/max_terminated_length": 760.0,
|
|
"completions/mean_length": 225.88671875,
|
|
"completions/mean_terminated_length": 226.77256774902344,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.1856,
|
|
"grad_norm": 0.0006183154764585197,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.004,
|
|
"num_tokens": 45362343.0,
|
|
"reward": 0.9960947036743164,
|
|
"reward_std": 0.011048543266952038,
|
|
"rewards/accuracy_reward": 0.046875,
|
|
"rewards/brier_reward": 0.94921875,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 116
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1122.0,
|
|
"completions/max_terminated_length": 1122.0,
|
|
"completions/mean_length": 224.234375,
|
|
"completions/mean_terminated_length": 224.234375,
|
|
"completions/min_length": 34.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.1872,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 45749667.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0546875,
|
|
"rewards/brier_reward": 0.9453125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 117
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 853.0,
|
|
"completions/max_terminated_length": 853.0,
|
|
"completions/mean_length": 214.21875,
|
|
"completions/mean_terminated_length": 214.21875,
|
|
"completions/min_length": 34.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.1888,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 46136491.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0390625,
|
|
"rewards/brier_reward": 0.9609375,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 118
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 808.0,
|
|
"completions/max_terminated_length": 808.0,
|
|
"completions/mean_length": 209.6796875,
|
|
"completions/mean_terminated_length": 209.6796875,
|
|
"completions/min_length": 35.0,
|
|
"completions/min_terminated_length": 35.0,
|
|
"epoch": 0.1904,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 46527353.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0234375,
|
|
"rewards/brier_reward": 0.9765625,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 119
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 1133.0,
|
|
"completions/max_terminated_length": 1133.0,
|
|
"completions/mean_length": 202.578125,
|
|
"completions/mean_terminated_length": 204.17323303222656,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.192,
|
|
"grad_norm": 0.001083175651729107,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0054,
|
|
"num_tokens": 46903165.0,
|
|
"reward": 0.9921884536743164,
|
|
"reward_std": 0.022097086533904076,
|
|
"rewards/accuracy_reward": 0.05078125,
|
|
"rewards/brier_reward": 0.94140625,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.9921875,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 120
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 634.0,
|
|
"completions/max_terminated_length": 634.0,
|
|
"completions/mean_length": 177.58203125,
|
|
"completions/mean_terminated_length": 177.58203125,
|
|
"completions/min_length": 32.0,
|
|
"completions/min_terminated_length": 32.0,
|
|
"epoch": 0.1936,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 47267402.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0859375,
|
|
"rewards/brier_reward": 0.9140625,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 121
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 844.0,
|
|
"completions/max_terminated_length": 844.0,
|
|
"completions/mean_length": 209.1328125,
|
|
"completions/mean_terminated_length": 209.1328125,
|
|
"completions/min_length": 34.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.1952,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 47663876.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.04296875,
|
|
"rewards/brier_reward": 0.95703125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 122
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 709.0,
|
|
"completions/max_terminated_length": 709.0,
|
|
"completions/mean_length": 227.12109375,
|
|
"completions/mean_terminated_length": 228.01177978515625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 36.0,
|
|
"epoch": 0.1968,
|
|
"grad_norm": 0.0005709947436116636,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0032,
|
|
"num_tokens": 48065971.0,
|
|
"reward": 0.9960947036743164,
|
|
"reward_std": 0.011048543266952038,
|
|
"rewards/accuracy_reward": 0.08984375,
|
|
"rewards/brier_reward": 0.90625,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 123
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 698.0,
|
|
"completions/max_terminated_length": 698.0,
|
|
"completions/mean_length": 197.13671875,
|
|
"completions/mean_terminated_length": 197.13671875,
|
|
"completions/min_length": 30.0,
|
|
"completions/min_terminated_length": 30.0,
|
|
"epoch": 0.1984,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 48455606.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0625,
|
|
"rewards/brier_reward": 0.9375,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 124
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 601.0,
|
|
"completions/max_terminated_length": 601.0,
|
|
"completions/mean_length": 173.9921875,
|
|
"completions/mean_terminated_length": 173.9921875,
|
|
"completions/min_length": 34.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.2,
|
|
"grad_norm": 0.0014897036598995328,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0033,
|
|
"num_tokens": 48846340.0,
|
|
"reward": 0.9980478286743164,
|
|
"reward_std": 0.005524259991943836,
|
|
"rewards/accuracy_reward": 0.01171875,
|
|
"rewards/brier_reward": 0.984375,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.00390625,
|
|
"step": 125
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 923.0,
|
|
"completions/max_terminated_length": 923.0,
|
|
"completions/mean_length": 186.07421875,
|
|
"completions/mean_terminated_length": 186.07421875,
|
|
"completions/min_length": 37.0,
|
|
"completions/min_terminated_length": 37.0,
|
|
"epoch": 0.2016,
|
|
"grad_norm": 0.0018973132828250527,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.002,
|
|
"num_tokens": 49242423.0,
|
|
"reward": 0.9941415786743164,
|
|
"reward_std": 0.016572803258895874,
|
|
"rewards/accuracy_reward": 0.0234375,
|
|
"rewards/brier_reward": 0.96875,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.00390625,
|
|
"step": 126
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1248.0,
|
|
"completions/max_terminated_length": 1248.0,
|
|
"completions/mean_length": 184.8125,
|
|
"completions/mean_terminated_length": 185.53726196289062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.2032,
|
|
"grad_norm": 0.0006578292814083397,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0035,
|
|
"num_tokens": 49638551.0,
|
|
"reward": 0.9960947036743164,
|
|
"reward_std": 0.011048543266952038,
|
|
"rewards/accuracy_reward": 0.03125,
|
|
"rewards/brier_reward": 0.96484375,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 127
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 778.0,
|
|
"completions/max_terminated_length": 778.0,
|
|
"completions/mean_length": 153.65625,
|
|
"completions/mean_terminated_length": 153.65625,
|
|
"completions/min_length": 31.0,
|
|
"completions/min_terminated_length": 31.0,
|
|
"epoch": 0.2048,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 50025647.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.05859375,
|
|
"rewards/brier_reward": 0.94140625,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 128
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 667.0,
|
|
"completions/max_terminated_length": 667.0,
|
|
"completions/mean_length": 173.5390625,
|
|
"completions/mean_terminated_length": 173.5390625,
|
|
"completions/min_length": 32.0,
|
|
"completions/min_terminated_length": 32.0,
|
|
"epoch": 0.2064,
|
|
"grad_norm": 0.0015325110871344805,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0006,
|
|
"num_tokens": 50417145.0,
|
|
"reward": 0.9980478286743164,
|
|
"reward_std": 0.005524259991943836,
|
|
"rewards/accuracy_reward": 0.07421875,
|
|
"rewards/brier_reward": 0.921875,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.00390625,
|
|
"step": 129
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 656.0,
|
|
"completions/max_terminated_length": 656.0,
|
|
"completions/mean_length": 159.5390625,
|
|
"completions/mean_terminated_length": 159.5390625,
|
|
"completions/min_length": 34.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.208,
|
|
"grad_norm": 0.0027299756184220314,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0036,
|
|
"num_tokens": 50782203.0,
|
|
"reward": 0.9960947036743164,
|
|
"reward_std": 0.011048543266952038,
|
|
"rewards/accuracy_reward": 0.046875,
|
|
"rewards/brier_reward": 0.94921875,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 130
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 897.0,
|
|
"completions/max_terminated_length": 897.0,
|
|
"completions/mean_length": 174.40234375,
|
|
"completions/mean_terminated_length": 174.40234375,
|
|
"completions/min_length": 34.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.2096,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 51173466.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.07421875,
|
|
"rewards/brier_reward": 0.92578125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 131
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 807.0,
|
|
"completions/max_terminated_length": 807.0,
|
|
"completions/mean_length": 203.46875,
|
|
"completions/mean_terminated_length": 203.46875,
|
|
"completions/min_length": 35.0,
|
|
"completions/min_terminated_length": 35.0,
|
|
"epoch": 0.2112,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 51570122.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.06640625,
|
|
"rewards/brier_reward": 0.93359375,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 132
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 782.0,
|
|
"completions/max_terminated_length": 782.0,
|
|
"completions/mean_length": 201.28515625,
|
|
"completions/mean_terminated_length": 201.28515625,
|
|
"completions/min_length": 36.0,
|
|
"completions/min_terminated_length": 36.0,
|
|
"epoch": 0.2128,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 51958379.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.02734375,
|
|
"rewards/brier_reward": 0.97265625,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 133
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 648.0,
|
|
"completions/max_terminated_length": 648.0,
|
|
"completions/mean_length": 191.67578125,
|
|
"completions/mean_terminated_length": 191.67578125,
|
|
"completions/min_length": 34.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.2144,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 52341992.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.03125,
|
|
"rewards/brier_reward": 0.96875,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 134
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 616.0,
|
|
"completions/max_terminated_length": 616.0,
|
|
"completions/mean_length": 212.33203125,
|
|
"completions/mean_terminated_length": 213.1647186279297,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 33.0,
|
|
"epoch": 0.216,
|
|
"grad_norm": 0.0007441657362505794,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0037,
|
|
"num_tokens": 52729437.0,
|
|
"reward": 0.9960947036743164,
|
|
"reward_std": 0.011048543266952038,
|
|
"rewards/accuracy_reward": 0.03125,
|
|
"rewards/brier_reward": 0.96484375,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 135
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1209.0,
|
|
"completions/max_terminated_length": 1209.0,
|
|
"completions/mean_length": 202.16015625,
|
|
"completions/mean_terminated_length": 202.16015625,
|
|
"completions/min_length": 32.0,
|
|
"completions/min_terminated_length": 32.0,
|
|
"epoch": 0.2176,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 53138966.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.03125,
|
|
"rewards/brier_reward": 0.96875,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 136
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 669.0,
|
|
"completions/max_terminated_length": 669.0,
|
|
"completions/mean_length": 204.73046875,
|
|
"completions/mean_terminated_length": 204.73046875,
|
|
"completions/min_length": 32.0,
|
|
"completions/min_terminated_length": 32.0,
|
|
"epoch": 0.2192,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 53537097.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.09375,
|
|
"rewards/brier_reward": 0.90625,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 137
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 882.0,
|
|
"completions/max_terminated_length": 882.0,
|
|
"completions/mean_length": 211.83984375,
|
|
"completions/mean_terminated_length": 211.83984375,
|
|
"completions/min_length": 32.0,
|
|
"completions/min_terminated_length": 32.0,
|
|
"epoch": 0.2208,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 53940168.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.03515625,
|
|
"rewards/brier_reward": 0.96484375,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 138
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 709.0,
|
|
"completions/max_terminated_length": 709.0,
|
|
"completions/mean_length": 217.58203125,
|
|
"completions/mean_terminated_length": 218.435302734375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 39.0,
|
|
"epoch": 0.2224,
|
|
"grad_norm": 0.0006282851682044566,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0034,
|
|
"num_tokens": 54337637.0,
|
|
"reward": 0.9960947036743164,
|
|
"reward_std": 0.011048543266952038,
|
|
"rewards/accuracy_reward": 0.05859375,
|
|
"rewards/brier_reward": 0.9375,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 139
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 817.0,
|
|
"completions/max_terminated_length": 817.0,
|
|
"completions/mean_length": 235.42578125,
|
|
"completions/mean_terminated_length": 235.42578125,
|
|
"completions/min_length": 33.0,
|
|
"completions/min_terminated_length": 33.0,
|
|
"epoch": 0.224,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 54745834.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0546875,
|
|
"rewards/brier_reward": 0.9453125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 140
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1011.0,
|
|
"completions/max_terminated_length": 1011.0,
|
|
"completions/mean_length": 214.875,
|
|
"completions/mean_terminated_length": 214.875,
|
|
"completions/min_length": 32.0,
|
|
"completions/min_terminated_length": 32.0,
|
|
"epoch": 0.2256,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 55149106.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.046875,
|
|
"rewards/brier_reward": 0.953125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 141
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 894.0,
|
|
"completions/max_terminated_length": 894.0,
|
|
"completions/mean_length": 237.59765625,
|
|
"completions/mean_terminated_length": 237.59765625,
|
|
"completions/min_length": 33.0,
|
|
"completions/min_terminated_length": 33.0,
|
|
"epoch": 0.2272,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 55568331.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.046875,
|
|
"rewards/brier_reward": 0.953125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 142
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 786.0,
|
|
"completions/max_terminated_length": 786.0,
|
|
"completions/mean_length": 210.86328125,
|
|
"completions/mean_terminated_length": 210.86328125,
|
|
"completions/min_length": 34.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.2288,
|
|
"grad_norm": 0.0012927583884447813,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 55980312.0,
|
|
"reward": 0.9960947036743164,
|
|
"reward_std": 0.011048543266952038,
|
|
"rewards/accuracy_reward": 0.08984375,
|
|
"rewards/brier_reward": 0.90625,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 143
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1181.0,
|
|
"completions/max_terminated_length": 1181.0,
|
|
"completions/mean_length": 213.9765625,
|
|
"completions/mean_terminated_length": 213.9765625,
|
|
"completions/min_length": 37.0,
|
|
"completions/min_terminated_length": 37.0,
|
|
"epoch": 0.2304,
|
|
"grad_norm": 0.001867619575932622,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0031,
|
|
"num_tokens": 56373322.0,
|
|
"reward": 0.9960947036743164,
|
|
"reward_std": 0.011048543266952038,
|
|
"rewards/accuracy_reward": 0.0390625,
|
|
"rewards/brier_reward": 0.95703125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 144
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 933.0,
|
|
"completions/max_terminated_length": 933.0,
|
|
"completions/mean_length": 239.46484375,
|
|
"completions/mean_terminated_length": 240.4039306640625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 35.0,
|
|
"epoch": 0.232,
|
|
"grad_norm": 0.0005441193352453411,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0035,
|
|
"num_tokens": 56791865.0,
|
|
"reward": 0.9960947036743164,
|
|
"reward_std": 0.011048543266952038,
|
|
"rewards/accuracy_reward": 0.015625,
|
|
"rewards/brier_reward": 0.98046875,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 145
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 890.0,
|
|
"completions/max_terminated_length": 890.0,
|
|
"completions/mean_length": 220.12890625,
|
|
"completions/mean_terminated_length": 220.12890625,
|
|
"completions/min_length": 33.0,
|
|
"completions/min_terminated_length": 33.0,
|
|
"epoch": 0.2336,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 57197338.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.046875,
|
|
"rewards/brier_reward": 0.953125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 146
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 683.0,
|
|
"completions/max_terminated_length": 683.0,
|
|
"completions/mean_length": 207.7890625,
|
|
"completions/mean_terminated_length": 208.6039276123047,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 33.0,
|
|
"epoch": 0.2352,
|
|
"grad_norm": 0.0005851782043464482,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0032,
|
|
"num_tokens": 57585476.0,
|
|
"reward": 0.9960947036743164,
|
|
"reward_std": 0.011048543266952038,
|
|
"rewards/accuracy_reward": 0.0703125,
|
|
"rewards/brier_reward": 0.92578125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 147
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 715.0,
|
|
"completions/max_terminated_length": 715.0,
|
|
"completions/mean_length": 206.08203125,
|
|
"completions/mean_terminated_length": 206.08203125,
|
|
"completions/min_length": 36.0,
|
|
"completions/min_terminated_length": 36.0,
|
|
"epoch": 0.2368,
|
|
"grad_norm": 5.514766598935239e-05,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 57981521.0,
|
|
"reward": 0.9999228119850159,
|
|
"reward_std": 0.00022097892360761762,
|
|
"rewards/accuracy_reward": 0.0390625,
|
|
"rewards/brier_reward": 0.9607812166213989,
|
|
"rewards/confidence_one_or_zero": 0.99609375,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0007812500116415322,
|
|
"step": 148
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 628.0,
|
|
"completions/max_terminated_length": 628.0,
|
|
"completions/mean_length": 215.3046875,
|
|
"completions/mean_terminated_length": 215.3046875,
|
|
"completions/min_length": 36.0,
|
|
"completions/min_terminated_length": 36.0,
|
|
"epoch": 0.2384,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 58385359.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.01171875,
|
|
"rewards/brier_reward": 0.98828125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 149
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 800.0,
|
|
"completions/max_terminated_length": 800.0,
|
|
"completions/mean_length": 211.234375,
|
|
"completions/mean_terminated_length": 212.06275939941406,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 33.0,
|
|
"epoch": 0.24,
|
|
"grad_norm": 0.0006218485650606453,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.004,
|
|
"num_tokens": 58765427.0,
|
|
"reward": 0.9960947036743164,
|
|
"reward_std": 0.011048543266952038,
|
|
"rewards/accuracy_reward": 0.05078125,
|
|
"rewards/brier_reward": 0.9453125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 150
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1196.0,
|
|
"completions/max_terminated_length": 1196.0,
|
|
"completions/mean_length": 230.0390625,
|
|
"completions/mean_terminated_length": 230.0390625,
|
|
"completions/min_length": 34.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.2416,
|
|
"grad_norm": 0.0008855098858475685,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0001,
|
|
"num_tokens": 59164717.0,
|
|
"reward": 0.9952874779701233,
|
|
"reward_std": 0.01333173643797636,
|
|
"rewards/accuracy_reward": 0.03515625,
|
|
"rewards/brier_reward": 0.9593230485916138,
|
|
"rewards/confidence_one_or_zero": 0.9921875,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0030859375838190317,
|
|
"step": 151
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 918.0,
|
|
"completions/max_terminated_length": 918.0,
|
|
"completions/mean_length": 229.26171875,
|
|
"completions/mean_terminated_length": 229.26171875,
|
|
"completions/min_length": 33.0,
|
|
"completions/min_terminated_length": 33.0,
|
|
"epoch": 0.2432,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 59550792.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0546875,
|
|
"rewards/brier_reward": 0.9453125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 152
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 882.0,
|
|
"completions/max_terminated_length": 882.0,
|
|
"completions/mean_length": 208.0078125,
|
|
"completions/mean_terminated_length": 208.0078125,
|
|
"completions/min_length": 35.0,
|
|
"completions/min_terminated_length": 35.0,
|
|
"epoch": 0.2448,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 59946482.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.05859375,
|
|
"rewards/brier_reward": 0.94140625,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 153
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1041.0,
|
|
"completions/max_terminated_length": 1041.0,
|
|
"completions/mean_length": 230.6875,
|
|
"completions/mean_terminated_length": 230.6875,
|
|
"completions/min_length": 35.0,
|
|
"completions/min_terminated_length": 35.0,
|
|
"epoch": 0.2464,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 60348042.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.046875,
|
|
"rewards/brier_reward": 0.953125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 154
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 711.0,
|
|
"completions/max_terminated_length": 711.0,
|
|
"completions/mean_length": 223.625,
|
|
"completions/mean_terminated_length": 223.625,
|
|
"completions/min_length": 34.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.248,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 60753058.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.015625,
|
|
"rewards/brier_reward": 0.984375,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 155
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1380.0,
|
|
"completions/max_terminated_length": 1380.0,
|
|
"completions/mean_length": 221.984375,
|
|
"completions/mean_terminated_length": 222.85491943359375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 32.0,
|
|
"epoch": 0.2496,
|
|
"grad_norm": 0.0004327092319726944,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0023,
|
|
"num_tokens": 61151358.0,
|
|
"reward": 0.9960947036743164,
|
|
"reward_std": 0.011048543266952038,
|
|
"rewards/accuracy_reward": 0.03515625,
|
|
"rewards/brier_reward": 0.9609375,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 156
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 822.0,
|
|
"completions/max_terminated_length": 822.0,
|
|
"completions/mean_length": 215.8046875,
|
|
"completions/mean_terminated_length": 216.65098571777344,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 36.0,
|
|
"epoch": 0.2512,
|
|
"grad_norm": 0.0005879381787963212,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0029,
|
|
"num_tokens": 61542484.0,
|
|
"reward": 0.9960947036743164,
|
|
"reward_std": 0.011048543266952038,
|
|
"rewards/accuracy_reward": 0.09375,
|
|
"rewards/brier_reward": 0.90234375,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 157
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1006.0,
|
|
"completions/max_terminated_length": 1006.0,
|
|
"completions/mean_length": 242.6328125,
|
|
"completions/mean_terminated_length": 242.6328125,
|
|
"completions/min_length": 41.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.2528,
|
|
"grad_norm": 0.0002549047057982534,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0006,
|
|
"num_tokens": 61962934.0,
|
|
"reward": 0.9990164041519165,
|
|
"reward_std": 0.002784787444397807,
|
|
"rewards/accuracy_reward": 0.02734375,
|
|
"rewards/brier_reward": 0.9706870913505554,
|
|
"rewards/confidence_one_or_zero": 0.99609375,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0027734374161809683,
|
|
"step": 158
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 768.0,
|
|
"completions/max_terminated_length": 768.0,
|
|
"completions/mean_length": 209.625,
|
|
"completions/mean_terminated_length": 209.625,
|
|
"completions/min_length": 34.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.2544,
|
|
"grad_norm": 0.0014845837140455842,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0019,
|
|
"num_tokens": 62359782.0,
|
|
"reward": 0.995782196521759,
|
|
"reward_std": 0.01193243358284235,
|
|
"rewards/accuracy_reward": 0.0078125,
|
|
"rewards/brier_reward": 0.9876562356948853,
|
|
"rewards/confidence_one_or_zero": 0.99609375,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0015625000232830644,
|
|
"step": 159
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 670.0,
|
|
"completions/max_terminated_length": 670.0,
|
|
"completions/mean_length": 211.0703125,
|
|
"completions/mean_terminated_length": 211.89805603027344,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 36.0,
|
|
"epoch": 0.256,
|
|
"grad_norm": 0.002763985889032483,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0024,
|
|
"num_tokens": 62747432.0,
|
|
"reward": 0.9921884536743164,
|
|
"reward_std": 0.022097086533904076,
|
|
"rewards/accuracy_reward": 0.06640625,
|
|
"rewards/brier_reward": 0.92578125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.9921875,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 160
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 880.0,
|
|
"completions/max_terminated_length": 880.0,
|
|
"completions/mean_length": 203.00390625,
|
|
"completions/mean_terminated_length": 203.00390625,
|
|
"completions/min_length": 34.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.2576,
|
|
"grad_norm": 0.002416276140138507,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0026,
|
|
"num_tokens": 63138033.0,
|
|
"reward": 0.9960947036743164,
|
|
"reward_std": 0.011048543266952038,
|
|
"rewards/accuracy_reward": 0.046875,
|
|
"rewards/brier_reward": 0.94921875,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 161
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 774.0,
|
|
"completions/max_terminated_length": 774.0,
|
|
"completions/mean_length": 195.89453125,
|
|
"completions/mean_terminated_length": 195.89453125,
|
|
"completions/min_length": 34.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.2592,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 63518966.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.05078125,
|
|
"rewards/brier_reward": 0.94921875,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 162
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 883.0,
|
|
"completions/max_terminated_length": 883.0,
|
|
"completions/mean_length": 195.33984375,
|
|
"completions/mean_terminated_length": 196.10589599609375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 32.0,
|
|
"epoch": 0.2608,
|
|
"grad_norm": 0.00068190653109923,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0042,
|
|
"num_tokens": 63907333.0,
|
|
"reward": 0.9960947036743164,
|
|
"reward_std": 0.011048543266952038,
|
|
"rewards/accuracy_reward": 0.0625,
|
|
"rewards/brier_reward": 0.93359375,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 163
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 611.0,
|
|
"completions/max_terminated_length": 611.0,
|
|
"completions/mean_length": 192.30859375,
|
|
"completions/mean_terminated_length": 192.30859375,
|
|
"completions/min_length": 37.0,
|
|
"completions/min_terminated_length": 37.0,
|
|
"epoch": 0.2624,
|
|
"grad_norm": 0.0013250088086351752,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0029,
|
|
"num_tokens": 64304188.0,
|
|
"reward": 0.9960947036743164,
|
|
"reward_std": 0.011048543266952038,
|
|
"rewards/accuracy_reward": 0.046875,
|
|
"rewards/brier_reward": 0.94921875,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 164
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 744.0,
|
|
"completions/max_terminated_length": 744.0,
|
|
"completions/mean_length": 173.0234375,
|
|
"completions/mean_terminated_length": 173.0234375,
|
|
"completions/min_length": 37.0,
|
|
"completions/min_terminated_length": 37.0,
|
|
"epoch": 0.264,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 64678394.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.08203125,
|
|
"rewards/brier_reward": 0.91796875,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 165
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 614.0,
|
|
"completions/max_terminated_length": 614.0,
|
|
"completions/mean_length": 177.8984375,
|
|
"completions/mean_terminated_length": 177.8984375,
|
|
"completions/min_length": 33.0,
|
|
"completions/min_terminated_length": 33.0,
|
|
"epoch": 0.2656,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 65065976.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.02734375,
|
|
"rewards/brier_reward": 0.97265625,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 166
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 868.0,
|
|
"completions/max_terminated_length": 868.0,
|
|
"completions/mean_length": 201.9609375,
|
|
"completions/mean_terminated_length": 202.75296020507812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 33.0,
|
|
"epoch": 0.2672,
|
|
"grad_norm": 0.000607511552516371,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0034,
|
|
"num_tokens": 65473902.0,
|
|
"reward": 0.9960947036743164,
|
|
"reward_std": 0.011048543266952038,
|
|
"rewards/accuracy_reward": 0.01953125,
|
|
"rewards/brier_reward": 0.9765625,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 167
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 896.0,
|
|
"completions/max_terminated_length": 896.0,
|
|
"completions/mean_length": 194.75,
|
|
"completions/mean_terminated_length": 195.51373291015625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 36.0,
|
|
"epoch": 0.2688,
|
|
"grad_norm": 0.0029569705948233604,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0016,
|
|
"num_tokens": 65863102.0,
|
|
"reward": 0.9921884536743164,
|
|
"reward_std": 0.022097086533904076,
|
|
"rewards/accuracy_reward": 0.046875,
|
|
"rewards/brier_reward": 0.9453125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.9921875,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 168
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 733.0,
|
|
"completions/max_terminated_length": 733.0,
|
|
"completions/mean_length": 204.63671875,
|
|
"completions/mean_terminated_length": 204.63671875,
|
|
"completions/min_length": 34.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.2704,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 66262393.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.03515625,
|
|
"rewards/brier_reward": 0.96484375,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 169
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 639.0,
|
|
"completions/max_terminated_length": 639.0,
|
|
"completions/mean_length": 194.66015625,
|
|
"completions/mean_terminated_length": 194.66015625,
|
|
"completions/min_length": 39.0,
|
|
"completions/min_terminated_length": 39.0,
|
|
"epoch": 0.272,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 66633106.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.04296875,
|
|
"rewards/brier_reward": 0.95703125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 170
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 717.0,
|
|
"completions/max_terminated_length": 717.0,
|
|
"completions/mean_length": 204.015625,
|
|
"completions/mean_terminated_length": 204.015625,
|
|
"completions/min_length": 34.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.2736,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 67027142.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.05859375,
|
|
"rewards/brier_reward": 0.94140625,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 171
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 756.0,
|
|
"completions/max_terminated_length": 756.0,
|
|
"completions/mean_length": 196.80859375,
|
|
"completions/mean_terminated_length": 196.80859375,
|
|
"completions/min_length": 32.0,
|
|
"completions/min_terminated_length": 32.0,
|
|
"epoch": 0.2752,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 67429453.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.02734375,
|
|
"rewards/brier_reward": 0.97265625,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 172
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 839.0,
|
|
"completions/max_terminated_length": 839.0,
|
|
"completions/mean_length": 194.81640625,
|
|
"completions/mean_terminated_length": 194.81640625,
|
|
"completions/min_length": 34.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.2768,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 67821702.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0546875,
|
|
"rewards/brier_reward": 0.9453125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 173
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 654.0,
|
|
"completions/max_terminated_length": 654.0,
|
|
"completions/mean_length": 209.265625,
|
|
"completions/mean_terminated_length": 209.265625,
|
|
"completions/min_length": 36.0,
|
|
"completions/min_terminated_length": 36.0,
|
|
"epoch": 0.2784,
|
|
"grad_norm": 0.000906936707906425,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0015,
|
|
"num_tokens": 68223738.0,
|
|
"reward": 0.9980478286743164,
|
|
"reward_std": 0.005524259991943836,
|
|
"rewards/accuracy_reward": 0.01171875,
|
|
"rewards/brier_reward": 0.984375,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.00390625,
|
|
"step": 174
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 624.0,
|
|
"completions/max_terminated_length": 624.0,
|
|
"completions/mean_length": 186.72265625,
|
|
"completions/mean_terminated_length": 186.72265625,
|
|
"completions/min_length": 33.0,
|
|
"completions/min_terminated_length": 33.0,
|
|
"epoch": 0.28,
|
|
"grad_norm": 0.0019213669002056122,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 68601755.0,
|
|
"reward": 0.9943320155143738,
|
|
"reward_std": 0.0160341989248991,
|
|
"rewards/accuracy_reward": 0.04296875,
|
|
"rewards/brier_reward": 0.9495996236801147,
|
|
"rewards/confidence_one_or_zero": 0.99609375,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0037109374534338713,
|
|
"step": 175
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 824.0,
|
|
"completions/max_terminated_length": 824.0,
|
|
"completions/mean_length": 198.609375,
|
|
"completions/mean_terminated_length": 198.609375,
|
|
"completions/min_length": 31.0,
|
|
"completions/min_terminated_length": 31.0,
|
|
"epoch": 0.2816,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 68981303.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.05078125,
|
|
"rewards/brier_reward": 0.94921875,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 176
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 711.0,
|
|
"completions/max_terminated_length": 711.0,
|
|
"completions/mean_length": 184.6328125,
|
|
"completions/mean_terminated_length": 184.6328125,
|
|
"completions/min_length": 31.0,
|
|
"completions/min_terminated_length": 31.0,
|
|
"epoch": 0.2832,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 69372873.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.05859375,
|
|
"rewards/brier_reward": 0.94140625,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 177
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 710.0,
|
|
"completions/max_terminated_length": 710.0,
|
|
"completions/mean_length": 192.1171875,
|
|
"completions/mean_terminated_length": 192.1171875,
|
|
"completions/min_length": 33.0,
|
|
"completions/min_terminated_length": 33.0,
|
|
"epoch": 0.2848,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 69755615.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0234375,
|
|
"rewards/brier_reward": 0.9765625,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 178
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 590.0,
|
|
"completions/max_terminated_length": 590.0,
|
|
"completions/mean_length": 202.31640625,
|
|
"completions/mean_terminated_length": 202.31640625,
|
|
"completions/min_length": 37.0,
|
|
"completions/min_terminated_length": 37.0,
|
|
"epoch": 0.2864,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 70147784.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.05078125,
|
|
"rewards/brier_reward": 0.94921875,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 179
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 841.0,
|
|
"completions/max_terminated_length": 841.0,
|
|
"completions/mean_length": 183.59375,
|
|
"completions/mean_terminated_length": 183.59375,
|
|
"completions/min_length": 33.0,
|
|
"completions/min_terminated_length": 33.0,
|
|
"epoch": 0.288,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 70521872.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.03515625,
|
|
"rewards/brier_reward": 0.96484375,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 180
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 626.0,
|
|
"completions/max_terminated_length": 626.0,
|
|
"completions/mean_length": 180.97265625,
|
|
"completions/mean_terminated_length": 180.97265625,
|
|
"completions/min_length": 34.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.2896,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 70908449.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.02734375,
|
|
"rewards/brier_reward": 0.97265625,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 181
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 769.0,
|
|
"completions/max_terminated_length": 769.0,
|
|
"completions/mean_length": 211.77734375,
|
|
"completions/mean_terminated_length": 211.77734375,
|
|
"completions/min_length": 40.0,
|
|
"completions/min_terminated_length": 40.0,
|
|
"epoch": 0.2912,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 71296752.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.01953125,
|
|
"rewards/brier_reward": 0.98046875,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 182
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 819.0,
|
|
"completions/max_terminated_length": 819.0,
|
|
"completions/mean_length": 183.44921875,
|
|
"completions/mean_terminated_length": 183.44921875,
|
|
"completions/min_length": 33.0,
|
|
"completions/min_terminated_length": 33.0,
|
|
"epoch": 0.2928,
|
|
"grad_norm": 7.136356725823134e-05,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0001,
|
|
"num_tokens": 71673451.0,
|
|
"reward": 0.9998689293861389,
|
|
"reward_std": 0.0003734485653694719,
|
|
"rewards/accuracy_reward": 0.08203125,
|
|
"rewards/brier_reward": 0.917704701423645,
|
|
"rewards/confidence_one_or_zero": 0.99609375,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.001015624962747097,
|
|
"step": 183
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 695.0,
|
|
"completions/max_terminated_length": 695.0,
|
|
"completions/mean_length": 189.8828125,
|
|
"completions/mean_terminated_length": 189.8828125,
|
|
"completions/min_length": 37.0,
|
|
"completions/min_terminated_length": 37.0,
|
|
"epoch": 0.2944,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 72061885.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.05078125,
|
|
"rewards/brier_reward": 0.94921875,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 184
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 969.0,
|
|
"completions/max_terminated_length": 969.0,
|
|
"completions/mean_length": 187.76171875,
|
|
"completions/mean_terminated_length": 187.76171875,
|
|
"completions/min_length": 36.0,
|
|
"completions/min_terminated_length": 36.0,
|
|
"epoch": 0.296,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 72450952.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.03515625,
|
|
"rewards/brier_reward": 0.96484375,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 185
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 887.0,
|
|
"completions/max_terminated_length": 887.0,
|
|
"completions/mean_length": 207.46875,
|
|
"completions/mean_terminated_length": 208.28236389160156,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 35.0,
|
|
"epoch": 0.2976,
|
|
"grad_norm": 0.0005568548804149032,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.004,
|
|
"num_tokens": 72836712.0,
|
|
"reward": 0.9958820343017578,
|
|
"reward_std": 0.011650143191218376,
|
|
"rewards/accuracy_reward": 0.03125,
|
|
"rewards/brier_reward": 0.964418351650238,
|
|
"rewards/confidence_one_or_zero": 0.99609375,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0012890625512227416,
|
|
"step": 186
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 792.0,
|
|
"completions/max_terminated_length": 792.0,
|
|
"completions/mean_length": 194.98046875,
|
|
"completions/mean_terminated_length": 194.98046875,
|
|
"completions/min_length": 35.0,
|
|
"completions/min_terminated_length": 35.0,
|
|
"epoch": 0.2992,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 73223731.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0546875,
|
|
"rewards/brier_reward": 0.9453125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 187
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 633.0,
|
|
"completions/max_terminated_length": 633.0,
|
|
"completions/mean_length": 193.58984375,
|
|
"completions/mean_terminated_length": 193.58984375,
|
|
"completions/min_length": 36.0,
|
|
"completions/min_terminated_length": 36.0,
|
|
"epoch": 0.3008,
|
|
"grad_norm": 0.0024952238891273737,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0031,
|
|
"num_tokens": 73628866.0,
|
|
"reward": 0.9960947036743164,
|
|
"reward_std": 0.011048543266952038,
|
|
"rewards/accuracy_reward": 0.0234375,
|
|
"rewards/brier_reward": 0.97265625,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 188
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 852.0,
|
|
"completions/max_terminated_length": 852.0,
|
|
"completions/mean_length": 190.9921875,
|
|
"completions/mean_terminated_length": 190.9921875,
|
|
"completions/min_length": 34.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.3024,
|
|
"grad_norm": 0.00012831162894144654,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0002,
|
|
"num_tokens": 74010240.0,
|
|
"reward": 0.9997882843017578,
|
|
"reward_std": 0.0006015999824739993,
|
|
"rewards/accuracy_reward": 0.0625,
|
|
"rewards/brier_reward": 0.937074601650238,
|
|
"rewards/confidence_one_or_zero": 0.99609375,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0012890625512227416,
|
|
"step": 189
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 561.0,
|
|
"completions/max_terminated_length": 561.0,
|
|
"completions/mean_length": 170.89453125,
|
|
"completions/mean_terminated_length": 171.56471252441406,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 33.0,
|
|
"epoch": 0.304,
|
|
"grad_norm": 0.0007622085977345705,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0035,
|
|
"num_tokens": 74393053.0,
|
|
"reward": 0.9960947036743164,
|
|
"reward_std": 0.011048543266952038,
|
|
"rewards/accuracy_reward": 0.0546875,
|
|
"rewards/brier_reward": 0.94140625,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 190
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 716.0,
|
|
"completions/max_terminated_length": 716.0,
|
|
"completions/mean_length": 196.12890625,
|
|
"completions/mean_terminated_length": 196.12890625,
|
|
"completions/min_length": 36.0,
|
|
"completions/min_terminated_length": 36.0,
|
|
"epoch": 0.3056,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 74787998.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.04296875,
|
|
"rewards/brier_reward": 0.95703125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 191
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 721.0,
|
|
"completions/max_terminated_length": 721.0,
|
|
"completions/mean_length": 186.24609375,
|
|
"completions/mean_terminated_length": 186.24609375,
|
|
"completions/min_length": 33.0,
|
|
"completions/min_terminated_length": 33.0,
|
|
"epoch": 0.3072,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 75184333.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.03125,
|
|
"rewards/brier_reward": 0.96875,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 192
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 907.0,
|
|
"completions/max_terminated_length": 907.0,
|
|
"completions/mean_length": 190.98046875,
|
|
"completions/mean_terminated_length": 190.98046875,
|
|
"completions/min_length": 39.0,
|
|
"completions/min_terminated_length": 39.0,
|
|
"epoch": 0.3088,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 75581816.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.03515625,
|
|
"rewards/brier_reward": 0.96484375,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 193
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 600.0,
|
|
"completions/max_terminated_length": 600.0,
|
|
"completions/mean_length": 180.5234375,
|
|
"completions/mean_terminated_length": 180.5234375,
|
|
"completions/min_length": 37.0,
|
|
"completions/min_terminated_length": 37.0,
|
|
"epoch": 0.3104,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 75974278.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.03515625,
|
|
"rewards/brier_reward": 0.96484375,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 194
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00390625,
|
|
"completions/max_length": 1145.0,
|
|
"completions/max_terminated_length": 1145.0,
|
|
"completions/mean_length": 188.93359375,
|
|
"completions/mean_terminated_length": 189.6745147705078,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 39.0,
|
|
"epoch": 0.312,
|
|
"grad_norm": 0.0033796713687479496,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0026,
|
|
"num_tokens": 76363445.0,
|
|
"reward": 0.9882822036743164,
|
|
"reward_std": 0.03314562886953354,
|
|
"rewards/accuracy_reward": 0.03515625,
|
|
"rewards/brier_reward": 0.953125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.98828125,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 195
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 806.0,
|
|
"completions/max_terminated_length": 806.0,
|
|
"completions/mean_length": 201.71484375,
|
|
"completions/mean_terminated_length": 201.71484375,
|
|
"completions/min_length": 36.0,
|
|
"completions/min_terminated_length": 36.0,
|
|
"epoch": 0.3136,
|
|
"grad_norm": 0.001726991031318903,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0015,
|
|
"num_tokens": 76759284.0,
|
|
"reward": 0.9960947036743164,
|
|
"reward_std": 0.011048543266952038,
|
|
"rewards/accuracy_reward": 0.05078125,
|
|
"rewards/brier_reward": 0.9453125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 0.99609375,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 196
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 621.0,
|
|
"completions/max_terminated_length": 621.0,
|
|
"completions/mean_length": 194.421875,
|
|
"completions/mean_terminated_length": 194.421875,
|
|
"completions/min_length": 35.0,
|
|
"completions/min_terminated_length": 35.0,
|
|
"epoch": 0.3152,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 77152680.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.04296875,
|
|
"rewards/brier_reward": 0.95703125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 197
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 593.0,
|
|
"completions/max_terminated_length": 593.0,
|
|
"completions/mean_length": 176.81640625,
|
|
"completions/mean_terminated_length": 176.81640625,
|
|
"completions/min_length": 35.0,
|
|
"completions/min_terminated_length": 35.0,
|
|
"epoch": 0.3168,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 77542057.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.046875,
|
|
"rewards/brier_reward": 0.953125,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 198
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 836.0,
|
|
"completions/max_terminated_length": 836.0,
|
|
"completions/mean_length": 197.02734375,
|
|
"completions/mean_terminated_length": 197.02734375,
|
|
"completions/min_length": 38.0,
|
|
"completions/min_terminated_length": 38.0,
|
|
"epoch": 0.3184,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 77937704.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0234375,
|
|
"rewards/brier_reward": 0.9765625,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 199
|
|
},
|
|
{
|
|
"calib/answer_extract_rate": 0.0,
|
|
"calib/avg_num_step_conf": 0.0,
|
|
"calib/final_conf_rate": 0.0,
|
|
"calib/format_rate": 0.0,
|
|
"calib/nonempty_final_conf_rate": 0.0,
|
|
"calib/nonempty_reasoning_rate": 0.0,
|
|
"calib/nonempty_step_conf_rate": 0.0,
|
|
"calib/step_conf_rate": 0.0,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 592.0,
|
|
"completions/max_terminated_length": 592.0,
|
|
"completions/mean_length": 169.27734375,
|
|
"completions/mean_terminated_length": 169.27734375,
|
|
"completions/min_length": 37.0,
|
|
"completions/min_terminated_length": 37.0,
|
|
"epoch": 0.32,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 78296319.0,
|
|
"reward": 1.0000009536743164,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.09375,
|
|
"rewards/brier_reward": 0.90625,
|
|
"rewards/confidence_one_or_zero": 1.0,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/mean_confidence_reward": 0.0,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"step": 200,
|
|
"total_flos": 0.0,
|
|
"train_loss": -0.007609076579804963,
|
|
"train_runtime": 8450.9766,
|
|
"train_samples_per_second": 6.058,
|
|
"train_steps_per_second": 0.024
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 200,
|
|
"num_input_tokens_seen": 78296319,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 20,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|