583 lines
20 KiB
JSON
583 lines
20 KiB
JSON
|
|
{
|
||
|
|
"best_metric": null,
|
||
|
|
"best_model_checkpoint": null,
|
||
|
|
"epoch": 0.9984,
|
||
|
|
"eval_steps": 15,
|
||
|
|
"global_step": 78,
|
||
|
|
"is_hyper_param_search": false,
|
||
|
|
"is_local_process_zero": true,
|
||
|
|
"is_world_process_zero": true,
|
||
|
|
"log_history": [
|
||
|
|
{
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.03046875,
|
||
|
|
"completions/max_length": 1024.0,
|
||
|
|
"completions/max_terminated_length": 992.8,
|
||
|
|
"completions/mean_length": 148.38466796875,
|
||
|
|
"completions/mean_terminated_length": 120.88012237548828,
|
||
|
|
"completions/min_length": 2.0,
|
||
|
|
"completions/min_terminated_length": 2.0,
|
||
|
|
"epoch": 0.064,
|
||
|
|
"grad_norm": 0.016000226140022278,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": 0.008,
|
||
|
|
"num_tokens": 13353659.0,
|
||
|
|
"reward": 0.43779296875,
|
||
|
|
"reward_std": 0.30512999892234804,
|
||
|
|
"rewards/accuracy_reward": 0.18955078125,
|
||
|
|
"rewards/brier_reward": 0.0,
|
||
|
|
"rewards/confidence_one_or_zero": 0.0,
|
||
|
|
"rewards/format_reward": 0.68603515625,
|
||
|
|
"rewards/mean_confidence_reward": 0.0,
|
||
|
|
"step": 5
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.01142578125,
|
||
|
|
"completions/max_length": 1024.0,
|
||
|
|
"completions/max_terminated_length": 888.2,
|
||
|
|
"completions/mean_length": 113.01220703125,
|
||
|
|
"completions/mean_terminated_length": 102.50659942626953,
|
||
|
|
"completions/min_length": 4.4,
|
||
|
|
"completions/min_terminated_length": 4.4,
|
||
|
|
"epoch": 0.128,
|
||
|
|
"grad_norm": 0.005569960456341505,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": 0.0183,
|
||
|
|
"num_tokens": 26368200.0,
|
||
|
|
"reward": 0.5927734375,
|
||
|
|
"reward_std": 0.18429518938064576,
|
||
|
|
"rewards/accuracy_reward": 0.26328125,
|
||
|
|
"rewards/brier_reward": 0.0,
|
||
|
|
"rewards/confidence_one_or_zero": 0.0,
|
||
|
|
"rewards/format_reward": 0.922265625,
|
||
|
|
"rewards/mean_confidence_reward": 0.0,
|
||
|
|
"step": 10
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.004296875,
|
||
|
|
"completions/max_length": 1024.0,
|
||
|
|
"completions/max_terminated_length": 834.0,
|
||
|
|
"completions/mean_length": 84.0111328125,
|
||
|
|
"completions/mean_terminated_length": 79.96037445068359,
|
||
|
|
"completions/min_length": 9.2,
|
||
|
|
"completions/min_terminated_length": 9.2,
|
||
|
|
"epoch": 0.192,
|
||
|
|
"grad_norm": 0.002098287222906947,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": 0.0082,
|
||
|
|
"num_tokens": 39090618.0,
|
||
|
|
"reward": 0.672998046875,
|
||
|
|
"reward_std": 0.1340289294719696,
|
||
|
|
"rewards/accuracy_reward": 0.36337890625,
|
||
|
|
"rewards/brier_reward": 0.0,
|
||
|
|
"rewards/confidence_one_or_zero": 0.0,
|
||
|
|
"rewards/format_reward": 0.9826171875,
|
||
|
|
"rewards/mean_confidence_reward": 0.0,
|
||
|
|
"step": 15
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.192,
|
||
|
|
"eval_completions/clipped_ratio": 0.0,
|
||
|
|
"eval_completions/max_length": 247.5,
|
||
|
|
"eval_completions/max_terminated_length": 247.5,
|
||
|
|
"eval_completions/mean_length": 74.62883949279785,
|
||
|
|
"eval_completions/mean_terminated_length": 74.62883949279785,
|
||
|
|
"eval_completions/min_length": 22.5,
|
||
|
|
"eval_completions/min_terminated_length": 22.5,
|
||
|
|
"eval_loss": 0.0,
|
||
|
|
"eval_num_tokens": 39090618.0,
|
||
|
|
"eval_reward": 0.65625,
|
||
|
|
"eval_reward_std": 0.21594678610563278,
|
||
|
|
"eval_rewards/accuracy_reward": 0.314453125,
|
||
|
|
"eval_rewards/brier_reward": 0.0,
|
||
|
|
"eval_rewards/confidence_one_or_zero": 0.0,
|
||
|
|
"eval_rewards/format_reward": 0.998046875,
|
||
|
|
"eval_rewards/mean_confidence_reward": 0.0,
|
||
|
|
"eval_runtime": 24.5821,
|
||
|
|
"eval_samples_per_second": 20.34,
|
||
|
|
"eval_steps_per_second": 0.163,
|
||
|
|
"step": 15
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.0013671875,
|
||
|
|
"completions/max_length": 885.6,
|
||
|
|
"completions/max_terminated_length": 481.4,
|
||
|
|
"completions/mean_length": 75.769140625,
|
||
|
|
"completions/mean_terminated_length": 74.47151794433594,
|
||
|
|
"completions/min_length": 12.8,
|
||
|
|
"completions/min_terminated_length": 12.8,
|
||
|
|
"epoch": 0.256,
|
||
|
|
"grad_norm": 0.0015658332267776132,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": 0.0012,
|
||
|
|
"num_tokens": 51845790.0,
|
||
|
|
"reward": 0.708984375,
|
||
|
|
"reward_std": 0.10655935555696487,
|
||
|
|
"rewards/accuracy_reward": 0.42177734375,
|
||
|
|
"rewards/brier_reward": 0.0,
|
||
|
|
"rewards/confidence_one_or_zero": 0.0,
|
||
|
|
"rewards/format_reward": 0.99619140625,
|
||
|
|
"rewards/mean_confidence_reward": 0.0,
|
||
|
|
"step": 20
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00048828125,
|
||
|
|
"completions/max_length": 709.8,
|
||
|
|
"completions/max_terminated_length": 506.6,
|
||
|
|
"completions/mean_length": 73.475,
|
||
|
|
"completions/mean_terminated_length": 73.0110580444336,
|
||
|
|
"completions/min_length": 19.2,
|
||
|
|
"completions/min_terminated_length": 19.2,
|
||
|
|
"epoch": 0.32,
|
||
|
|
"grad_norm": 0.0015233514131978154,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": 0.0002,
|
||
|
|
"num_tokens": 64440126.0,
|
||
|
|
"reward": 0.705615234375,
|
||
|
|
"reward_std": 0.10088382810354232,
|
||
|
|
"rewards/accuracy_reward": 0.4138671875,
|
||
|
|
"rewards/brier_reward": 0.0,
|
||
|
|
"rewards/confidence_one_or_zero": 0.0,
|
||
|
|
"rewards/format_reward": 0.99736328125,
|
||
|
|
"rewards/mean_confidence_reward": 0.0,
|
||
|
|
"step": 25
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.000390625,
|
||
|
|
"completions/max_length": 952.0,
|
||
|
|
"completions/max_terminated_length": 627.2,
|
||
|
|
"completions/mean_length": 76.364453125,
|
||
|
|
"completions/mean_terminated_length": 75.99405670166016,
|
||
|
|
"completions/min_length": 18.6,
|
||
|
|
"completions/min_terminated_length": 18.6,
|
||
|
|
"epoch": 0.384,
|
||
|
|
"grad_norm": 0.002182575175538659,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"num_tokens": 77054626.0,
|
||
|
|
"reward": 0.708837890625,
|
||
|
|
"reward_std": 0.0894511729478836,
|
||
|
|
"rewards/accuracy_reward": 0.4197265625,
|
||
|
|
"rewards/brier_reward": 0.0,
|
||
|
|
"rewards/confidence_one_or_zero": 0.0,
|
||
|
|
"rewards/format_reward": 0.99794921875,
|
||
|
|
"rewards/mean_confidence_reward": 0.0,
|
||
|
|
"step": 30
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.384,
|
||
|
|
"eval_completions/clipped_ratio": 0.002155172413793094,
|
||
|
|
"eval_completions/max_length": 414.5,
|
||
|
|
"eval_completions/max_terminated_length": 204.75,
|
||
|
|
"eval_completions/mean_length": 79.66776657104492,
|
||
|
|
"eval_completions/mean_terminated_length": 77.63948631286621,
|
||
|
|
"eval_completions/min_length": 31.25,
|
||
|
|
"eval_completions/min_terminated_length": 31.25,
|
||
|
|
"eval_loss": 0.0,
|
||
|
|
"eval_num_tokens": 77054626.0,
|
||
|
|
"eval_reward": 0.6640625,
|
||
|
|
"eval_reward_std": 0.22485817223787308,
|
||
|
|
"eval_rewards/accuracy_reward": 0.33203125,
|
||
|
|
"eval_rewards/brier_reward": 0.0,
|
||
|
|
"eval_rewards/confidence_one_or_zero": 0.0,
|
||
|
|
"eval_rewards/format_reward": 0.99609375,
|
||
|
|
"eval_rewards/mean_confidence_reward": 0.0,
|
||
|
|
"eval_runtime": 33.0856,
|
||
|
|
"eval_samples_per_second": 15.112,
|
||
|
|
"eval_steps_per_second": 0.121,
|
||
|
|
"step": 30
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00048828125,
|
||
|
|
"completions/max_length": 932.6,
|
||
|
|
"completions/max_terminated_length": 520.6,
|
||
|
|
"completions/mean_length": 80.10986328125,
|
||
|
|
"completions/mean_terminated_length": 79.64854736328125,
|
||
|
|
"completions/min_length": 18.6,
|
||
|
|
"completions/min_terminated_length": 18.6,
|
||
|
|
"epoch": 0.448,
|
||
|
|
"grad_norm": 0.0010561308590695262,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": 0.0004,
|
||
|
|
"num_tokens": 89702463.0,
|
||
|
|
"reward": 0.72060546875,
|
||
|
|
"reward_std": 0.08864349871873856,
|
||
|
|
"rewards/accuracy_reward": 0.4431640625,
|
||
|
|
"rewards/brier_reward": 0.0,
|
||
|
|
"rewards/confidence_one_or_zero": 0.0,
|
||
|
|
"rewards/format_reward": 0.998046875,
|
||
|
|
"rewards/mean_confidence_reward": 0.0,
|
||
|
|
"step": 35
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.000390625,
|
||
|
|
"completions/max_length": 800.6,
|
||
|
|
"completions/max_terminated_length": 461.4,
|
||
|
|
"completions/mean_length": 86.64755859375,
|
||
|
|
"completions/mean_terminated_length": 86.28123168945312,
|
||
|
|
"completions/min_length": 24.6,
|
||
|
|
"completions/min_terminated_length": 24.6,
|
||
|
|
"epoch": 0.512,
|
||
|
|
"grad_norm": 0.0010048149852082133,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"num_tokens": 102614142.0,
|
||
|
|
"reward": 0.721337890625,
|
||
|
|
"reward_std": 0.08699959516525269,
|
||
|
|
"rewards/accuracy_reward": 0.44345703125,
|
||
|
|
"rewards/brier_reward": 0.0,
|
||
|
|
"rewards/confidence_one_or_zero": 0.0,
|
||
|
|
"rewards/format_reward": 0.99921875,
|
||
|
|
"rewards/mean_confidence_reward": 0.0,
|
||
|
|
"step": 40
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.0009765625,
|
||
|
|
"completions/max_length": 923.6,
|
||
|
|
"completions/max_terminated_length": 530.4,
|
||
|
|
"completions/mean_length": 89.90283203125,
|
||
|
|
"completions/mean_terminated_length": 88.98892669677734,
|
||
|
|
"completions/min_length": 18.8,
|
||
|
|
"completions/min_terminated_length": 18.8,
|
||
|
|
"epoch": 0.576,
|
||
|
|
"grad_norm": 0.0009294641204178333,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": 0.0014,
|
||
|
|
"num_tokens": 115441355.0,
|
||
|
|
"reward": 0.732666015625,
|
||
|
|
"reward_std": 0.08389391750097275,
|
||
|
|
"rewards/accuracy_reward": 0.4677734375,
|
||
|
|
"rewards/brier_reward": 0.0,
|
||
|
|
"rewards/confidence_one_or_zero": 0.0,
|
||
|
|
"rewards/format_reward": 0.99755859375,
|
||
|
|
"rewards/mean_confidence_reward": 0.0,
|
||
|
|
"step": 45
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.576,
|
||
|
|
"eval_completions/clipped_ratio": 0.0,
|
||
|
|
"eval_completions/max_length": 269.5,
|
||
|
|
"eval_completions/max_terminated_length": 269.5,
|
||
|
|
"eval_completions/mean_length": 92.64102935791016,
|
||
|
|
"eval_completions/mean_terminated_length": 92.64102935791016,
|
||
|
|
"eval_completions/min_length": 35.5,
|
||
|
|
"eval_completions/min_terminated_length": 35.5,
|
||
|
|
"eval_loss": 0.0,
|
||
|
|
"eval_num_tokens": 115441355.0,
|
||
|
|
"eval_reward": 0.6865234375,
|
||
|
|
"eval_reward_std": 0.23618583008646965,
|
||
|
|
"eval_rewards/accuracy_reward": 0.373046875,
|
||
|
|
"eval_rewards/brier_reward": 0.0,
|
||
|
|
"eval_rewards/confidence_one_or_zero": 0.0,
|
||
|
|
"eval_rewards/format_reward": 1.0,
|
||
|
|
"eval_rewards/mean_confidence_reward": 0.0,
|
||
|
|
"eval_runtime": 26.3838,
|
||
|
|
"eval_samples_per_second": 18.951,
|
||
|
|
"eval_steps_per_second": 0.152,
|
||
|
|
"step": 45
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00087890625,
|
||
|
|
"completions/max_length": 1024.0,
|
||
|
|
"completions/max_terminated_length": 568.2,
|
||
|
|
"completions/mean_length": 94.26240234375,
|
||
|
|
"completions/mean_terminated_length": 93.44497528076172,
|
||
|
|
"completions/min_length": 24.4,
|
||
|
|
"completions/min_terminated_length": 24.4,
|
||
|
|
"epoch": 0.64,
|
||
|
|
"grad_norm": 0.0007046961691230536,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": 0.0018,
|
||
|
|
"num_tokens": 128476370.0,
|
||
|
|
"reward": 0.735302734375,
|
||
|
|
"reward_std": 0.0788412094116211,
|
||
|
|
"rewards/accuracy_reward": 0.47177734375,
|
||
|
|
"rewards/brier_reward": 0.0,
|
||
|
|
"rewards/confidence_one_or_zero": 0.0,
|
||
|
|
"rewards/format_reward": 0.998828125,
|
||
|
|
"rewards/mean_confidence_reward": 0.0,
|
||
|
|
"step": 50
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.0009765625,
|
||
|
|
"completions/max_length": 1024.0,
|
||
|
|
"completions/max_terminated_length": 514.8,
|
||
|
|
"completions/mean_length": 95.73056640625,
|
||
|
|
"completions/mean_terminated_length": 94.82296295166016,
|
||
|
|
"completions/min_length": 28.2,
|
||
|
|
"completions/min_terminated_length": 28.2,
|
||
|
|
"epoch": 0.704,
|
||
|
|
"grad_norm": 0.0007479747291654348,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": 0.0014,
|
||
|
|
"num_tokens": 141210483.0,
|
||
|
|
"reward": 0.728857421875,
|
||
|
|
"reward_std": 0.07646729648113251,
|
||
|
|
"rewards/accuracy_reward": 0.45927734375,
|
||
|
|
"rewards/brier_reward": 0.0,
|
||
|
|
"rewards/confidence_one_or_zero": 0.0,
|
||
|
|
"rewards/format_reward": 0.9984375,
|
||
|
|
"rewards/mean_confidence_reward": 0.0,
|
||
|
|
"step": 55
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00068359375,
|
||
|
|
"completions/max_length": 896.2,
|
||
|
|
"completions/max_terminated_length": 469.8,
|
||
|
|
"completions/mean_length": 95.0162109375,
|
||
|
|
"completions/mean_terminated_length": 94.38102111816406,
|
||
|
|
"completions/min_length": 24.4,
|
||
|
|
"completions/min_terminated_length": 24.4,
|
||
|
|
"epoch": 0.768,
|
||
|
|
"grad_norm": 0.0006520415190607309,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": 0.0017,
|
||
|
|
"num_tokens": 154067105.0,
|
||
|
|
"reward": 0.741455078125,
|
||
|
|
"reward_std": 0.07960962057113648,
|
||
|
|
"rewards/accuracy_reward": 0.48369140625,
|
||
|
|
"rewards/brier_reward": 0.0,
|
||
|
|
"rewards/confidence_one_or_zero": 0.0,
|
||
|
|
"rewards/format_reward": 0.99921875,
|
||
|
|
"rewards/mean_confidence_reward": 0.0,
|
||
|
|
"step": 60
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.768,
|
||
|
|
"eval_completions/clipped_ratio": 0.0,
|
||
|
|
"eval_completions/max_length": 263.75,
|
||
|
|
"eval_completions/max_terminated_length": 263.75,
|
||
|
|
"eval_completions/mean_length": 96.76313209533691,
|
||
|
|
"eval_completions/mean_terminated_length": 96.76313209533691,
|
||
|
|
"eval_completions/min_length": 38.25,
|
||
|
|
"eval_completions/min_terminated_length": 38.25,
|
||
|
|
"eval_loss": 0.0,
|
||
|
|
"eval_num_tokens": 154067105.0,
|
||
|
|
"eval_reward": 0.69140625,
|
||
|
|
"eval_reward_std": 0.23931611329317093,
|
||
|
|
"eval_rewards/accuracy_reward": 0.384765625,
|
||
|
|
"eval_rewards/brier_reward": 0.0,
|
||
|
|
"eval_rewards/confidence_one_or_zero": 0.0,
|
||
|
|
"eval_rewards/format_reward": 0.998046875,
|
||
|
|
"eval_rewards/mean_confidence_reward": 0.0,
|
||
|
|
"eval_runtime": 25.814,
|
||
|
|
"eval_samples_per_second": 19.369,
|
||
|
|
"eval_steps_per_second": 0.155,
|
||
|
|
"step": 60
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.000390625,
|
||
|
|
"completions/max_length": 774.0,
|
||
|
|
"completions/max_terminated_length": 411.2,
|
||
|
|
"completions/mean_length": 94.5296875,
|
||
|
|
"completions/mean_terminated_length": 94.16671752929688,
|
||
|
|
"completions/min_length": 32.4,
|
||
|
|
"completions/min_terminated_length": 32.4,
|
||
|
|
"epoch": 0.832,
|
||
|
|
"grad_norm": 0.0007356010028161108,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": 0.0007,
|
||
|
|
"num_tokens": 166964521.0,
|
||
|
|
"reward": 0.751513671875,
|
||
|
|
"reward_std": 0.07600467056035995,
|
||
|
|
"rewards/accuracy_reward": 0.50341796875,
|
||
|
|
"rewards/brier_reward": 0.0,
|
||
|
|
"rewards/confidence_one_or_zero": 0.0,
|
||
|
|
"rewards/format_reward": 0.999609375,
|
||
|
|
"rewards/mean_confidence_reward": 0.0,
|
||
|
|
"step": 65
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.0013671875,
|
||
|
|
"completions/max_length": 1024.0,
|
||
|
|
"completions/max_terminated_length": 443.2,
|
||
|
|
"completions/mean_length": 97.67490234375,
|
||
|
|
"completions/mean_terminated_length": 96.40738067626953,
|
||
|
|
"completions/min_length": 30.8,
|
||
|
|
"completions/min_terminated_length": 30.8,
|
||
|
|
"epoch": 0.896,
|
||
|
|
"grad_norm": 0.000633634568657726,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": 0.0024,
|
||
|
|
"num_tokens": 179885808.0,
|
||
|
|
"reward": 0.743115234375,
|
||
|
|
"reward_std": 0.07322432547807693,
|
||
|
|
"rewards/accuracy_reward": 0.48759765625,
|
||
|
|
"rewards/brier_reward": 0.0,
|
||
|
|
"rewards/confidence_one_or_zero": 0.0,
|
||
|
|
"rewards/format_reward": 0.9986328125,
|
||
|
|
"rewards/mean_confidence_reward": 0.0,
|
||
|
|
"step": 70
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00078125,
|
||
|
|
"completions/max_length": 1024.0,
|
||
|
|
"completions/max_terminated_length": 403.2,
|
||
|
|
"completions/mean_length": 98.701171875,
|
||
|
|
"completions/mean_terminated_length": 97.97780151367188,
|
||
|
|
"completions/min_length": 29.0,
|
||
|
|
"completions/min_terminated_length": 29.0,
|
||
|
|
"epoch": 0.96,
|
||
|
|
"grad_norm": 0.0007835639989934862,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": 0.0012,
|
||
|
|
"num_tokens": 192751292.0,
|
||
|
|
"reward": 0.739453125,
|
||
|
|
"reward_std": 0.07245174199342727,
|
||
|
|
"rewards/accuracy_reward": 0.4796875,
|
||
|
|
"rewards/brier_reward": 0.0,
|
||
|
|
"rewards/confidence_one_or_zero": 0.0,
|
||
|
|
"rewards/format_reward": 0.99921875,
|
||
|
|
"rewards/mean_confidence_reward": 0.0,
|
||
|
|
"step": 75
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.96,
|
||
|
|
"eval_completions/clipped_ratio": 0.0,
|
||
|
|
"eval_completions/max_length": 253.25,
|
||
|
|
"eval_completions/max_terminated_length": 253.25,
|
||
|
|
"eval_completions/mean_length": 96.76939582824707,
|
||
|
|
"eval_completions/mean_terminated_length": 96.76939582824707,
|
||
|
|
"eval_completions/min_length": 46.0,
|
||
|
|
"eval_completions/min_terminated_length": 46.0,
|
||
|
|
"eval_loss": 0.0,
|
||
|
|
"eval_num_tokens": 192751292.0,
|
||
|
|
"eval_reward": 0.7021484375,
|
||
|
|
"eval_reward_std": 0.23983138427138329,
|
||
|
|
"eval_rewards/accuracy_reward": 0.404296875,
|
||
|
|
"eval_rewards/brier_reward": 0.0,
|
||
|
|
"eval_rewards/confidence_one_or_zero": 0.0,
|
||
|
|
"eval_rewards/format_reward": 1.0,
|
||
|
|
"eval_rewards/mean_confidence_reward": 0.0,
|
||
|
|
"eval_runtime": 25.6674,
|
||
|
|
"eval_samples_per_second": 19.48,
|
||
|
|
"eval_steps_per_second": 0.156,
|
||
|
|
"step": 75
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.0,
|
||
|
|
"completions/max_length": 576.6666666666666,
|
||
|
|
"completions/max_terminated_length": 576.6666666666666,
|
||
|
|
"completions/mean_length": 97.17679595947266,
|
||
|
|
"completions/mean_terminated_length": 97.17679595947266,
|
||
|
|
"completions/min_length": 32.333333333333336,
|
||
|
|
"completions/min_terminated_length": 32.333333333333336,
|
||
|
|
"epoch": 0.9984,
|
||
|
|
"num_tokens": 200463724.0,
|
||
|
|
"reward": 0.7422688802083334,
|
||
|
|
"reward_std": 0.07581798732280731,
|
||
|
|
"rewards/accuracy_reward": 0.4845377604166667,
|
||
|
|
"rewards/brier_reward": 0.0,
|
||
|
|
"rewards/confidence_one_or_zero": 0.0,
|
||
|
|
"rewards/format_reward": 1.0,
|
||
|
|
"rewards/mean_confidence_reward": 0.0,
|
||
|
|
"step": 78,
|
||
|
|
"total_flos": 0.0,
|
||
|
|
"train_loss": 0.0031132084311535344,
|
||
|
|
"train_runtime": 28707.0758,
|
||
|
|
"train_samples_per_second": 0.697,
|
||
|
|
"train_steps_per_second": 0.003
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"logging_steps": 5,
|
||
|
|
"max_steps": 78,
|
||
|
|
"num_input_tokens_seen": 200463724,
|
||
|
|
"num_train_epochs": 1,
|
||
|
|
"save_steps": 60,
|
||
|
|
"stateful_callbacks": {
|
||
|
|
"TrainerControl": {
|
||
|
|
"args": {
|
||
|
|
"should_epoch_stop": false,
|
||
|
|
"should_evaluate": false,
|
||
|
|
"should_log": false,
|
||
|
|
"should_save": true,
|
||
|
|
"should_training_stop": true
|
||
|
|
},
|
||
|
|
"attributes": {}
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"total_flos": 0.0,
|
||
|
|
"train_batch_size": 4,
|
||
|
|
"trial_name": null,
|
||
|
|
"trial_params": null
|
||
|
|
}
|