575 lines
21 KiB
JSON
575 lines
21 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 1.5151515151515151,
|
|
"eval_steps": 500,
|
|
"global_step": 100,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completion_length": 177.8,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 177.8,
|
|
"completions/max_terminated_length": 177.8,
|
|
"completions/mean_length": 157.85000610351562,
|
|
"completions/mean_terminated_length": 157.85000610351562,
|
|
"completions/min_length": 136.0,
|
|
"completions/min_terminated_length": 136.0,
|
|
"epoch": 0.07575757575757576,
|
|
"frac_reward_zero_std": 0.4000000059604645,
|
|
"grad_norm": 1.5840047597885132,
|
|
"kl": 0.0010059793893522702,
|
|
"learning_rate": 1.6000000000000001e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 73447.0,
|
|
"reward": 0.5880883574485779,
|
|
"reward_std": 0.020529226586222648,
|
|
"rewards/reward_function/mean": 0.5880883395671844,
|
|
"rewards/reward_function/std": 0.06562883183360099,
|
|
"step": 5
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completion_length": 176.2,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 176.2,
|
|
"completions/max_terminated_length": 176.2,
|
|
"completions/mean_length": 156.65000915527344,
|
|
"completions/mean_terminated_length": 156.65000915527344,
|
|
"completions/min_length": 138.0,
|
|
"completions/min_terminated_length": 138.0,
|
|
"epoch": 0.15151515151515152,
|
|
"frac_reward_zero_std": 0.26666667461395266,
|
|
"grad_norm": 1.6390373706817627,
|
|
"kl": 0.0017284046276472508,
|
|
"learning_rate": 3.6000000000000003e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 146334.0,
|
|
"reward": 0.605418348312378,
|
|
"reward_std": 0.02508251890540123,
|
|
"rewards/reward_function/mean": 0.60541832447052,
|
|
"rewards/reward_function/std": 0.06859094277024269,
|
|
"step": 10
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completion_length": 176.4,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 176.4,
|
|
"completions/max_terminated_length": 176.4,
|
|
"completions/mean_length": 157.0000030517578,
|
|
"completions/mean_terminated_length": 157.0000030517578,
|
|
"completions/min_length": 140.8,
|
|
"completions/min_terminated_length": 140.8,
|
|
"epoch": 0.22727272727272727,
|
|
"frac_reward_zero_std": 0.26666667461395266,
|
|
"grad_norm": 0.7626600861549377,
|
|
"kl": 0.003397522373901059,
|
|
"learning_rate": 5.600000000000001e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 219198.0,
|
|
"reward": 0.5862850427627564,
|
|
"reward_std": 0.036518129706382754,
|
|
"rewards/reward_function/mean": 0.5862850069999694,
|
|
"rewards/reward_function/std": 0.08488646671175956,
|
|
"step": 15
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completion_length": 183.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 183.0,
|
|
"completions/max_terminated_length": 183.0,
|
|
"completions/mean_length": 156.98334045410155,
|
|
"completions/mean_terminated_length": 156.98334045410155,
|
|
"completions/min_length": 139.8,
|
|
"completions/min_terminated_length": 139.8,
|
|
"epoch": 0.30303030303030304,
|
|
"frac_reward_zero_std": 0.26666667461395266,
|
|
"grad_norm": 0.9624250531196594,
|
|
"kl": 0.007015585945919156,
|
|
"learning_rate": 7.600000000000001e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 291737.0,
|
|
"reward": 0.6001700401306153,
|
|
"reward_std": 0.025772593356668948,
|
|
"rewards/reward_function/mean": 0.6001700043678284,
|
|
"rewards/reward_function/std": 0.07909451425075531,
|
|
"step": 20
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completion_length": 172.2,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 172.2,
|
|
"completions/max_terminated_length": 172.2,
|
|
"completions/mean_length": 155.23333740234375,
|
|
"completions/mean_terminated_length": 155.23333740234375,
|
|
"completions/min_length": 138.2,
|
|
"completions/min_terminated_length": 138.2,
|
|
"epoch": 0.3787878787878788,
|
|
"frac_reward_zero_std": 0.26666667461395266,
|
|
"grad_norm": 0.5355867743492126,
|
|
"kl": 0.009492208405087391,
|
|
"learning_rate": 9.600000000000001e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 364535.0,
|
|
"reward": 0.5766633510589599,
|
|
"reward_std": 0.04085115455091,
|
|
"rewards/reward_function/mean": 0.576663339138031,
|
|
"rewards/reward_function/std": 0.10587597712874412,
|
|
"step": 25
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completion_length": 198.4,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 198.4,
|
|
"completions/max_terminated_length": 198.4,
|
|
"completions/mean_length": 153.6166748046875,
|
|
"completions/mean_terminated_length": 153.6166748046875,
|
|
"completions/min_length": 136.0,
|
|
"completions/min_terminated_length": 136.0,
|
|
"epoch": 0.45454545454545453,
|
|
"frac_reward_zero_std": 0.13333333730697633,
|
|
"grad_norm": 0.6549646854400635,
|
|
"kl": 0.061492755884925525,
|
|
"learning_rate": 1.16e-05,
|
|
"loss": 0.0001,
|
|
"num_tokens": 436992.0,
|
|
"reward": 0.5978150248527527,
|
|
"reward_std": 0.04262940138578415,
|
|
"rewards/reward_function/mean": 0.5978150129318237,
|
|
"rewards/reward_function/std": 0.09431936666369438,
|
|
"step": 30
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completion_length": 176.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 176.0,
|
|
"completions/max_terminated_length": 176.0,
|
|
"completions/mean_length": 158.86667175292968,
|
|
"completions/mean_terminated_length": 158.86667175292968,
|
|
"completions/min_length": 145.0,
|
|
"completions/min_terminated_length": 145.0,
|
|
"epoch": 0.5303030303030303,
|
|
"frac_reward_zero_std": 0.40000001192092893,
|
|
"grad_norm": 0.4725801348686218,
|
|
"kl": 2.357983988771836,
|
|
"learning_rate": 1.3600000000000002e-05,
|
|
"loss": 0.0024,
|
|
"num_tokens": 509788.0,
|
|
"reward": 0.6049700140953064,
|
|
"reward_std": 0.012831439916044473,
|
|
"rewards/reward_function/mean": 0.6049699783325195,
|
|
"rewards/reward_function/std": 0.08928216472268105,
|
|
"step": 35
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completion_length": 172.4,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 172.4,
|
|
"completions/max_terminated_length": 172.4,
|
|
"completions/mean_length": 154.5500030517578,
|
|
"completions/mean_terminated_length": 154.5500030517578,
|
|
"completions/min_length": 138.8,
|
|
"completions/min_terminated_length": 138.8,
|
|
"epoch": 0.6060606060606061,
|
|
"frac_reward_zero_std": 0.4666666746139526,
|
|
"grad_norm": 0.41579461097717285,
|
|
"kl": 0.10282722649474939,
|
|
"learning_rate": 1.5600000000000003e-05,
|
|
"loss": 0.0001,
|
|
"num_tokens": 582789.0,
|
|
"reward": 0.5595033764839172,
|
|
"reward_std": 0.014409982354845852,
|
|
"rewards/reward_function/mean": 0.5595033466815948,
|
|
"rewards/reward_function/std": 0.053104204079136255,
|
|
"step": 40
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completion_length": 202.6,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 202.6,
|
|
"completions/max_terminated_length": 202.6,
|
|
"completions/mean_length": 158.20000305175782,
|
|
"completions/mean_terminated_length": 158.20000305175782,
|
|
"completions/min_length": 136.0,
|
|
"completions/min_terminated_length": 136.0,
|
|
"epoch": 0.6818181818181818,
|
|
"frac_reward_zero_std": 0.33333333730697634,
|
|
"grad_norm": 0.03286667913198471,
|
|
"kl": 3726.0936788400013,
|
|
"learning_rate": 1.76e-05,
|
|
"loss": 3.7261,
|
|
"num_tokens": 655565.0,
|
|
"reward": 0.5829650402069092,
|
|
"reward_std": 0.031194474175572397,
|
|
"rewards/reward_function/mean": 0.5829649925231933,
|
|
"rewards/reward_function/std": 0.09724260903894902,
|
|
"step": 45
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completion_length": 180.4,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 180.4,
|
|
"completions/max_terminated_length": 180.4,
|
|
"completions/mean_length": 157.5500030517578,
|
|
"completions/mean_terminated_length": 157.5500030517578,
|
|
"completions/min_length": 141.2,
|
|
"completions/min_terminated_length": 141.2,
|
|
"epoch": 0.7575757575757576,
|
|
"frac_reward_zero_std": 0.26666667461395266,
|
|
"grad_norm": 0.45231395959854126,
|
|
"kl": 0.24270717451969784,
|
|
"learning_rate": 1.9600000000000002e-05,
|
|
"loss": 0.0002,
|
|
"num_tokens": 727754.0,
|
|
"reward": 0.6422233581542969,
|
|
"reward_std": 0.015453202556818724,
|
|
"rewards/reward_function/mean": 0.642223310470581,
|
|
"rewards/reward_function/std": 0.08873879238963127,
|
|
"step": 50
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completion_length": 174.8,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 174.8,
|
|
"completions/max_terminated_length": 174.8,
|
|
"completions/mean_length": 153.8166717529297,
|
|
"completions/mean_terminated_length": 153.8166717529297,
|
|
"completions/min_length": 138.2,
|
|
"completions/min_terminated_length": 138.2,
|
|
"epoch": 0.8333333333333334,
|
|
"frac_reward_zero_std": 0.20000000596046447,
|
|
"grad_norm": 0.3569779694080353,
|
|
"kl": 0.25451052089532217,
|
|
"learning_rate": 1.9822222222222226e-05,
|
|
"loss": 0.0003,
|
|
"num_tokens": 800059.0,
|
|
"reward": 0.6014716982841491,
|
|
"reward_std": 0.015854166075587272,
|
|
"rewards/reward_function/mean": 0.6014716625213623,
|
|
"rewards/reward_function/std": 0.10040064603090286,
|
|
"step": 55
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completion_length": 173.2,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 173.2,
|
|
"completions/max_terminated_length": 173.2,
|
|
"completions/mean_length": 153.4666717529297,
|
|
"completions/mean_terminated_length": 153.4666717529297,
|
|
"completions/min_length": 135.6,
|
|
"completions/min_terminated_length": 135.6,
|
|
"epoch": 0.9090909090909091,
|
|
"frac_reward_zero_std": 0.20000000596046447,
|
|
"grad_norm": 0.4230985939502716,
|
|
"kl": 0.10768474241097768,
|
|
"learning_rate": 1.9600000000000002e-05,
|
|
"loss": 0.0001,
|
|
"num_tokens": 872639.0,
|
|
"reward": 0.6015583634376526,
|
|
"reward_std": 0.020151399821043015,
|
|
"rewards/reward_function/mean": 0.6015583276748657,
|
|
"rewards/reward_function/std": 0.1292761668562889,
|
|
"step": 60
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completion_length": 164.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 164.0,
|
|
"completions/max_terminated_length": 164.0,
|
|
"completions/mean_length": 148.58334045410157,
|
|
"completions/mean_terminated_length": 148.58334045410157,
|
|
"completions/min_length": 134.0,
|
|
"completions/min_terminated_length": 134.0,
|
|
"epoch": 0.9848484848484849,
|
|
"frac_reward_zero_std": 0.26666667461395266,
|
|
"grad_norm": 0.25373604893684387,
|
|
"kl": 0.1025156612197558,
|
|
"learning_rate": 1.9377777777777778e-05,
|
|
"loss": 0.0001,
|
|
"num_tokens": 944522.0,
|
|
"reward": 0.6054783701896668,
|
|
"reward_std": 0.014289665129035711,
|
|
"rewards/reward_function/mean": 0.6054783225059509,
|
|
"rewards/reward_function/std": 0.1043807715177536,
|
|
"step": 65
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completion_length": 166.8,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 166.8,
|
|
"completions/max_terminated_length": 166.8,
|
|
"completions/mean_length": 148.18333740234374,
|
|
"completions/mean_terminated_length": 148.18333740234374,
|
|
"completions/min_length": 129.8,
|
|
"completions/min_terminated_length": 129.8,
|
|
"epoch": 1.0606060606060606,
|
|
"frac_reward_zero_std": 0.20000000596046447,
|
|
"grad_norm": 0.40739330649375916,
|
|
"kl": 0.13809017241001129,
|
|
"learning_rate": 1.9155555555555558e-05,
|
|
"loss": 0.0001,
|
|
"num_tokens": 1016593.0,
|
|
"reward": 0.625361704826355,
|
|
"reward_std": 0.020681749982759356,
|
|
"rewards/reward_function/mean": 0.6253616333007812,
|
|
"rewards/reward_function/std": 0.09655277617275715,
|
|
"step": 70
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completion_length": 161.6,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 161.6,
|
|
"completions/max_terminated_length": 161.6,
|
|
"completions/mean_length": 143.83333740234374,
|
|
"completions/mean_terminated_length": 143.83333740234374,
|
|
"completions/min_length": 128.0,
|
|
"completions/min_terminated_length": 128.0,
|
|
"epoch": 1.1363636363636362,
|
|
"frac_reward_zero_std": 0.33333333730697634,
|
|
"grad_norm": 0.012142821215093136,
|
|
"kl": 0.1849093531568845,
|
|
"learning_rate": 1.8933333333333334e-05,
|
|
"loss": 0.0002,
|
|
"num_tokens": 1087987.0,
|
|
"reward": 0.6634533524513244,
|
|
"reward_std": 0.01583307459950447,
|
|
"rewards/reward_function/mean": 0.6634533286094666,
|
|
"rewards/reward_function/std": 0.12549073845148087,
|
|
"step": 75
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completion_length": 167.4,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 167.4,
|
|
"completions/max_terminated_length": 167.4,
|
|
"completions/mean_length": 149.76667175292968,
|
|
"completions/mean_terminated_length": 149.76667175292968,
|
|
"completions/min_length": 135.4,
|
|
"completions/min_terminated_length": 135.4,
|
|
"epoch": 1.2121212121212122,
|
|
"frac_reward_zero_std": 0.26666667461395266,
|
|
"grad_norm": 0.2965545952320099,
|
|
"kl": 0.14318528175354003,
|
|
"learning_rate": 1.8711111111111113e-05,
|
|
"loss": 0.0001,
|
|
"num_tokens": 1160193.0,
|
|
"reward": 0.5806800246238708,
|
|
"reward_std": 0.04468099344521761,
|
|
"rewards/reward_function/mean": 0.5806800127029419,
|
|
"rewards/reward_function/std": 0.11235176101326942,
|
|
"step": 80
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completion_length": 168.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 168.0,
|
|
"completions/max_terminated_length": 168.0,
|
|
"completions/mean_length": 148.08334045410157,
|
|
"completions/mean_terminated_length": 148.08334045410157,
|
|
"completions/min_length": 134.8,
|
|
"completions/min_terminated_length": 134.8,
|
|
"epoch": 1.2878787878787878,
|
|
"frac_reward_zero_std": 0.13333333730697633,
|
|
"grad_norm": 0.3983455300331116,
|
|
"kl": 0.16668486495812734,
|
|
"learning_rate": 1.848888888888889e-05,
|
|
"loss": 0.0002,
|
|
"num_tokens": 1232170.0,
|
|
"reward": 0.5667450308799744,
|
|
"reward_std": 0.02706171413883567,
|
|
"rewards/reward_function/mean": 0.5667450308799744,
|
|
"rewards/reward_function/std": 0.1369288980960846,
|
|
"step": 85
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completion_length": 159.6,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 159.6,
|
|
"completions/max_terminated_length": 159.6,
|
|
"completions/mean_length": 143.23333740234375,
|
|
"completions/mean_terminated_length": 143.23333740234375,
|
|
"completions/min_length": 127.2,
|
|
"completions/min_terminated_length": 127.2,
|
|
"epoch": 1.3636363636363638,
|
|
"frac_reward_zero_std": 0.20000000596046447,
|
|
"grad_norm": 0.3573758006095886,
|
|
"kl": 0.16684276660283406,
|
|
"learning_rate": 1.826666666666667e-05,
|
|
"loss": 0.0002,
|
|
"num_tokens": 1303880.0,
|
|
"reward": 0.6362017035484314,
|
|
"reward_std": 0.007156953122466803,
|
|
"rewards/reward_function/mean": 0.6362016916275024,
|
|
"rewards/reward_function/std": 0.07174314968287945,
|
|
"step": 90
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completion_length": 156.2,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 156.2,
|
|
"completions/max_terminated_length": 156.2,
|
|
"completions/mean_length": 142.61667175292968,
|
|
"completions/mean_terminated_length": 142.61667175292968,
|
|
"completions/min_length": 132.4,
|
|
"completions/min_terminated_length": 132.4,
|
|
"epoch": 1.4393939393939394,
|
|
"frac_reward_zero_std": 0.40000001192092893,
|
|
"grad_norm": 0.28054413199424744,
|
|
"kl": 0.16249675651391346,
|
|
"learning_rate": 1.8044444444444445e-05,
|
|
"loss": 0.0002,
|
|
"num_tokens": 1375937.0,
|
|
"reward": 0.5929333567619324,
|
|
"reward_std": 0.010427127918228507,
|
|
"rewards/reward_function/mean": 0.5929333448410035,
|
|
"rewards/reward_function/std": 0.0421123169362545,
|
|
"step": 95
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completion_length": 153.2,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 153.2,
|
|
"completions/max_terminated_length": 153.2,
|
|
"completions/mean_length": 141.6166748046875,
|
|
"completions/mean_terminated_length": 141.6166748046875,
|
|
"completions/min_length": 131.0,
|
|
"completions/min_terminated_length": 131.0,
|
|
"epoch": 1.5151515151515151,
|
|
"frac_reward_zero_std": 0.40000001192092893,
|
|
"grad_norm": 0.35829174518585205,
|
|
"kl": 0.1883176525433858,
|
|
"learning_rate": 1.782222222222222e-05,
|
|
"loss": 0.0002,
|
|
"num_tokens": 1448046.0,
|
|
"reward": 0.5822266817092896,
|
|
"reward_std": 0.0035873036831617355,
|
|
"rewards/reward_function/mean": 0.5822266697883606,
|
|
"rewards/reward_function/std": 0.045498589798808095,
|
|
"step": 100
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 500,
|
|
"num_input_tokens_seen": 1448046,
|
|
"num_train_epochs": 8,
|
|
"save_steps": 50,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 4,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|