{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.40008001600320064, "eval_steps": 500, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1452.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 937.8125, "completions/mean_terminated_length": 937.8125, "completions/min_length": 628.0, "completions/min_terminated_length": 628.0, "epoch": 0.00020004000800160032, "frac_reward_zero_std": 0.0, "grad_norm": 3.527628499401798, "kl": 0.0019626617431640625, "learning_rate": 0.0, "loss": -0.0356, "num_tokens": 38581.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9395993947982788, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06332202020797816, "rewards/wordcountpos_reward/raw_geo/std": 0.11430166382536866, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.1377060745318193, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1305.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 1151.75, "completions/mean_terminated_length": 1151.75, "completions/min_length": 933.0, "completions/min_terminated_length": 933.0, "epoch": 0.00040008001600320064, "frac_reward_zero_std": 0.0, "grad_norm": 3.3088173413126896, "kl": 0.0019207000732421875, "learning_rate": 2e-09, "loss": 0.0185, "num_tokens": 81577.0, "reward": 0.0, "reward_std": 0.945651113986969, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04572642141401159, "rewards/wordcountpos_reward/raw_geo/std": 0.13785616546096172, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05692750425533111, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1238.0, "completions/max_terminated_length": 1238.0, "completions/mean_length": 1036.1875, "completions/mean_terminated_length": 1036.1875, "completions/min_length": 781.0, "completions/min_terminated_length": 781.0, "epoch": 0.000600120024004801, "frac_reward_zero_std": 0.0, "grad_norm": 3.2279400814834562, "kl": 0.0019130706787109375, "learning_rate": 4e-09, "loss": 0.008, "num_tokens": 124140.0, "reward": 2.9802322387695312e-08, "reward_std": 0.840162992477417, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.015641604855250783, "rewards/wordcountpos_reward/raw_geo/std": 0.07693005917161888, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1344.0, "completions/mean_length": 1148.1875, "completions/mean_terminated_length": 1124.7333984375, "completions/min_length": 937.0, "completions/min_terminated_length": 937.0, "epoch": 0.0008001600320064013, "frac_reward_zero_std": 0.0, "grad_norm": 3.0016251460966408, "kl": 0.0017414093017578125, "learning_rate": 6e-09, "loss": 0.0202, "num_tokens": 160679.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5394235253334045, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03780968722152463, "rewards/wordcountpos_reward/raw_geo/std": 0.06425658756004914, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1202.0, "completions/max_terminated_length": 1202.0, "completions/mean_length": 1109.0, "completions/mean_terminated_length": 1109.0, "completions/min_length": 1026.0, "completions/min_terminated_length": 1026.0, "epoch": 0.0010002000400080016, "frac_reward_zero_std": 0.0, "grad_norm": 1.7236298250304891, "kl": 0.0007333755493164062, "learning_rate": 8e-09, "loss": -0.0022, "num_tokens": 203647.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9491308927536011, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06726112195430183, "rewards/wordcountpos_reward/raw_geo/std": 0.17384160321616854, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362767, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 5 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1275.0, "completions/max_terminated_length": 1275.0, "completions/mean_length": 977.0625, "completions/mean_terminated_length": 977.0625, "completions/min_length": 832.0, "completions/min_terminated_length": 832.0, "epoch": 0.001200240048009602, "frac_reward_zero_std": 0.0, "grad_norm": 2.7082803572559015, "kl": 0.0012769699096679688, "learning_rate": 1e-08, "loss": 0.0378, "num_tokens": 251608.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8895343542098999, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03377319362428406, "rewards/wordcountpos_reward/raw_geo/std": 0.09072124816242787, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10292032157252812, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 6 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1205.4375, "completions/mean_terminated_length": 1107.25, "completions/min_length": 950.0, "completions/min_terminated_length": 950.0, "epoch": 0.0014002800560112022, "frac_reward_zero_std": 0.0, "grad_norm": 1.947326967663354, "kl": 0.00078582763671875, "learning_rate": 1.2e-08, "loss": 0.0185, "num_tokens": 304455.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0025546550750732, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.3640622136620862, "rewards/wordcountpos_reward/raw_geo/std": 0.21229947965348825, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590966, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 7 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1393.0, "completions/max_terminated_length": 1393.0, "completions/mean_length": 1096.125, "completions/mean_terminated_length": 1096.125, "completions/min_length": 832.0, "completions/min_terminated_length": 832.0, "epoch": 0.0016003200640128026, "frac_reward_zero_std": 0.0, "grad_norm": 3.592799510181172, "kl": 0.002285003662109375, "learning_rate": 1.4e-08, "loss": 0.005, "num_tokens": 345425.0, "reward": 0.0, "reward_std": 0.8836174011230469, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04584762897460425, "rewards/wordcountpos_reward/raw_geo/std": 0.20300709497527086, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593314, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 8 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1443.0, "completions/mean_length": 1271.5, "completions/mean_terminated_length": 1218.769287109375, "completions/min_length": 1101.0, "completions/min_terminated_length": 1101.0, "epoch": 0.0018003600720144029, "frac_reward_zero_std": 0.0, "grad_norm": 2.9518801291571797, "kl": 0.001861572265625, "learning_rate": 1.6e-08, "loss": 0.0282, "num_tokens": 386905.0, "reward": 0.0, "reward_std": 0.8072527647018433, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0038624619297310207, "rewards/wordcountpos_reward/raw_geo/std": 0.0666643954266734, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.11344765475923409, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 9 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1257.0, "completions/max_terminated_length": 1257.0, "completions/mean_length": 978.1875, "completions/mean_terminated_length": 978.1875, "completions/min_length": 810.0, "completions/min_terminated_length": 810.0, "epoch": 0.002000400080016003, "frac_reward_zero_std": 0.0, "grad_norm": 2.298998492558027, "kl": 0.0007266998291015625, "learning_rate": 1.8e-08, "loss": -0.0304, "num_tokens": 426236.0, "reward": 7.450580596923828e-09, "reward_std": 1.0550340414047241, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.031131642601040573, "rewards/wordcountpos_reward/raw_geo/std": 0.06226754416007678, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 10 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1190.0, "completions/max_terminated_length": 1190.0, "completions/mean_length": 1004.0625, "completions/mean_terminated_length": 1004.0625, "completions/min_length": 820.0, "completions/min_terminated_length": 820.0, "epoch": 0.0022004400880176033, "frac_reward_zero_std": 0.0, "grad_norm": 3.6545045637961344, "kl": 0.002613067626953125, "learning_rate": 2e-08, "loss": -0.0281, "num_tokens": 465741.0, "reward": 2.9802322387695312e-08, "reward_std": 0.602470338344574, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.092484229826523, "rewards/wordcountpos_reward/raw_geo/std": 0.10452240495740213, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10246950765959602, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 11 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1233.0, "completions/mean_length": 919.3125, "completions/mean_terminated_length": 880.6000366210938, "completions/min_length": 610.0, "completions/min_terminated_length": 610.0, "epoch": 0.002400480096019204, "frac_reward_zero_std": 0.0, "grad_norm": 3.649675238428009, "kl": 0.0022182464599609375, "learning_rate": 2.2e-08, "loss": 0.0036, "num_tokens": 504690.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8824746608734131, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1325748640775109, "rewards/wordcountpos_reward/raw_geo/std": 0.08739242698581638, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.08766518798921946, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 12 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1405.0, "completions/mean_length": 1051.25, "completions/mean_terminated_length": 1021.3333740234375, "completions/min_length": 698.0, "completions/min_terminated_length": 698.0, "epoch": 0.002600520104020804, "frac_reward_zero_std": 0.0, "grad_norm": 3.776655556102875, "kl": 0.002254486083984375, "learning_rate": 2.4e-08, "loss": -0.0502, "num_tokens": 543934.0, "reward": 0.0, "reward_std": 1.0007752180099487, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09221717605913836, "rewards/wordcountpos_reward/raw_geo/std": 0.10661654246927132, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11122216672215289, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 13 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1260.0, "completions/max_terminated_length": 1260.0, "completions/mean_length": 989.1875, "completions/mean_terminated_length": 989.1875, "completions/min_length": 744.0, "completions/min_terminated_length": 744.0, "epoch": 0.0028005601120224045, "frac_reward_zero_std": 0.0, "grad_norm": 3.4045263885075374, "kl": 0.0021114349365234375, "learning_rate": 2.5999999999999998e-08, "loss": -0.0557, "num_tokens": 586153.0, "reward": 0.0, "reward_std": 0.9987242221832275, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07328233140544992, "rewards/wordcountpos_reward/raw_geo/std": 0.07773590256422544, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.1408308678285174, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 14 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1407.875, "completions/mean_terminated_length": 1366.0, "completions/min_length": 1205.0, "completions/min_terminated_length": 1205.0, "epoch": 0.0030006001200240046, "frac_reward_zero_std": 0.0, "grad_norm": 2.6637674347345706, "kl": 0.001651763916015625, "learning_rate": 2.8e-08, "loss": 0.0042, "num_tokens": 637407.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9598821401596069, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.18890342937585533, "rewards/wordcountpos_reward/raw_geo/std": 0.08918939774881607, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.10183501544346311, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 15 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1104.0, "completions/max_terminated_length": 1104.0, "completions/mean_length": 941.5625, "completions/mean_terminated_length": 941.5625, "completions/min_length": 728.0, "completions/min_terminated_length": 728.0, "epoch": 0.003200640128025605, "frac_reward_zero_std": 0.0, "grad_norm": 3.7902949247967315, "kl": 0.00201416015625, "learning_rate": 3e-08, "loss": -0.0211, "num_tokens": 680032.0, "reward": -5.960464477539063e-08, "reward_std": 0.6785845160484314, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.18800039214593786, "rewards/wordcountpos_reward/raw_geo/std": 0.24516681115323696, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.09496588081262936, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 16 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1181.875, "completions/mean_terminated_length": 1160.666748046875, "completions/min_length": 814.0, "completions/min_terminated_length": 814.0, "epoch": 0.0034006801360272052, "frac_reward_zero_std": 0.0, "grad_norm": 3.530423206396441, "kl": 0.0025482177734375, "learning_rate": 3.2e-08, "loss": 0.0551, "num_tokens": 727830.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5536831021308899, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13579335046363653, "rewards/wordcountpos_reward/raw_geo/std": 0.08194666431534924, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.1276569477008451, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 17 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1273.75, "completions/mean_terminated_length": 1241.4285888671875, "completions/min_length": 1004.0, "completions/min_terminated_length": 1004.0, "epoch": 0.0036007201440288058, "frac_reward_zero_std": 0.0, "grad_norm": 3.1057616181704906, "kl": 0.002300262451171875, "learning_rate": 3.4e-08, "loss": -0.0332, "num_tokens": 763338.0, "reward": 2.9802322387695312e-08, "reward_std": 0.930446982383728, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.17500146785117016, "rewards/wordcountpos_reward/raw_geo/std": 0.07142286225477365, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066473, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 18 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1413.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1071.3125, "completions/mean_terminated_length": 1071.3125, "completions/min_length": 714.0, "completions/min_terminated_length": 714.0, "epoch": 0.003800760152030406, "frac_reward_zero_std": 0.0, "grad_norm": 3.4713607485961644, "kl": 0.00254058837890625, "learning_rate": 3.6e-08, "loss": 0.0183, "num_tokens": 809799.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9527382254600525, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04392219472145073, "rewards/wordcountpos_reward/raw_geo/std": 0.19282724057465933, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1377060745318193, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 19 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1179.5, "completions/mean_terminated_length": 1158.1334228515625, "completions/min_length": 903.0, "completions/min_terminated_length": 903.0, "epoch": 0.004000800160032006, "frac_reward_zero_std": 0.0, "grad_norm": 3.1363202725296784, "kl": 0.0021038055419921875, "learning_rate": 3.7999999999999996e-08, "loss": 0.0005, "num_tokens": 862751.0, "reward": -1.862645149230957e-08, "reward_std": 1.060667634010315, "rewards/wordcountpos_reward/mean": -1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2620446166707721, "rewards/wordcountpos_reward/raw_geo/std": 0.21947933776825151, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891873, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 20 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1285.0, "completions/max_terminated_length": 1285.0, "completions/mean_length": 1016.8125, "completions/mean_terminated_length": 1016.8125, "completions/min_length": 732.0, "completions/min_terminated_length": 732.0, "epoch": 0.004200840168033607, "frac_reward_zero_std": 0.0, "grad_norm": 3.3269915098256053, "kl": 0.002140045166015625, "learning_rate": 4e-08, "loss": -0.0082, "num_tokens": 906964.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8853997588157654, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09927080747029668, "rewards/wordcountpos_reward/raw_geo/std": 0.0805268646754127, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1548595540529595, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 21 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1424.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1074.0, "completions/mean_terminated_length": 1074.0, "completions/min_length": 838.0, "completions/min_terminated_length": 838.0, "epoch": 0.004400880176035207, "frac_reward_zero_std": 0.0, "grad_norm": 3.567377949806492, "kl": 0.002490997314453125, "learning_rate": 4.2e-08, "loss": -0.0244, "num_tokens": 947620.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8644933104515076, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07925202687080059, "rewards/wordcountpos_reward/raw_geo/std": 0.0758299246895885, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.13526380260918403, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 22 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1287.0, "completions/max_terminated_length": 1287.0, "completions/mean_length": 987.125, "completions/mean_terminated_length": 987.125, "completions/min_length": 665.0, "completions/min_terminated_length": 665.0, "epoch": 0.004600920184036807, "frac_reward_zero_std": 0.0, "grad_norm": 4.208611808318897, "kl": 0.002841949462890625, "learning_rate": 4.4e-08, "loss": -0.0074, "num_tokens": 999062.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0427882671356201, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03526983305268585, "rewards/wordcountpos_reward/raw_geo/std": 0.04733718241866139, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.12171612389003693, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 23 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1274.4375, "completions/mean_terminated_length": 1171.9091796875, "completions/min_length": 916.0, "completions/min_terminated_length": 916.0, "epoch": 0.004800960192038408, "frac_reward_zero_std": 0.0, "grad_norm": 3.065859066921973, "kl": 0.002498626708984375, "learning_rate": 4.5999999999999995e-08, "loss": 0.0059, "num_tokens": 1048981.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6512476205825806, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11186146845153422, "rewards/wordcountpos_reward/raw_geo/std": 0.2219636274296806, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.15864005379054394, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 24 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1390.0, "completions/mean_terminated_length": 1248.571533203125, "completions/min_length": 1130.0, "completions/min_terminated_length": 1130.0, "epoch": 0.005001000200040008, "frac_reward_zero_std": 0.0, "grad_norm": 2.0696160118781535, "kl": 0.0010509490966796875, "learning_rate": 4.8e-08, "loss": -0.0113, "num_tokens": 1095773.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0030694007873535, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14824852639004496, "rewards/wordcountpos_reward/raw_geo/std": 0.12155263262885863, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.09108400680852978, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 25 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1406.0, "completions/max_terminated_length": 1406.0, "completions/mean_length": 1109.75, "completions/mean_terminated_length": 1109.75, "completions/min_length": 857.0, "completions/min_terminated_length": 857.0, "epoch": 0.005201040208041608, "frac_reward_zero_std": 0.0, "grad_norm": 3.5808904483899155, "kl": 0.00238800048828125, "learning_rate": 5e-08, "loss": -0.0043, "num_tokens": 1138457.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9966350793838501, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05702305702017543, "rewards/wordcountpos_reward/raw_geo/std": 0.11436940928081135, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 26 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1195.5625, "completions/mean_terminated_length": 1175.2667236328125, "completions/min_length": 976.0, "completions/min_terminated_length": 976.0, "epoch": 0.0054010802160432084, "frac_reward_zero_std": 0.0, "grad_norm": 3.3502685015860543, "kl": 0.0023136138916015625, "learning_rate": 5.1999999999999996e-08, "loss": 0.0204, "num_tokens": 1179274.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9909127950668335, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07939162887681046, "rewards/wordcountpos_reward/raw_geo/std": 0.10428932455488632, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891871, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 27 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1187.0, "completions/max_terminated_length": 1187.0, "completions/mean_length": 927.5625, "completions/mean_terminated_length": 927.5625, "completions/min_length": 691.0, "completions/min_terminated_length": 691.0, "epoch": 0.005601120224044809, "frac_reward_zero_std": 0.0, "grad_norm": 3.497570534065496, "kl": 0.0017910003662109375, "learning_rate": 5.3999999999999994e-08, "loss": -0.0337, "num_tokens": 1228139.0, "reward": 0.0, "reward_std": 0.8223745822906494, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06698038716999538, "rewards/wordcountpos_reward/raw_geo/std": 0.465609639335268, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.11385500851066223, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 28 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1461.0, "completions/max_terminated_length": 1461.0, "completions/mean_length": 1221.75, "completions/mean_terminated_length": 1221.75, "completions/min_length": 946.0, "completions/min_terminated_length": 946.0, "epoch": 0.0058011602320464095, "frac_reward_zero_std": 0.0, "grad_norm": 3.154653031446095, "kl": 0.00206756591796875, "learning_rate": 5.6e-08, "loss": -0.0105, "num_tokens": 1277127.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9979424476623535, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05153375274395769, "rewards/wordcountpos_reward/raw_geo/std": 0.08762850367344728, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078612, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 29 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1268.0, "completions/max_terminated_length": 1268.0, "completions/mean_length": 1087.6875, "completions/mean_terminated_length": 1087.6875, "completions/min_length": 891.0, "completions/min_terminated_length": 891.0, "epoch": 0.006001200240048009, "frac_reward_zero_std": 0.0, "grad_norm": 2.769965263951847, "kl": 0.0016193389892578125, "learning_rate": 5.8e-08, "loss": 0.0384, "num_tokens": 1319818.0, "reward": -5.960464477539063e-08, "reward_std": 0.653556227684021, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09990521663314691, "rewards/wordcountpos_reward/raw_geo/std": 0.1792054790335709, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.75, "rewards/wordcountpos_reward/raw_rule/std": 0.12049281521534189, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 30 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1236.875, "completions/mean_terminated_length": 1149.166748046875, "completions/min_length": 813.0, "completions/min_terminated_length": 813.0, "epoch": 0.00620124024804961, "frac_reward_zero_std": 0.0, "grad_norm": 2.552651474192458, "kl": 0.0018463134765625, "learning_rate": 6e-08, "loss": -0.0162, "num_tokens": 1371968.0, "reward": 0.0, "reward_std": 0.4588787257671356, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.15320880545708118, "rewards/wordcountpos_reward/raw_geo/std": 0.1900779379839505, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.18130187635645245, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 31 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1397.0, "completions/mean_length": 976.375, "completions/mean_terminated_length": 941.4667358398438, "completions/min_length": 609.0, "completions/min_terminated_length": 609.0, "epoch": 0.00640128025605121, "frac_reward_zero_std": 0.0, "grad_norm": 3.909107751019465, "kl": 0.0029449462890625, "learning_rate": 6.2e-08, "loss": -0.079, "num_tokens": 1416894.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5451881289482117, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.16118998121042316, "rewards/wordcountpos_reward/raw_geo/std": 0.18611854456660606, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7083333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1261979632400061, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 32 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1373.0, "completions/mean_length": 1082.9375, "completions/mean_terminated_length": 1055.1334228515625, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.006601320264052811, "frac_reward_zero_std": 0.0, "grad_norm": 2.5692180139121326, "kl": 0.00179290771484375, "learning_rate": 6.4e-08, "loss": -0.1914, "num_tokens": 1468141.0, "reward": -1.4901161193847656e-08, "reward_std": 0.882655143737793, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.037042266367277546, "rewards/wordcountpos_reward/raw_geo/std": 0.07329230609937216, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.15776212754932312, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 33 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1064.0, "completions/max_terminated_length": 1064.0, "completions/mean_length": 958.1875, "completions/mean_terminated_length": 958.1875, "completions/min_length": 846.0, "completions/min_terminated_length": 846.0, "epoch": 0.0068013602720544105, "frac_reward_zero_std": 0.0, "grad_norm": 3.18763452766036, "kl": 0.002197265625, "learning_rate": 6.6e-08, "loss": -0.0013, "num_tokens": 1519768.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9640591144561768, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2373595299986506, "rewards/wordcountpos_reward/raw_geo/std": 0.2979919818880407, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818419, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 34 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1290.0, "completions/max_terminated_length": 1290.0, "completions/mean_length": 1034.5, "completions/mean_terminated_length": 1034.5, "completions/min_length": 765.0, "completions/min_terminated_length": 765.0, "epoch": 0.007001400280056011, "frac_reward_zero_std": 0.0, "grad_norm": 3.0402526128518494, "kl": 0.0015888214111328125, "learning_rate": 6.8e-08, "loss": -0.0131, "num_tokens": 1557584.0, "reward": 0.0, "reward_std": 0.7507257461547852, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1241539385675051, "rewards/wordcountpos_reward/raw_geo/std": 0.16992359920941463, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725113, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 35 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1291.0, "completions/mean_length": 1234.375, "completions/mean_terminated_length": 1196.4285888671875, "completions/min_length": 1033.0, "completions/min_terminated_length": 1033.0, "epoch": 0.0072014402880576115, "frac_reward_zero_std": 0.0, "grad_norm": 2.670408929151118, "kl": 0.0012850761413574219, "learning_rate": 7e-08, "loss": -0.0298, "num_tokens": 1604230.0, "reward": 0.0, "reward_std": 0.7335447669029236, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0038398443027448494, "rewards/wordcountpos_reward/raw_geo/std": 0.133285123615264, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.10000000000000002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 36 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1321.0, "completions/max_terminated_length": 1321.0, "completions/mean_length": 968.9375, "completions/mean_terminated_length": 968.9375, "completions/min_length": 632.0, "completions/min_terminated_length": 632.0, "epoch": 0.007401480296059212, "frac_reward_zero_std": 0.0, "grad_norm": 3.608250084251578, "kl": 0.0019969940185546875, "learning_rate": 7.2e-08, "loss": 0.0085, "num_tokens": 1636989.0, "reward": 0.0, "reward_std": 0.8306714296340942, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.01242248777773371, "rewards/wordcountpos_reward/raw_geo/std": 0.046526047999266275, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 37 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1364.75, "completions/mean_terminated_length": 1333.5384521484375, "completions/min_length": 941.0, "completions/min_terminated_length": 941.0, "epoch": 0.007601520304060812, "frac_reward_zero_std": 0.0, "grad_norm": 2.678383839115574, "kl": 0.0016307830810546875, "learning_rate": 7.399999999999999e-08, "loss": 0.0346, "num_tokens": 1687353.0, "reward": 0.0, "reward_std": 0.7261468172073364, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06788943902991768, "rewards/wordcountpos_reward/raw_geo/std": 0.07790495715118463, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11344765475923412, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 38 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1202.0, "completions/max_terminated_length": 1202.0, "completions/mean_length": 1038.875, "completions/mean_terminated_length": 1038.875, "completions/min_length": 887.0, "completions/min_terminated_length": 887.0, "epoch": 0.007801560312062412, "frac_reward_zero_std": 0.0, "grad_norm": 3.3785119569476842, "kl": 0.002109527587890625, "learning_rate": 7.599999999999999e-08, "loss": -0.0089, "num_tokens": 1730287.0, "reward": 2.9802322387695312e-08, "reward_std": 0.801064670085907, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08157536204093915, "rewards/wordcountpos_reward/raw_geo/std": 0.2274168733107394, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0958393717904348, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 39 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1272.9375, "completions/mean_terminated_length": 1257.800048828125, "completions/min_length": 835.0, "completions/min_terminated_length": 835.0, "epoch": 0.008001600320064013, "frac_reward_zero_std": 0.0, "grad_norm": 2.627387832651442, "kl": 0.0014858245849609375, "learning_rate": 7.8e-08, "loss": -0.0217, "num_tokens": 1785742.0, "reward": 1.4901161193847656e-08, "reward_std": 1.061901330947876, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.42870538452307183, "rewards/wordcountpos_reward/raw_geo/std": 0.263985549175234, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.048495895206211566, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 40 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1415.0, "completions/max_terminated_length": 1415.0, "completions/mean_length": 1158.75, "completions/mean_terminated_length": 1158.75, "completions/min_length": 937.0, "completions/min_terminated_length": 937.0, "epoch": 0.008201640328065612, "frac_reward_zero_std": 0.0, "grad_norm": 2.522238776465091, "kl": 0.0015172958374023438, "learning_rate": 8e-08, "loss": 0.0333, "num_tokens": 1816978.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6761976480484009, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.005377481638485537, "rewards/wordcountpos_reward/raw_geo/std": 0.0761497347320364, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 41 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1445.0, "completions/mean_length": 1321.75, "completions/mean_terminated_length": 1240.727294921875, "completions/min_length": 1064.0, "completions/min_terminated_length": 1064.0, "epoch": 0.008401680336067214, "frac_reward_zero_std": 0.0, "grad_norm": 2.7279118100114075, "kl": 0.001800537109375, "learning_rate": 8.2e-08, "loss": 0.0261, "num_tokens": 1852822.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9055695533752441, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04670191211284185, "rewards/wordcountpos_reward/raw_geo/std": 0.03744635498643519, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8250000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.05900408021045226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 42 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1499.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1237.25, "completions/mean_terminated_length": 1237.25, "completions/min_length": 1007.0, "completions/min_terminated_length": 1007.0, "epoch": 0.008601720344068814, "frac_reward_zero_std": 0.0, "grad_norm": 2.7283548320320885, "kl": 0.001575469970703125, "learning_rate": 8.4e-08, "loss": -0.0114, "num_tokens": 1906586.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0410590171813965, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06558977500215979, "rewards/wordcountpos_reward/raw_geo/std": 0.10948238148185759, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 43 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1416.0, "completions/mean_length": 1128.625, "completions/mean_terminated_length": 1103.86669921875, "completions/min_length": 790.0, "completions/min_terminated_length": 790.0, "epoch": 0.008801760352070413, "frac_reward_zero_std": 0.0, "grad_norm": 2.941551231363272, "kl": 0.0016937255859375, "learning_rate": 8.599999999999999e-08, "loss": -0.016, "num_tokens": 1951372.0, "reward": 7.450580596923828e-09, "reward_std": 1.007286787033081, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.047986016667105835, "rewards/wordcountpos_reward/raw_geo/std": 0.1647848488669715, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08333333333333334, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 44 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1400.0, "completions/mean_length": 1229.3125, "completions/mean_terminated_length": 1190.6429443359375, "completions/min_length": 1095.0, "completions/min_terminated_length": 1095.0, "epoch": 0.009001800360072015, "frac_reward_zero_std": 0.0, "grad_norm": 2.7094244164897625, "kl": 0.0015301704406738281, "learning_rate": 8.8e-08, "loss": 0.0075, "num_tokens": 1997345.0, "reward": 0.0, "reward_std": 0.49579331278800964, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.014463292505470429, "rewards/wordcountpos_reward/raw_geo/std": 0.06079814898465263, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.10183501544346313, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 45 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1120.0, "completions/max_terminated_length": 1120.0, "completions/mean_length": 969.75, "completions/mean_terminated_length": 969.75, "completions/min_length": 770.0, "completions/min_terminated_length": 770.0, "epoch": 0.009201840368073614, "frac_reward_zero_std": 0.0, "grad_norm": 3.534484823231903, "kl": 0.0018787384033203125, "learning_rate": 9e-08, "loss": 0.0016, "num_tokens": 2043125.0, "reward": 0.0, "reward_std": 1.0315972566604614, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.01297143453701503, "rewards/wordcountpos_reward/raw_geo/std": 0.07119654640895512, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09108400680852977, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 46 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1332.0, "completions/max_terminated_length": 1332.0, "completions/mean_length": 1019.5625, "completions/mean_terminated_length": 1019.5625, "completions/min_length": 807.0, "completions/min_terminated_length": 807.0, "epoch": 0.009401880376075216, "frac_reward_zero_std": 0.0, "grad_norm": 3.463254958479964, "kl": 0.0019664764404296875, "learning_rate": 9.199999999999999e-08, "loss": -0.0589, "num_tokens": 2082446.0, "reward": 0.0, "reward_std": 0.9849706888198853, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0843871901886455, "rewards/wordcountpos_reward/raw_geo/std": 0.09872120752646156, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14089659985908765, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 47 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1258.9375, "completions/mean_terminated_length": 1242.86669921875, "completions/min_length": 932.0, "completions/min_terminated_length": 932.0, "epoch": 0.009601920384076815, "frac_reward_zero_std": 0.0, "grad_norm": 2.971385125688432, "kl": 0.0019779205322265625, "learning_rate": 9.4e-08, "loss": -0.0584, "num_tokens": 2132933.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6457346081733704, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.16243873636014147, "rewards/wordcountpos_reward/raw_geo/std": 0.1255084780802457, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362769, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 48 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1047.0, "completions/max_terminated_length": 1047.0, "completions/mean_length": 942.4375, "completions/mean_terminated_length": 942.4375, "completions/min_length": 792.0, "completions/min_terminated_length": 792.0, "epoch": 0.009801960392078415, "frac_reward_zero_std": 0.0, "grad_norm": 3.3066241610973615, "kl": 0.001689910888671875, "learning_rate": 9.6e-08, "loss": -0.0143, "num_tokens": 2168836.0, "reward": 0.0, "reward_std": 0.5142208337783813, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.20072281556082983, "rewards/wordcountpos_reward/raw_geo/std": 0.16580287533649615, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.1270024788326182, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 49 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1248.6875, "completions/mean_terminated_length": 1190.6923828125, "completions/min_length": 937.0, "completions/min_terminated_length": 937.0, "epoch": 0.010002000400080016, "frac_reward_zero_std": 0.0, "grad_norm": 2.4640311998220077, "kl": 0.0012788772583007812, "learning_rate": 9.8e-08, "loss": 0.0142, "num_tokens": 2217639.0, "reward": 0.0, "reward_std": 0.8277969360351562, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.16234292152381402, "rewards/wordcountpos_reward/raw_geo/std": 0.16072601120735497, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 50 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1251.0, "completions/max_terminated_length": 1251.0, "completions/mean_length": 1001.5625, "completions/mean_terminated_length": 1001.5625, "completions/min_length": 815.0, "completions/min_terminated_length": 815.0, "epoch": 0.010202040408081616, "frac_reward_zero_std": 0.0, "grad_norm": 2.943966756593823, "kl": 0.001708984375, "learning_rate": 1e-07, "loss": -0.0133, "num_tokens": 2268408.0, "reward": -1.4901161193847656e-08, "reward_std": 0.870779812335968, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.13615339609111313, "rewards/wordcountpos_reward/raw_geo/std": 0.1670364547720456, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0718795288428261, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 51 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1312.0, "completions/max_terminated_length": 1312.0, "completions/mean_length": 1121.1875, "completions/mean_terminated_length": 1121.1875, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.010402080416083216, "frac_reward_zero_std": 0.0, "grad_norm": 3.58703209291831, "kl": 0.002361297607421875, "learning_rate": 1.0199999999999999e-07, "loss": -0.0171, "num_tokens": 2311075.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9936850070953369, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04702748511650438, "rewards/wordcountpos_reward/raw_geo/std": 0.07679069169034027, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101766, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 52 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1373.0, "completions/max_terminated_length": 1373.0, "completions/mean_length": 1177.6875, "completions/mean_terminated_length": 1177.6875, "completions/min_length": 848.0, "completions/min_terminated_length": 848.0, "epoch": 0.010602120424084817, "frac_reward_zero_std": 0.0, "grad_norm": 3.451249976717016, "kl": 0.002536773681640625, "learning_rate": 1.0399999999999999e-07, "loss": -0.0061, "num_tokens": 2364150.0, "reward": 0.0, "reward_std": 0.8075898289680481, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2132490017495051, "rewards/wordcountpos_reward/raw_geo/std": 0.14247833058642853, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 53 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1113.0, "completions/max_terminated_length": 1113.0, "completions/mean_length": 980.1875, "completions/mean_terminated_length": 980.1875, "completions/min_length": 760.0, "completions/min_terminated_length": 760.0, "epoch": 0.010802160432086417, "frac_reward_zero_std": 0.0, "grad_norm": 3.7826596529915872, "kl": 0.002460479736328125, "learning_rate": 1.06e-07, "loss": 0.0153, "num_tokens": 2413217.0, "reward": 5.960464477539063e-08, "reward_std": 0.8012369275093079, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.011927149475152986, "rewards/wordcountpos_reward/raw_geo/std": 0.1738858621057102, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.625, "rewards/wordcountpos_reward/raw_rule/std": 0.16843506277010845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 54 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1129.0, "completions/max_terminated_length": 1129.0, "completions/mean_length": 946.8125, "completions/mean_terminated_length": 946.8125, "completions/min_length": 788.0, "completions/min_terminated_length": 788.0, "epoch": 0.011002200440088018, "frac_reward_zero_std": 0.0, "grad_norm": 3.103545716226999, "kl": 0.001865386962890625, "learning_rate": 1.0799999999999999e-07, "loss": -0.0148, "num_tokens": 2453518.0, "reward": 0.0, "reward_std": 0.7228332757949829, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08228915216070509, "rewards/wordcountpos_reward/raw_geo/std": 0.14798677113522873, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.1299572579307862, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 55 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1190.0, "completions/max_terminated_length": 1190.0, "completions/mean_length": 1060.625, "completions/mean_terminated_length": 1060.625, "completions/min_length": 920.0, "completions/min_terminated_length": 920.0, "epoch": 0.011202240448089618, "frac_reward_zero_std": 0.0, "grad_norm": 2.6791683165663094, "kl": 0.0012788772583007812, "learning_rate": 1.0999999999999999e-07, "loss": 0.002, "num_tokens": 2489912.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0635603666305542, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10841362943864043, "rewards/wordcountpos_reward/raw_geo/std": 0.07803994538825557, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.6875, "rewards/wordcountpos_reward/raw_rule/std": 0.1413558682244267, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 56 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1207.5, "completions/mean_terminated_length": 1165.71435546875, "completions/min_length": 883.0, "completions/min_terminated_length": 883.0, "epoch": 0.011402280456091218, "frac_reward_zero_std": 0.0, "grad_norm": 2.846461532887261, "kl": 0.001705169677734375, "learning_rate": 1.12e-07, "loss": 0.0077, "num_tokens": 2531960.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8560097217559814, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04001479999848212, "rewards/wordcountpos_reward/raw_geo/std": 0.15671627229466206, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14548768561863465, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 57 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1130.4375, "completions/mean_terminated_length": 962.45458984375, "completions/min_length": 684.0, "completions/min_terminated_length": 684.0, "epoch": 0.011602320464092819, "frac_reward_zero_std": 0.0, "grad_norm": 3.21901567309924, "kl": 0.0023040771484375, "learning_rate": 1.14e-07, "loss": -0.0568, "num_tokens": 2574079.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0615627765655518, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12349942196245112, "rewards/wordcountpos_reward/raw_geo/std": 0.1955286826790732, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.11474609652039006, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 58 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1146.0, "completions/max_terminated_length": 1146.0, "completions/mean_length": 1022.0, "completions/mean_terminated_length": 1022.0, "completions/min_length": 776.0, "completions/min_terminated_length": 776.0, "epoch": 0.011802360472094419, "frac_reward_zero_std": 0.0, "grad_norm": 2.8956508751641175, "kl": 0.0021076202392578125, "learning_rate": 1.16e-07, "loss": 0.0151, "num_tokens": 2621215.0, "reward": 0.0, "reward_std": 0.7808208465576172, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03287686977835749, "rewards/wordcountpos_reward/raw_geo/std": 0.06689788514164367, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07084150279686706, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 59 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1242.0, "completions/max_terminated_length": 1242.0, "completions/mean_length": 1053.9375, "completions/mean_terminated_length": 1053.9375, "completions/min_length": 864.0, "completions/min_terminated_length": 864.0, "epoch": 0.012002400480096018, "frac_reward_zero_std": 0.0, "grad_norm": 3.2089278709869995, "kl": 0.001739501953125, "learning_rate": 1.1799999999999998e-07, "loss": -0.0092, "num_tokens": 2671142.0, "reward": -2.2351741790771484e-08, "reward_std": 0.9888018369674683, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.13551425466957498, "rewards/wordcountpos_reward/raw_geo/std": 0.10539356700345409, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.6541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.077817450199525, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 60 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1354.0, "completions/max_terminated_length": 1354.0, "completions/mean_length": 1009.125, "completions/mean_terminated_length": 1009.125, "completions/min_length": 766.0, "completions/min_terminated_length": 766.0, "epoch": 0.01220244048809762, "frac_reward_zero_std": 0.0, "grad_norm": 3.400680270630255, "kl": 0.001728057861328125, "learning_rate": 1.2e-07, "loss": -0.0062, "num_tokens": 2713448.0, "reward": -5.960464477539063e-08, "reward_std": 0.443503201007843, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04001027402421124, "rewards/wordcountpos_reward/raw_geo/std": 0.1541240406982376, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.11917929226045819, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 61 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1117.0, "completions/max_terminated_length": 1117.0, "completions/mean_length": 992.625, "completions/mean_terminated_length": 992.625, "completions/min_length": 834.0, "completions/min_terminated_length": 834.0, "epoch": 0.01240248049609922, "frac_reward_zero_std": 0.0, "grad_norm": 3.423086202681301, "kl": 0.0015954971313476562, "learning_rate": 1.2199999999999998e-07, "loss": -0.0236, "num_tokens": 2755442.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9328954219818115, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07611894928599701, "rewards/wordcountpos_reward/raw_geo/std": 0.06957971047329209, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965646, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 62 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1222.0, "completions/max_terminated_length": 1222.0, "completions/mean_length": 1068.6875, "completions/mean_terminated_length": 1068.6875, "completions/min_length": 956.0, "completions/min_terminated_length": 956.0, "epoch": 0.01260252050410082, "frac_reward_zero_std": 0.0, "grad_norm": 3.0369902335352212, "kl": 0.001537322998046875, "learning_rate": 1.24e-07, "loss": 0.005, "num_tokens": 2794957.0, "reward": 0.0, "reward_std": 0.8936575651168823, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.023587060511318154, "rewards/wordcountpos_reward/raw_geo/std": 0.039398007676348705, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.11666666666666668, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 63 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1224.0, "completions/max_terminated_length": 1224.0, "completions/mean_length": 951.75, "completions/mean_terminated_length": 951.75, "completions/min_length": 592.0, "completions/min_terminated_length": 592.0, "epoch": 0.01280256051210242, "frac_reward_zero_std": 0.0, "grad_norm": 3.971300381599344, "kl": 0.00274658203125, "learning_rate": 1.26e-07, "loss": 0.0051, "num_tokens": 2835233.0, "reward": 7.450580596923828e-09, "reward_std": 1.0385068655014038, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.13802905831220774, "rewards/wordcountpos_reward/raw_geo/std": 0.1624069689135552, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10327955589886448, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 64 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1379.0, "completions/max_terminated_length": 1379.0, "completions/mean_length": 1110.1875, "completions/mean_terminated_length": 1110.1875, "completions/min_length": 986.0, "completions/min_terminated_length": 986.0, "epoch": 0.01300260052010402, "frac_reward_zero_std": 0.0, "grad_norm": 3.123944987966859, "kl": 0.0017490386962890625, "learning_rate": 1.28e-07, "loss": 0.0286, "num_tokens": 2875796.0, "reward": 1.1175870895385742e-08, "reward_std": 1.0527963638305664, "rewards/wordcountpos_reward/mean": 1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.023763320717478175, "rewards/wordcountpos_reward/raw_geo/std": 0.10936544193982757, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0843274042711568, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 65 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1482.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1208.75, "completions/mean_terminated_length": 1208.75, "completions/min_length": 867.0, "completions/min_terminated_length": 867.0, "epoch": 0.013202640528105622, "frac_reward_zero_std": 0.0, "grad_norm": 3.6583881912767886, "kl": 0.00267791748046875, "learning_rate": 1.3e-07, "loss": -0.048, "num_tokens": 2928976.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8107168078422546, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.3147331285068067, "rewards/wordcountpos_reward/raw_geo/std": 0.2895872014821923, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1605545943838973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 66 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1307.0, "completions/max_terminated_length": 1307.0, "completions/mean_length": 1028.375, "completions/mean_terminated_length": 1028.375, "completions/min_length": 842.0, "completions/min_terminated_length": 842.0, "epoch": 0.013402680536107221, "frac_reward_zero_std": 0.0, "grad_norm": 3.433802235170335, "kl": 0.002140045166015625, "learning_rate": 1.32e-07, "loss": -0.0363, "num_tokens": 2962630.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0483653545379639, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.046053671892313776, "rewards/wordcountpos_reward/raw_geo/std": 0.18605159095266544, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12141145226353543, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 67 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1174.125, "completions/mean_terminated_length": 1152.4000244140625, "completions/min_length": 880.0, "completions/min_terminated_length": 880.0, "epoch": 0.013602720544108821, "frac_reward_zero_std": 0.0, "grad_norm": 3.027913748087244, "kl": 0.0019359588623046875, "learning_rate": 1.34e-07, "loss": -0.0346, "num_tokens": 3008536.0, "reward": 5.960464477539063e-08, "reward_std": 0.7146797776222229, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06700316140097935, "rewards/wordcountpos_reward/raw_geo/std": 0.08654563346921694, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.061913918736689035, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 68 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 1345.6875, "completions/mean_terminated_length": 1310.0770263671875, "completions/min_length": 1175.0, "completions/min_terminated_length": 1175.0, "epoch": 0.013802760552110422, "frac_reward_zero_std": 0.0, "grad_norm": 2.5735353978975524, "kl": 0.0017490386962890625, "learning_rate": 1.36e-07, "loss": -0.03, "num_tokens": 3054915.0, "reward": 0.0, "reward_std": 0.8835979700088501, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03346151161269269, "rewards/wordcountpos_reward/raw_geo/std": 0.11525915276451144, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101764, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 69 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1327.0, "completions/mean_length": 1177.8125, "completions/mean_terminated_length": 1156.3333740234375, "completions/min_length": 1024.0, "completions/min_terminated_length": 1024.0, "epoch": 0.014002800560112022, "frac_reward_zero_std": 0.0, "grad_norm": 1.7794689083003994, "kl": 0.000682830810546875, "learning_rate": 1.3800000000000002e-07, "loss": -0.0068, "num_tokens": 3102320.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8326509594917297, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.268538629523843, "rewards/wordcountpos_reward/raw_geo/std": 0.23431183749792447, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.1403039029577766, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 70 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1300.0, "completions/max_terminated_length": 1300.0, "completions/mean_length": 1066.125, "completions/mean_terminated_length": 1066.125, "completions/min_length": 942.0, "completions/min_terminated_length": 942.0, "epoch": 0.014202840568113623, "frac_reward_zero_std": 0.0, "grad_norm": 3.381034537286166, "kl": 0.0021343231201171875, "learning_rate": 1.4e-07, "loss": -0.0082, "num_tokens": 3151290.0, "reward": 0.0, "reward_std": 0.6858251094818115, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05580609313469934, "rewards/wordcountpos_reward/raw_geo/std": 0.10850381029456178, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10532137766186216, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 71 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1091.5625, "completions/mean_terminated_length": 1064.3333740234375, "completions/min_length": 714.0, "completions/min_terminated_length": 714.0, "epoch": 0.014402880576115223, "frac_reward_zero_std": 0.0, "grad_norm": 3.384692361783582, "kl": 0.002414703369140625, "learning_rate": 1.4199999999999997e-07, "loss": -0.0146, "num_tokens": 3201067.0, "reward": 0.0, "reward_std": 0.6135813593864441, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1346096225740474, "rewards/wordcountpos_reward/raw_geo/std": 0.06833792668093613, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.725, "rewards/wordcountpos_reward/raw_rule/std": 0.13305526559931294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 72 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1454.0, "completions/max_terminated_length": 1454.0, "completions/mean_length": 1253.0, "completions/mean_terminated_length": 1253.0, "completions/min_length": 983.0, "completions/min_terminated_length": 983.0, "epoch": 0.014602920584116823, "frac_reward_zero_std": 0.0, "grad_norm": 3.219061114628507, "kl": 0.00223541259765625, "learning_rate": 1.44e-07, "loss": -0.0127, "num_tokens": 3248835.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9600502848625183, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.14092874197771316, "rewards/wordcountpos_reward/raw_geo/std": 0.13795048980720287, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.10945995377982529, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 73 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1221.0, "completions/max_terminated_length": 1221.0, "completions/mean_length": 1054.6875, "completions/mean_terminated_length": 1054.6875, "completions/min_length": 821.0, "completions/min_terminated_length": 821.0, "epoch": 0.014802960592118424, "frac_reward_zero_std": 0.0, "grad_norm": 2.043617920670456, "kl": 0.0006856918334960938, "learning_rate": 1.4599999999999998e-07, "loss": 0.0021, "num_tokens": 3286950.0, "reward": -3.725290298461914e-09, "reward_std": 1.0264079570770264, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.16163362017433497, "rewards/wordcountpos_reward/raw_geo/std": 0.2113384531220164, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078612, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 74 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1288.0625, "completions/mean_terminated_length": 1273.933349609375, "completions/min_length": 1119.0, "completions/min_terminated_length": 1119.0, "epoch": 0.015003000600120024, "frac_reward_zero_std": 0.0, "grad_norm": 2.463983506646221, "kl": 0.001415252685546875, "learning_rate": 1.4799999999999998e-07, "loss": -0.0151, "num_tokens": 3340703.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0527896881103516, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03372691713834564, "rewards/wordcountpos_reward/raw_geo/std": 0.12906610677594696, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036262, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 75 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1351.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 1164.0, "completions/mean_terminated_length": 1164.0, "completions/min_length": 927.0, "completions/min_terminated_length": 927.0, "epoch": 0.015203040608121624, "frac_reward_zero_std": 0.0, "grad_norm": 2.5198558255828356, "kl": 0.0014171600341796875, "learning_rate": 1.5e-07, "loss": 0.0079, "num_tokens": 3378743.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5425284504890442, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10821390293511987, "rewards/wordcountpos_reward/raw_geo/std": 0.11504616127711416, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.061913918736689035, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 76 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1382.0, "completions/max_terminated_length": 1382.0, "completions/mean_length": 1070.9375, "completions/mean_terminated_length": 1070.9375, "completions/min_length": 802.0, "completions/min_terminated_length": 802.0, "epoch": 0.015403080616123225, "frac_reward_zero_std": 0.0, "grad_norm": 2.846890204409714, "kl": 0.0019207000732421875, "learning_rate": 1.5199999999999998e-07, "loss": 0.0047, "num_tokens": 3424662.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7099167108535767, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03401851122698256, "rewards/wordcountpos_reward/raw_geo/std": 0.06980181730189261, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7625, "rewards/wordcountpos_reward/raw_rule/std": 0.08062257748298553, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 77 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1154.875, "completions/mean_terminated_length": 998.0, "completions/min_length": 773.0, "completions/min_terminated_length": 773.0, "epoch": 0.015603120624124825, "frac_reward_zero_std": 0.0, "grad_norm": 2.558696511042277, "kl": 0.001270294189453125, "learning_rate": 1.54e-07, "loss": 0.0024, "num_tokens": 3471036.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9273952841758728, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.048057120449891015, "rewards/wordcountpos_reward/raw_geo/std": 0.08042709328156214, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 78 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1398.0, "completions/max_terminated_length": 1398.0, "completions/mean_length": 1019.875, "completions/mean_terminated_length": 1019.875, "completions/min_length": 764.0, "completions/min_terminated_length": 764.0, "epoch": 0.015803160632126424, "frac_reward_zero_std": 0.0, "grad_norm": 2.809161072413743, "kl": 0.0020294189453125, "learning_rate": 1.56e-07, "loss": -0.0282, "num_tokens": 3519450.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0270659923553467, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.24448510858828998, "rewards/wordcountpos_reward/raw_geo/std": 0.19179764342402397, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.6791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503961, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 79 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1299.0, "completions/max_terminated_length": 1299.0, "completions/mean_length": 1009.875, "completions/mean_terminated_length": 1009.875, "completions/min_length": 810.0, "completions/min_terminated_length": 810.0, "epoch": 0.016003200640128026, "frac_reward_zero_std": 0.0, "grad_norm": 3.742546948619, "kl": 0.00244903564453125, "learning_rate": 1.5799999999999999e-07, "loss": -0.0304, "num_tokens": 3560952.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0620218515396118, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.284031328458443, "rewards/wordcountpos_reward/raw_geo/std": 0.18176904643064618, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.16771890063326086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 80 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1285.0, "completions/max_terminated_length": 1285.0, "completions/mean_length": 1104.5, "completions/mean_terminated_length": 1104.5, "completions/min_length": 800.0, "completions/min_terminated_length": 800.0, "epoch": 0.016203240648129627, "frac_reward_zero_std": 0.0, "grad_norm": 3.392097370317782, "kl": 0.00218963623046875, "learning_rate": 1.6e-07, "loss": -0.0438, "num_tokens": 3603272.0, "reward": -2.9802322387695312e-08, "reward_std": 0.550621509552002, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.16509166218888163, "rewards/wordcountpos_reward/raw_geo/std": 0.1574318965551874, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066473, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 81 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1194.0, "completions/max_terminated_length": 1194.0, "completions/mean_length": 1004.4375, "completions/mean_terminated_length": 1004.4375, "completions/min_length": 675.0, "completions/min_terminated_length": 675.0, "epoch": 0.016403280656131225, "frac_reward_zero_std": 0.0, "grad_norm": 1.3767283140722126, "kl": 0.00044357776641845703, "learning_rate": 1.62e-07, "loss": -0.0374, "num_tokens": 3642079.0, "reward": -1.4901161193847656e-08, "reward_std": 0.996279239654541, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08207001229598464, "rewards/wordcountpos_reward/raw_geo/std": 0.12002407014395852, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590963, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 82 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1216.875, "completions/mean_terminated_length": 1151.5384521484375, "completions/min_length": 562.0, "completions/min_terminated_length": 562.0, "epoch": 0.016603320664132826, "frac_reward_zero_std": 0.0, "grad_norm": 3.356900404526726, "kl": 0.002445220947265625, "learning_rate": 1.64e-07, "loss": -0.0508, "num_tokens": 3685389.0, "reward": -3.725290298461914e-09, "reward_std": 1.0197583436965942, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.12103700061719323, "rewards/wordcountpos_reward/raw_geo/std": 0.0625620120191825, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1102186379345533, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 83 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1195.625, "completions/mean_terminated_length": 1152.1429443359375, "completions/min_length": 952.0, "completions/min_terminated_length": 952.0, "epoch": 0.016803360672134428, "frac_reward_zero_std": 0.0, "grad_norm": 2.7888688409351525, "kl": 0.0016803741455078125, "learning_rate": 1.66e-07, "loss": 0.0424, "num_tokens": 3735783.0, "reward": 0.0, "reward_std": 0.7441692352294922, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06404987898105448, "rewards/wordcountpos_reward/raw_geo/std": 0.09885025710959537, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460884, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 84 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1265.0, "completions/max_terminated_length": 1265.0, "completions/mean_length": 1025.1875, "completions/mean_terminated_length": 1025.1875, "completions/min_length": 824.0, "completions/min_terminated_length": 824.0, "epoch": 0.017003400680136026, "frac_reward_zero_std": 0.0, "grad_norm": 3.8198736260166233, "kl": 0.002620697021484375, "learning_rate": 1.68e-07, "loss": -0.0309, "num_tokens": 3786546.0, "reward": 0.0, "reward_std": 0.4674009680747986, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2120522081238164, "rewards/wordcountpos_reward/raw_geo/std": 0.2643144000897039, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13492110177323527, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 85 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1192.0, "completions/max_terminated_length": 1192.0, "completions/mean_length": 972.5625, "completions/mean_terminated_length": 972.5625, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.017203440688137627, "frac_reward_zero_std": 0.0, "grad_norm": 3.4465610412465018, "kl": 0.002399444580078125, "learning_rate": 1.7000000000000001e-07, "loss": -0.2055, "num_tokens": 3819195.0, "reward": 0.0, "reward_std": 0.45157063007354736, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.01212638696753808, "rewards/wordcountpos_reward/raw_geo/std": 0.0989730972160819, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.166888740937943, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 86 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1326.0, "completions/max_terminated_length": 1326.0, "completions/mean_length": 1032.25, "completions/mean_terminated_length": 1032.25, "completions/min_length": 802.0, "completions/min_terminated_length": 802.0, "epoch": 0.01740348069613923, "frac_reward_zero_std": 0.0, "grad_norm": 3.1029799146423547, "kl": 0.0016727447509765625, "learning_rate": 1.7199999999999998e-07, "loss": -0.0318, "num_tokens": 3859951.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8753600120544434, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14956301911332787, "rewards/wordcountpos_reward/raw_geo/std": 0.3036430780861878, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857661, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 87 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1364.0, "completions/max_terminated_length": 1364.0, "completions/mean_length": 1096.3125, "completions/mean_terminated_length": 1096.3125, "completions/min_length": 792.0, "completions/min_terminated_length": 792.0, "epoch": 0.017603520704140826, "frac_reward_zero_std": 0.0, "grad_norm": 2.7406071941723593, "kl": 0.0018787384033203125, "learning_rate": 1.7399999999999997e-07, "loss": 0.01, "num_tokens": 3899604.0, "reward": 0.0, "reward_std": 0.7270439863204956, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.3070543166390343, "rewards/wordcountpos_reward/raw_geo/std": 0.2784756707169464, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10602235962635778, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 88 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1312.1875, "completions/mean_terminated_length": 1285.357177734375, "completions/min_length": 1139.0, "completions/min_terminated_length": 1139.0, "epoch": 0.017803560712142428, "frac_reward_zero_std": 0.0, "grad_norm": 2.7153769752823584, "kl": 0.0014514923095703125, "learning_rate": 1.76e-07, "loss": 0.0336, "num_tokens": 3951111.0, "reward": 2.9802322387695312e-08, "reward_std": 0.659126877784729, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05353880335974331, "rewards/wordcountpos_reward/raw_geo/std": 0.3364238241998972, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08432740427115679, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 89 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1242.0, "completions/mean_length": 997.75, "completions/mean_terminated_length": 926.0000610351562, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.01800360072014403, "frac_reward_zero_std": 0.0, "grad_norm": 3.4629171711837587, "kl": 0.0025177001953125, "learning_rate": 1.7799999999999998e-07, "loss": -0.1032, "num_tokens": 3998235.0, "reward": 1.862645149230957e-08, "reward_std": 1.0609157085418701, "rewards/wordcountpos_reward/mean": 1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04898928131914003, "rewards/wordcountpos_reward/raw_geo/std": 0.0715433718391716, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.23094010767585033, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 90 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1472.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1108.6875, "completions/mean_terminated_length": 1108.6875, "completions/min_length": 751.0, "completions/min_terminated_length": 751.0, "epoch": 0.01820364072814563, "frac_reward_zero_std": 0.0, "grad_norm": 2.7100983398781335, "kl": 0.0019359588623046875, "learning_rate": 1.8e-07, "loss": 0.0265, "num_tokens": 4031694.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0323246717453003, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02602604761413077, "rewards/wordcountpos_reward/raw_geo/std": 0.08310000990370793, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.12852438880818895, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 91 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1376.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 995.3125, "completions/mean_terminated_length": 995.3125, "completions/min_length": 638.0, "completions/min_terminated_length": 638.0, "epoch": 0.01840368073614723, "frac_reward_zero_std": 0.0, "grad_norm": 3.9656884576577482, "kl": 0.00266265869140625, "learning_rate": 1.82e-07, "loss": 0.0011, "num_tokens": 4082723.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8962907791137695, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2710285801090401, "rewards/wordcountpos_reward/raw_geo/std": 0.09927527501427448, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.16815997674172586, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 92 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1141.0, "completions/mean_length": 1032.875, "completions/mean_terminated_length": 1001.7333984375, "completions/min_length": 846.0, "completions/min_terminated_length": 846.0, "epoch": 0.01860372074414883, "frac_reward_zero_std": 0.0, "grad_norm": 3.257865049459383, "kl": 0.0021953582763671875, "learning_rate": 1.8399999999999998e-07, "loss": -0.0235, "num_tokens": 4134305.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9222845435142517, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02851932756208324, "rewards/wordcountpos_reward/raw_geo/std": 0.0983720202569023, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11344765475923412, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 93 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1109.0, "completions/max_terminated_length": 1109.0, "completions/mean_length": 952.9375, "completions/mean_terminated_length": 952.9375, "completions/min_length": 848.0, "completions/min_terminated_length": 848.0, "epoch": 0.01880376075215043, "frac_reward_zero_std": 0.0, "grad_norm": 2.591861929202499, "kl": 0.0010938644409179688, "learning_rate": 1.86e-07, "loss": -0.0082, "num_tokens": 4183336.0, "reward": 1.4901161193847656e-08, "reward_std": 0.927165150642395, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0654109965184039, "rewards/wordcountpos_reward/raw_geo/std": 0.03883543969044345, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590965, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 94 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1396.0, "completions/mean_length": 1205.8125, "completions/mean_terminated_length": 1163.7857666015625, "completions/min_length": 892.0, "completions/min_terminated_length": 892.0, "epoch": 0.01900380076015203, "frac_reward_zero_std": 0.0, "grad_norm": 2.843068809361827, "kl": 0.00186920166015625, "learning_rate": 1.88e-07, "loss": 0.0082, "num_tokens": 4223765.0, "reward": 0.0, "reward_std": 0.8880987167358398, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11054466481578845, "rewards/wordcountpos_reward/raw_geo/std": 0.16882626208618992, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.10886621079036349, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 95 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1432.0, "completions/max_terminated_length": 1432.0, "completions/mean_length": 1079.9375, "completions/mean_terminated_length": 1079.9375, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.01920384076815363, "frac_reward_zero_std": 0.0, "grad_norm": 3.5443043426009035, "kl": 0.00235748291015625, "learning_rate": 1.8999999999999998e-07, "loss": -0.027, "num_tokens": 4266828.0, "reward": 0.0, "reward_std": 1.0388721227645874, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.12401498536400447, "rewards/wordcountpos_reward/raw_geo/std": 0.059548757863214494, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 96 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1440.0, "completions/max_terminated_length": 1440.0, "completions/mean_length": 1246.4375, "completions/mean_terminated_length": 1246.4375, "completions/min_length": 1052.0, "completions/min_terminated_length": 1052.0, "epoch": 0.019403880776155232, "frac_reward_zero_std": 0.0, "grad_norm": 2.9745956866672834, "kl": 0.00156402587890625, "learning_rate": 1.92e-07, "loss": -0.0069, "num_tokens": 4302859.0, "reward": -1.862645149230957e-08, "reward_std": 1.0609395503997803, "rewards/wordcountpos_reward/mean": -1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.3570642970214304, "rewards/wordcountpos_reward/raw_geo/std": 0.2923082116181851, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 97 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1272.1875, "completions/mean_terminated_length": 1135.5, "completions/min_length": 951.0, "completions/min_terminated_length": 951.0, "epoch": 0.01960392078415683, "frac_reward_zero_std": 0.0, "grad_norm": 2.1934913728541017, "kl": 0.001129150390625, "learning_rate": 1.94e-07, "loss": -0.0397, "num_tokens": 4354878.0, "reward": -2.9802322387695312e-08, "reward_std": 0.4104628562927246, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09081082787625226, "rewards/wordcountpos_reward/raw_geo/std": 0.09172777927884528, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.10174405069512346, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 98 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1160.0, "completions/max_terminated_length": 1160.0, "completions/mean_length": 1017.3125, "completions/mean_terminated_length": 1017.3125, "completions/min_length": 868.0, "completions/min_terminated_length": 868.0, "epoch": 0.01980396079215843, "frac_reward_zero_std": 0.0, "grad_norm": 2.855960015372327, "kl": 0.0013818740844726562, "learning_rate": 1.96e-07, "loss": -0.009, "num_tokens": 4396003.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6896897554397583, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04171223591558619, "rewards/wordcountpos_reward/raw_geo/std": 0.04287635771819565, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026004, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 99 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1280.0, "completions/max_terminated_length": 1280.0, "completions/mean_length": 940.875, "completions/mean_terminated_length": 940.875, "completions/min_length": 687.0, "completions/min_terminated_length": 687.0, "epoch": 0.020004000800160033, "frac_reward_zero_std": 0.0, "grad_norm": 3.4658420723462515, "kl": 0.002307891845703125, "learning_rate": 1.98e-07, "loss": -0.019, "num_tokens": 4432953.0, "reward": 0.0, "reward_std": 1.0164649486541748, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10926581236518279, "rewards/wordcountpos_reward/raw_geo/std": 0.10683221073875608, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.06206328908341753, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 100 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1426.0, "completions/mean_length": 885.5, "completions/mean_terminated_length": 844.5333862304688, "completions/min_length": 521.0, "completions/min_terminated_length": 521.0, "epoch": 0.02020404080816163, "frac_reward_zero_std": 0.0, "grad_norm": 3.4682767329592936, "kl": 0.0017375946044921875, "learning_rate": 2e-07, "loss": 0.0544, "num_tokens": 4471337.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8995528221130371, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.041884321407224076, "rewards/wordcountpos_reward/raw_geo/std": 0.0520283672232931, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.13957607775504183, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 101 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1372.0, "completions/max_terminated_length": 1372.0, "completions/mean_length": 1149.4375, "completions/mean_terminated_length": 1149.4375, "completions/min_length": 845.0, "completions/min_terminated_length": 845.0, "epoch": 0.020404080816163232, "frac_reward_zero_std": 0.0, "grad_norm": 2.384941895438001, "kl": 0.0011348724365234375, "learning_rate": 2.02e-07, "loss": -0.0119, "num_tokens": 4511488.0, "reward": 0.0, "reward_std": 0.6648247838020325, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03173980269404662, "rewards/wordcountpos_reward/raw_geo/std": 0.10554141997322652, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12852438880818895, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 102 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1060.0, "completions/max_terminated_length": 1060.0, "completions/mean_length": 795.875, "completions/mean_terminated_length": 795.875, "completions/min_length": 671.0, "completions/min_terminated_length": 671.0, "epoch": 0.020604120824164834, "frac_reward_zero_std": 0.0, "grad_norm": 3.459564942395811, "kl": 0.0019626617431640625, "learning_rate": 2.0399999999999997e-07, "loss": -0.0201, "num_tokens": 4548334.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9772127866744995, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06644742460073368, "rewards/wordcountpos_reward/raw_geo/std": 0.043228380952663556, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.11417984514369006, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 103 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1336.0, "completions/max_terminated_length": 1336.0, "completions/mean_length": 1074.25, "completions/mean_terminated_length": 1074.25, "completions/min_length": 775.0, "completions/min_terminated_length": 775.0, "epoch": 0.02080416083216643, "frac_reward_zero_std": 0.0, "grad_norm": 3.3682393095675396, "kl": 0.002368927001953125, "learning_rate": 2.06e-07, "loss": 0.0185, "num_tokens": 4585890.0, "reward": 0.0, "reward_std": 0.885869026184082, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.11426874211834061, "rewards/wordcountpos_reward/raw_geo/std": 0.23478524357029001, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.11674600476945511, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 104 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1253.1875, "completions/mean_terminated_length": 1217.9285888671875, "completions/min_length": 984.0, "completions/min_terminated_length": 984.0, "epoch": 0.021004200840168033, "frac_reward_zero_std": 0.0, "grad_norm": 2.7597115654260542, "kl": 0.0016937255859375, "learning_rate": 2.0799999999999998e-07, "loss": 0.0091, "num_tokens": 4640645.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8757137060165405, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.18459559015688365, "rewards/wordcountpos_reward/raw_geo/std": 0.1457668092119728, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.6416666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 105 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1422.0, "completions/max_terminated_length": 1422.0, "completions/mean_length": 1179.5, "completions/mean_terminated_length": 1179.5, "completions/min_length": 955.0, "completions/min_terminated_length": 955.0, "epoch": 0.021204240848169634, "frac_reward_zero_std": 0.0, "grad_norm": 2.3564989642109744, "kl": 0.001285552978515625, "learning_rate": 2.0999999999999997e-07, "loss": -0.0079, "num_tokens": 4683549.0, "reward": -1.4901161193847656e-08, "reward_std": 1.021355390548706, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1013983261764013, "rewards/wordcountpos_reward/raw_geo/std": 0.06767396516784616, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952499, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 106 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1194.0, "completions/max_terminated_length": 1194.0, "completions/mean_length": 1086.25, "completions/mean_terminated_length": 1086.25, "completions/min_length": 1026.0, "completions/min_terminated_length": 1026.0, "epoch": 0.021404280856171236, "frac_reward_zero_std": 0.0, "grad_norm": 2.499781247330737, "kl": 0.0012226104736328125, "learning_rate": 2.12e-07, "loss": -0.0114, "num_tokens": 4725681.0, "reward": -3.725290298461914e-09, "reward_std": 0.9972322583198547, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.1419042801655097, "rewards/wordcountpos_reward/raw_geo/std": 0.09633503898715794, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 107 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1306.0, "completions/max_terminated_length": 1306.0, "completions/mean_length": 1004.1875, "completions/mean_terminated_length": 1004.1875, "completions/min_length": 828.0, "completions/min_terminated_length": 828.0, "epoch": 0.021604320864172834, "frac_reward_zero_std": 0.0, "grad_norm": 2.807298263356802, "kl": 0.001522064208984375, "learning_rate": 2.1399999999999998e-07, "loss": -0.0023, "num_tokens": 4762684.0, "reward": 0.0, "reward_std": 0.7315050959587097, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.019604161675491012, "rewards/wordcountpos_reward/raw_geo/std": 0.06171842709267681, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 108 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1154.0, "completions/max_terminated_length": 1154.0, "completions/mean_length": 956.875, "completions/mean_terminated_length": 956.875, "completions/min_length": 577.0, "completions/min_terminated_length": 577.0, "epoch": 0.021804360872174435, "frac_reward_zero_std": 0.0, "grad_norm": 2.6275841668135316, "kl": 0.0013952255249023438, "learning_rate": 2.1599999999999998e-07, "loss": -0.054, "num_tokens": 4792770.0, "reward": 0.0, "reward_std": 0.9902944564819336, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1414058756587871, "rewards/wordcountpos_reward/raw_geo/std": 0.10904737544819008, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715322, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 109 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1484.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1121.5625, "completions/mean_terminated_length": 1121.5625, "completions/min_length": 899.0, "completions/min_terminated_length": 899.0, "epoch": 0.022004400880176037, "frac_reward_zero_std": 0.0, "grad_norm": 3.6076313920224785, "kl": 0.00240325927734375, "learning_rate": 2.18e-07, "loss": -0.0511, "num_tokens": 4833083.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0530446767807007, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03558353354252604, "rewards/wordcountpos_reward/raw_geo/std": 0.056397172435382066, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.11800816042090448, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 110 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1258.0, "completions/max_terminated_length": 1258.0, "completions/mean_length": 976.625, "completions/mean_terminated_length": 976.625, "completions/min_length": 757.0, "completions/min_terminated_length": 757.0, "epoch": 0.022204440888177635, "frac_reward_zero_std": 0.0, "grad_norm": 3.3235755539185035, "kl": 0.0019683837890625, "learning_rate": 2.1999999999999998e-07, "loss": 0.046, "num_tokens": 4864725.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8679072856903076, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12339057280874857, "rewards/wordcountpos_reward/raw_geo/std": 0.08453155713155466, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 111 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1270.0, "completions/mean_terminated_length": 1132.0, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.022404480896179236, "frac_reward_zero_std": 0.0, "grad_norm": 2.1322034674072152, "kl": 0.0014905929565429688, "learning_rate": 2.22e-07, "loss": -0.1409, "num_tokens": 4917213.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8368362784385681, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1491872710799866, "rewards/wordcountpos_reward/raw_geo/std": 0.20379599415714353, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1704025734460517, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 112 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1405.0, "completions/mean_length": 1200.125, "completions/mean_terminated_length": 1130.923095703125, "completions/min_length": 889.0, "completions/min_terminated_length": 889.0, "epoch": 0.022604520904180837, "frac_reward_zero_std": 0.0, "grad_norm": 3.5549011210358445, "kl": 0.0025691986083984375, "learning_rate": 2.24e-07, "loss": -0.0612, "num_tokens": 4967023.0, "reward": -1.4901161193847656e-08, "reward_std": 1.015423059463501, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11918412858870782, "rewards/wordcountpos_reward/raw_geo/std": 0.13533358411349417, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.08933913745655642, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 113 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1045.0, "completions/max_terminated_length": 1045.0, "completions/mean_length": 936.375, "completions/mean_terminated_length": 936.375, "completions/min_length": 857.0, "completions/min_terminated_length": 857.0, "epoch": 0.022804560912182435, "frac_reward_zero_std": 0.0, "grad_norm": 3.3389039395237767, "kl": 0.0020427703857421875, "learning_rate": 2.2599999999999999e-07, "loss": 0.0183, "num_tokens": 5015005.0, "reward": 0.0, "reward_std": 0.8978585600852966, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.009347811527754012, "rewards/wordcountpos_reward/raw_geo/std": 0.1411939361251286, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7125, "rewards/wordcountpos_reward/raw_rule/std": 0.11603000888978234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 114 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1491.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1113.625, "completions/mean_terminated_length": 1113.625, "completions/min_length": 906.0, "completions/min_terminated_length": 906.0, "epoch": 0.023004600920184037, "frac_reward_zero_std": 0.0, "grad_norm": 3.2878493435850324, "kl": 0.002399444580078125, "learning_rate": 2.28e-07, "loss": 0.0356, "num_tokens": 5066695.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8935801982879639, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11597057825403508, "rewards/wordcountpos_reward/raw_geo/std": 0.11845162069206665, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593313, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 115 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1196.0, "completions/max_terminated_length": 1196.0, "completions/mean_length": 970.125, "completions/mean_terminated_length": 970.125, "completions/min_length": 827.0, "completions/min_terminated_length": 827.0, "epoch": 0.023204640928185638, "frac_reward_zero_std": 0.0, "grad_norm": 3.5267684813119535, "kl": 0.0021381378173828125, "learning_rate": 2.3e-07, "loss": 0.01, "num_tokens": 5107385.0, "reward": 0.0, "reward_std": 0.8746201992034912, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.015695084144887007, "rewards/wordcountpos_reward/raw_geo/std": 0.2061006028584386, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066473, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 116 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1481.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1109.3125, "completions/mean_terminated_length": 1109.3125, "completions/min_length": 782.0, "completions/min_terminated_length": 782.0, "epoch": 0.023404680936187236, "frac_reward_zero_std": 0.0, "grad_norm": 3.0749471193826405, "kl": 0.002002716064453125, "learning_rate": 2.32e-07, "loss": -0.0146, "num_tokens": 5149702.0, "reward": 0.0, "reward_std": 0.6566903591156006, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.011287498376811759, "rewards/wordcountpos_reward/raw_geo/std": 0.08044698658902417, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042255, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 117 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1142.0, "completions/max_terminated_length": 1142.0, "completions/mean_length": 951.3125, "completions/mean_terminated_length": 951.3125, "completions/min_length": 689.0, "completions/min_terminated_length": 689.0, "epoch": 0.023604720944188837, "frac_reward_zero_std": 0.0, "grad_norm": 3.1064462779133564, "kl": 0.0017032623291015625, "learning_rate": 2.34e-07, "loss": 0.0172, "num_tokens": 5185859.0, "reward": 0.0, "reward_std": 0.8671146631240845, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04180583506795414, "rewards/wordcountpos_reward/raw_geo/std": 0.062227449788775945, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09737289911202955, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 118 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1439.0, "completions/max_terminated_length": 1439.0, "completions/mean_length": 1177.5, "completions/mean_terminated_length": 1177.5, "completions/min_length": 786.0, "completions/min_terminated_length": 786.0, "epoch": 0.02380476095219044, "frac_reward_zero_std": 0.0, "grad_norm": 2.3813904851095384, "kl": 0.0012912750244140625, "learning_rate": 2.3599999999999997e-07, "loss": -0.014, "num_tokens": 5227563.0, "reward": 0.0, "reward_std": 0.732681393623352, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.16515597229797943, "rewards/wordcountpos_reward/raw_geo/std": 0.16025598405421565, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 119 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1468.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1140.4375, "completions/mean_terminated_length": 1140.4375, "completions/min_length": 866.0, "completions/min_terminated_length": 866.0, "epoch": 0.024004800960192037, "frac_reward_zero_std": 0.0, "grad_norm": 3.3862005299374833, "kl": 0.002170562744140625, "learning_rate": 2.38e-07, "loss": -0.0357, "num_tokens": 5273410.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0341181755065918, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1599049300454976, "rewards/wordcountpos_reward/raw_geo/std": 0.1869016424688019, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0969917904124231, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 120 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1383.0, "completions/max_terminated_length": 1383.0, "completions/mean_length": 947.5625, "completions/mean_terminated_length": 947.5625, "completions/min_length": 557.0, "completions/min_terminated_length": 557.0, "epoch": 0.024204840968193638, "frac_reward_zero_std": 0.0, "grad_norm": 1.6435117375732204, "kl": 0.0004982054233551025, "learning_rate": 2.4e-07, "loss": -0.0361, "num_tokens": 5309651.0, "reward": 7.450580596923828e-09, "reward_std": 1.0177175998687744, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.03212280122158982, "rewards/wordcountpos_reward/raw_geo/std": 0.03682415859142722, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505425, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 121 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1311.0, "completions/max_terminated_length": 1311.0, "completions/mean_length": 1053.375, "completions/mean_terminated_length": 1053.375, "completions/min_length": 803.0, "completions/min_terminated_length": 803.0, "epoch": 0.02440488097619524, "frac_reward_zero_std": 0.0, "grad_norm": 3.55504416660261, "kl": 0.002532958984375, "learning_rate": 2.4199999999999997e-07, "loss": -0.0464, "num_tokens": 5342577.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8219430446624756, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03377315111105618, "rewards/wordcountpos_reward/raw_geo/std": 0.0751326064845707, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07698003589195011, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 122 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1374.4375, "completions/mean_terminated_length": 1276.77783203125, "completions/min_length": 1180.0, "completions/min_terminated_length": 1180.0, "epoch": 0.02460492098419684, "frac_reward_zero_std": 0.0, "grad_norm": 2.4842196179738147, "kl": 0.0013828277587890625, "learning_rate": 2.4399999999999996e-07, "loss": -0.0344, "num_tokens": 5395160.0, "reward": 0.0, "reward_std": 1.0364223718643188, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.008041307139657523, "rewards/wordcountpos_reward/raw_geo/std": 0.12155709441453373, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09098229375970786, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 123 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1115.5625, "completions/mean_terminated_length": 987.4166870117188, "completions/min_length": 684.0, "completions/min_terminated_length": 684.0, "epoch": 0.02480496099219844, "frac_reward_zero_std": 0.0, "grad_norm": 3.663760255484724, "kl": 0.002788543701171875, "learning_rate": 2.46e-07, "loss": -0.0061, "num_tokens": 5433233.0, "reward": 0.0, "reward_std": 0.9057458639144897, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.35074900357142696, "rewards/wordcountpos_reward/raw_geo/std": 0.27420162699006634, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575906, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 124 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1104.0, "completions/max_terminated_length": 1104.0, "completions/mean_length": 1055.3125, "completions/mean_terminated_length": 1055.3125, "completions/min_length": 964.0, "completions/min_terminated_length": 964.0, "epoch": 0.02500500100020004, "frac_reward_zero_std": 0.0, "grad_norm": 2.110325750198092, "kl": 0.0006413459777832031, "learning_rate": 2.48e-07, "loss": -0.0124, "num_tokens": 5476478.0, "reward": -7.450580596923828e-09, "reward_std": 0.9084052443504333, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.32454356594644873, "rewards/wordcountpos_reward/raw_geo/std": 0.12864399465208828, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08766518798921945, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 125 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1228.5625, "completions/mean_terminated_length": 1138.0833740234375, "completions/min_length": 854.0, "completions/min_terminated_length": 854.0, "epoch": 0.02520504100820164, "frac_reward_zero_std": 0.0, "grad_norm": 2.725180393157286, "kl": 0.0017547607421875, "learning_rate": 2.5e-07, "loss": 0.0204, "num_tokens": 5521255.0, "reward": 0.0, "reward_std": 0.9177630543708801, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09296925946984191, "rewards/wordcountpos_reward/raw_geo/std": 0.09994415368604596, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10327955589886446, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 126 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 1028.875, "completions/mean_terminated_length": 997.4667358398438, "completions/min_length": 626.0, "completions/min_terminated_length": 626.0, "epoch": 0.02540508101620324, "frac_reward_zero_std": 0.0, "grad_norm": 3.533129468799629, "kl": 0.002468109130859375, "learning_rate": 2.52e-07, "loss": 0.0167, "num_tokens": 5565925.0, "reward": 7.450580596923828e-09, "reward_std": 1.0681498050689697, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.005731092753961403, "rewards/wordcountpos_reward/raw_geo/std": 0.04030005129155748, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1210295341978484, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 127 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1168.0, "completions/max_terminated_length": 1168.0, "completions/mean_length": 1089.9375, "completions/mean_terminated_length": 1089.9375, "completions/min_length": 815.0, "completions/min_terminated_length": 815.0, "epoch": 0.02560512102420484, "frac_reward_zero_std": 0.0, "grad_norm": 2.614935433969565, "kl": 0.00139617919921875, "learning_rate": 2.5399999999999997e-07, "loss": -0.0108, "num_tokens": 5606364.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9422404170036316, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.003325936766975837, "rewards/wordcountpos_reward/raw_geo/std": 0.0676704561930054, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.16278820596099708, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 128 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1302.0, "completions/mean_length": 1149.625, "completions/mean_terminated_length": 1126.2667236328125, "completions/min_length": 958.0, "completions/min_terminated_length": 958.0, "epoch": 0.025805161032206442, "frac_reward_zero_std": 0.0, "grad_norm": 3.4055742079877773, "kl": 0.00235748291015625, "learning_rate": 2.56e-07, "loss": -0.0034, "num_tokens": 5650382.0, "reward": 4.470348358154297e-08, "reward_std": 0.9219162464141846, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06994234102620896, "rewards/wordcountpos_reward/raw_geo/std": 0.10535251869751974, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11021863793455328, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 129 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1397.0, "completions/mean_length": 1123.125, "completions/mean_terminated_length": 1098.0, "completions/min_length": 623.0, "completions/min_terminated_length": 623.0, "epoch": 0.02600520104020804, "frac_reward_zero_std": 0.0, "grad_norm": 3.7806052064194846, "kl": 0.002964019775390625, "learning_rate": 2.58e-07, "loss": -0.0223, "num_tokens": 5698360.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9927940368652344, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03625284063439026, "rewards/wordcountpos_reward/raw_geo/std": 0.07598196410270074, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1128748897706693, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 130 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1165.5, "completions/mean_terminated_length": 1143.2000732421875, "completions/min_length": 863.0, "completions/min_terminated_length": 863.0, "epoch": 0.026205241048209642, "frac_reward_zero_std": 0.0, "grad_norm": 3.3544193262357798, "kl": 0.002727508544921875, "learning_rate": 2.6e-07, "loss": 0.0272, "num_tokens": 5743120.0, "reward": 3.725290298461914e-09, "reward_std": 0.9749528169631958, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.10519852249015271, "rewards/wordcountpos_reward/raw_geo/std": 0.07211044110903524, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.09574271077563383, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 131 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 1248.0, "completions/mean_terminated_length": 1231.2000732421875, "completions/min_length": 1050.0, "completions/min_terminated_length": 1050.0, "epoch": 0.026405281056211243, "frac_reward_zero_std": 0.0, "grad_norm": 2.5524734540426284, "kl": 0.0016803741455078125, "learning_rate": 2.62e-07, "loss": -0.0441, "num_tokens": 5787936.0, "reward": -3.725290298461914e-08, "reward_std": 1.047700047492981, "rewards/wordcountpos_reward/mean": -3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.017072103381075506, "rewards/wordcountpos_reward/raw_geo/std": 0.09429196039589322, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.10878112581387148, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 132 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1235.0, "completions/max_terminated_length": 1235.0, "completions/mean_length": 916.875, "completions/mean_terminated_length": 916.875, "completions/min_length": 788.0, "completions/min_terminated_length": 788.0, "epoch": 0.02660532106421284, "frac_reward_zero_std": 0.0, "grad_norm": 3.984809904853974, "kl": 0.002796173095703125, "learning_rate": 2.64e-07, "loss": -0.0386, "num_tokens": 5827406.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8076682686805725, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.22494880857242078, "rewards/wordcountpos_reward/raw_geo/std": 0.06743884582965824, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13158576980363348, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 133 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1285.0, "completions/max_terminated_length": 1285.0, "completions/mean_length": 954.0, "completions/mean_terminated_length": 954.0, "completions/min_length": 747.0, "completions/min_terminated_length": 747.0, "epoch": 0.026805361072214443, "frac_reward_zero_std": 0.0, "grad_norm": 3.499095946392049, "kl": 0.002285003662109375, "learning_rate": 2.66e-07, "loss": 0.0048, "num_tokens": 5857294.0, "reward": 7.450580596923828e-09, "reward_std": 0.9633626937866211, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.010387670910388783, "rewards/wordcountpos_reward/raw_geo/std": 0.0709327108022106, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7625, "rewards/wordcountpos_reward/raw_rule/std": 0.09727776191382576, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 134 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1232.0, "completions/max_terminated_length": 1232.0, "completions/mean_length": 940.9375, "completions/mean_terminated_length": 940.9375, "completions/min_length": 773.0, "completions/min_terminated_length": 773.0, "epoch": 0.027005401080216044, "frac_reward_zero_std": 0.0, "grad_norm": 3.1062492040873897, "kl": 0.0018215179443359375, "learning_rate": 2.68e-07, "loss": -0.0052, "num_tokens": 5890693.0, "reward": 0.0, "reward_std": 0.7280793190002441, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.019208240754161037, "rewards/wordcountpos_reward/raw_geo/std": 0.06828392423240585, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0620632890834175, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 135 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1228.0, "completions/max_terminated_length": 1228.0, "completions/mean_length": 1022.8125, "completions/mean_terminated_length": 1022.8125, "completions/min_length": 813.0, "completions/min_terminated_length": 813.0, "epoch": 0.027205441088217642, "frac_reward_zero_std": 0.0, "grad_norm": 3.2603917076292315, "kl": 0.0019207000732421875, "learning_rate": 2.7e-07, "loss": -0.0168, "num_tokens": 5921066.0, "reward": 0.0, "reward_std": 0.8826114535331726, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02657969863034007, "rewards/wordcountpos_reward/raw_geo/std": 0.07274257114292472, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 136 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1254.0, "completions/max_terminated_length": 1254.0, "completions/mean_length": 1082.1875, "completions/mean_terminated_length": 1082.1875, "completions/min_length": 766.0, "completions/min_terminated_length": 766.0, "epoch": 0.027405481096219243, "frac_reward_zero_std": 0.0, "grad_norm": 2.7859647768529237, "kl": 0.0013942718505859375, "learning_rate": 2.72e-07, "loss": 0.0081, "num_tokens": 5961941.0, "reward": 0.0, "reward_std": 0.9826708436012268, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07373379288402647, "rewards/wordcountpos_reward/raw_geo/std": 0.15970077738730076, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0925962962222252, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 137 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1379.0, "completions/mean_length": 1249.9375, "completions/mean_terminated_length": 1166.5833740234375, "completions/min_length": 919.0, "completions/min_terminated_length": 919.0, "epoch": 0.027605521104220845, "frac_reward_zero_std": 0.0, "grad_norm": 2.936763347656408, "kl": 0.0021915435791015625, "learning_rate": 2.74e-07, "loss": 0.0078, "num_tokens": 6007172.0, "reward": 0.0, "reward_std": 1.0613622665405273, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.007903899007037299, "rewards/wordcountpos_reward/raw_geo/std": 0.08952140105427299, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.0910840068085298, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 138 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1441.0, "completions/mean_length": 1131.5, "completions/mean_terminated_length": 1078.857177734375, "completions/min_length": 897.0, "completions/min_terminated_length": 897.0, "epoch": 0.027805561112222446, "frac_reward_zero_std": 0.0, "grad_norm": 2.7921426756357075, "kl": 0.0017223358154296875, "learning_rate": 2.7600000000000004e-07, "loss": 0.007, "num_tokens": 6053004.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8758231997489929, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0253874822157782, "rewards/wordcountpos_reward/raw_geo/std": 0.2014196689822482, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116195, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 139 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1265.0, "completions/max_terminated_length": 1265.0, "completions/mean_length": 1023.8125, "completions/mean_terminated_length": 1023.8125, "completions/min_length": 735.0, "completions/min_terminated_length": 735.0, "epoch": 0.028005601120224044, "frac_reward_zero_std": 0.0, "grad_norm": 3.4174609368665783, "kl": 0.001895904541015625, "learning_rate": 2.7800000000000003e-07, "loss": 0.0346, "num_tokens": 6085441.0, "reward": 0.0, "reward_std": 0.6017365455627441, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0644692289877486, "rewards/wordcountpos_reward/raw_geo/std": 0.11806502235160024, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13270686158262923, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 140 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1401.0, "completions/mean_length": 1024.5, "completions/mean_terminated_length": 992.800048828125, "completions/min_length": 639.0, "completions/min_terminated_length": 639.0, "epoch": 0.028205641128225645, "frac_reward_zero_std": 0.0, "grad_norm": 3.104866529478396, "kl": 0.002140045166015625, "learning_rate": 2.8e-07, "loss": 0.0069, "num_tokens": 6126377.0, "reward": 5.960464477539063e-08, "reward_std": 0.6350376009941101, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09587675568359075, "rewards/wordcountpos_reward/raw_geo/std": 0.09502655784770331, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7625, "rewards/wordcountpos_reward/raw_rule/std": 0.1519624710005487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 141 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1241.0, "completions/max_terminated_length": 1241.0, "completions/mean_length": 1040.9375, "completions/mean_terminated_length": 1040.9375, "completions/min_length": 823.0, "completions/min_terminated_length": 823.0, "epoch": 0.028405681136227247, "frac_reward_zero_std": 0.0, "grad_norm": 2.1131392202631027, "kl": 0.001201629638671875, "learning_rate": 2.8199999999999996e-07, "loss": -0.0125, "num_tokens": 6172440.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0225037336349487, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08417944072302122, "rewards/wordcountpos_reward/raw_geo/std": 0.09469170277743412, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503964, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 142 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1284.0, "completions/mean_length": 1235.875, "completions/mean_terminated_length": 1030.4444580078125, "completions/min_length": 921.0, "completions/min_terminated_length": 921.0, "epoch": 0.028605721144228845, "frac_reward_zero_std": 0.0, "grad_norm": 2.6560744420850417, "kl": 0.0016994476318359375, "learning_rate": 2.8399999999999995e-07, "loss": -0.0364, "num_tokens": 6230382.0, "reward": 0.0, "reward_std": 0.555607795715332, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07065391504281567, "rewards/wordcountpos_reward/raw_geo/std": 0.2377454948908988, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1210295341978484, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 143 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 1203.1875, "completions/mean_terminated_length": 1160.7857666015625, "completions/min_length": 1001.0, "completions/min_terminated_length": 1001.0, "epoch": 0.028805761152230446, "frac_reward_zero_std": 0.0, "grad_norm": 2.7001440810266546, "kl": 0.0014753341674804688, "learning_rate": 2.8599999999999994e-07, "loss": 0.0262, "num_tokens": 6277681.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9347323179244995, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0726886643579835, "rewards/wordcountpos_reward/raw_geo/std": 0.1141527658611117, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8250000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036262, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 144 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1142.1875, "completions/mean_terminated_length": 1118.3333740234375, "completions/min_length": 803.0, "completions/min_terminated_length": 803.0, "epoch": 0.029005801160232048, "frac_reward_zero_std": 0.0, "grad_norm": 3.3034450193456477, "kl": 0.0023651123046875, "learning_rate": 2.88e-07, "loss": -0.0314, "num_tokens": 6322252.0, "reward": 0.0, "reward_std": 0.7847909927368164, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05470724248289191, "rewards/wordcountpos_reward/raw_geo/std": 0.0917857853186929, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.16487930490266262, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 145 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1407.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 1090.0, "completions/mean_terminated_length": 1090.0, "completions/min_length": 687.0, "completions/min_terminated_length": 687.0, "epoch": 0.029205841168233646, "frac_reward_zero_std": 0.0, "grad_norm": 3.565351156924563, "kl": 0.002532958984375, "learning_rate": 2.9e-07, "loss": -0.024, "num_tokens": 6366404.0, "reward": -7.450580596923828e-09, "reward_std": 1.0665879249572754, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.1619085009815739, "rewards/wordcountpos_reward/raw_geo/std": 0.11228047692914898, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.15341785110291775, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 146 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1108.0, "completions/max_terminated_length": 1108.0, "completions/mean_length": 891.4375, "completions/mean_terminated_length": 891.4375, "completions/min_length": 781.0, "completions/min_terminated_length": 781.0, "epoch": 0.029405881176235247, "frac_reward_zero_std": 0.0, "grad_norm": 3.044216507303204, "kl": 0.0013246536254882812, "learning_rate": 2.9199999999999997e-07, "loss": -0.0204, "num_tokens": 6393883.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9801445007324219, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09128864151802746, "rewards/wordcountpos_reward/raw_geo/std": 0.09928565252239704, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0859586463881842, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 147 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1300.0, "completions/max_terminated_length": 1300.0, "completions/mean_length": 1156.875, "completions/mean_terminated_length": 1156.875, "completions/min_length": 1029.0, "completions/min_terminated_length": 1029.0, "epoch": 0.02960592118423685, "frac_reward_zero_std": 0.0, "grad_norm": 2.667885227131188, "kl": 0.001422882080078125, "learning_rate": 2.9399999999999996e-07, "loss": -0.0217, "num_tokens": 6428065.0, "reward": 0.0, "reward_std": 0.6467468738555908, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.21520958934934514, "rewards/wordcountpos_reward/raw_geo/std": 0.2574197964156713, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298362, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 148 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1463.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1160.4375, "completions/mean_terminated_length": 1160.4375, "completions/min_length": 801.0, "completions/min_terminated_length": 801.0, "epoch": 0.029805961192238446, "frac_reward_zero_std": 0.0, "grad_norm": 3.368116829078309, "kl": 0.002346038818359375, "learning_rate": 2.9599999999999995e-07, "loss": -0.0449, "num_tokens": 6477560.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9497330188751221, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05884865624844422, "rewards/wordcountpos_reward/raw_geo/std": 0.2553102118738447, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13214750456578045, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 149 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1408.0, "completions/max_terminated_length": 1408.0, "completions/mean_length": 1148.375, "completions/mean_terminated_length": 1148.375, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.030006001200240048, "frac_reward_zero_std": 0.0, "grad_norm": 2.840269778480782, "kl": 0.0018157958984375, "learning_rate": 2.98e-07, "loss": -0.1826, "num_tokens": 6517790.0, "reward": 3.725290298461914e-08, "reward_std": 0.9691453576087952, "rewards/wordcountpos_reward/mean": 3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07166437652560242, "rewards/wordcountpos_reward/raw_geo/std": 0.05517467197768663, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.19163043135739746, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 150 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1416.0, "completions/max_terminated_length": 1416.0, "completions/mean_length": 1157.125, "completions/mean_terminated_length": 1157.125, "completions/min_length": 912.0, "completions/min_terminated_length": 912.0, "epoch": 0.03020604120824165, "frac_reward_zero_std": 0.0, "grad_norm": 3.241205600832832, "kl": 0.002277374267578125, "learning_rate": 3e-07, "loss": -0.0051, "num_tokens": 6566200.0, "reward": 0.0, "reward_std": 0.8757079839706421, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1252726546747316, "rewards/wordcountpos_reward/raw_geo/std": 0.14860622568769824, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 151 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1277.0, "completions/max_terminated_length": 1277.0, "completions/mean_length": 1028.125, "completions/mean_terminated_length": 1028.125, "completions/min_length": 707.0, "completions/min_terminated_length": 707.0, "epoch": 0.030406081216243247, "frac_reward_zero_std": 0.0, "grad_norm": 3.1304438832959685, "kl": 0.0014595985412597656, "learning_rate": 3.02e-07, "loss": -0.0447, "num_tokens": 6609938.0, "reward": 0.0, "reward_std": 0.8762164115905762, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.00892900632361902, "rewards/wordcountpos_reward/raw_geo/std": 0.14022686054727213, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1002773930432755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 152 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1303.0, "completions/mean_length": 1191.5, "completions/mean_terminated_length": 1170.933349609375, "completions/min_length": 846.0, "completions/min_terminated_length": 846.0, "epoch": 0.03060612122424485, "frac_reward_zero_std": 0.0, "grad_norm": 3.4387916614209164, "kl": 0.002582550048828125, "learning_rate": 3.0399999999999997e-07, "loss": -0.0471, "num_tokens": 6654362.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8346088528633118, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08430786717237182, "rewards/wordcountpos_reward/raw_geo/std": 0.06411529205876527, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101766, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 153 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1113.0, "completions/mean_length": 942.1875, "completions/mean_terminated_length": 905.0000610351562, "completions/min_length": 708.0, "completions/min_terminated_length": 708.0, "epoch": 0.03080616123224645, "frac_reward_zero_std": 0.0, "grad_norm": 3.5831376857759043, "kl": 0.00226593017578125, "learning_rate": 3.0599999999999996e-07, "loss": -0.0395, "num_tokens": 6689349.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0417293310165405, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.011742284117961408, "rewards/wordcountpos_reward/raw_geo/std": 0.08746125385088048, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11538983843829065, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 154 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1090.8125, "completions/mean_terminated_length": 1032.357177734375, "completions/min_length": 704.0, "completions/min_terminated_length": 704.0, "epoch": 0.03100620124024805, "frac_reward_zero_std": 0.0, "grad_norm": 3.386953741973585, "kl": 0.002246856689453125, "learning_rate": 3.08e-07, "loss": 0.0181, "num_tokens": 6730746.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0403060913085938, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.005257365669998601, "rewards/wordcountpos_reward/raw_geo/std": 0.02506443591115819, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.0739118594202782, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 155 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1304.0, "completions/max_terminated_length": 1304.0, "completions/mean_length": 1019.4375, "completions/mean_terminated_length": 1019.4375, "completions/min_length": 757.0, "completions/min_terminated_length": 757.0, "epoch": 0.03120624124824965, "frac_reward_zero_std": 0.0, "grad_norm": 2.8624964229202554, "kl": 0.001758575439453125, "learning_rate": 3.1e-07, "loss": 0.0073, "num_tokens": 6780465.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7841784954071045, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12933117903437513, "rewards/wordcountpos_reward/raw_geo/std": 0.19474236112635995, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10292032157252813, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 156 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1324.0, "completions/max_terminated_length": 1324.0, "completions/mean_length": 1146.3125, "completions/mean_terminated_length": 1146.3125, "completions/min_length": 876.0, "completions/min_terminated_length": 876.0, "epoch": 0.03140628125625125, "frac_reward_zero_std": 0.0, "grad_norm": 2.93832291066197, "kl": 0.0017795562744140625, "learning_rate": 3.12e-07, "loss": 0.0335, "num_tokens": 6822374.0, "reward": 0.0, "reward_std": 0.8305565118789673, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12869431953514113, "rewards/wordcountpos_reward/raw_geo/std": 0.11311646795891614, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 157 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1388.0, "completions/max_terminated_length": 1388.0, "completions/mean_length": 1114.3125, "completions/mean_terminated_length": 1114.3125, "completions/min_length": 841.0, "completions/min_terminated_length": 841.0, "epoch": 0.03160632126425285, "frac_reward_zero_std": 0.0, "grad_norm": 3.566016521175704, "kl": 0.00229644775390625, "learning_rate": 3.14e-07, "loss": -0.0388, "num_tokens": 6873835.0, "reward": 0.0, "reward_std": 0.6947319507598877, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.22582155981137864, "rewards/wordcountpos_reward/raw_geo/std": 0.17251187513536334, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.12641788434189793, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 158 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1379.0, "completions/mean_length": 1247.125, "completions/mean_terminated_length": 1230.2667236328125, "completions/min_length": 1039.0, "completions/min_terminated_length": 1039.0, "epoch": 0.03180636127225445, "frac_reward_zero_std": 0.0, "grad_norm": 3.2330647170447384, "kl": 0.002361297607421875, "learning_rate": 3.1599999999999997e-07, "loss": -0.0321, "num_tokens": 6915309.0, "reward": 0.0, "reward_std": 0.6039950847625732, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09379096119745979, "rewards/wordcountpos_reward/raw_geo/std": 0.1418673062147658, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 159 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 1196.1875, "completions/mean_terminated_length": 1126.076904296875, "completions/min_length": 798.0, "completions/min_terminated_length": 798.0, "epoch": 0.03200640128025605, "frac_reward_zero_std": 0.0, "grad_norm": 2.7486087367504455, "kl": 0.0015363693237304688, "learning_rate": 3.18e-07, "loss": -0.015, "num_tokens": 6972576.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9035705327987671, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.011589700336663985, "rewards/wordcountpos_reward/raw_geo/std": 0.03088870013934026, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.18519259244445038, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 160 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1106.75, "completions/mean_terminated_length": 1050.571533203125, "completions/min_length": 737.0, "completions/min_terminated_length": 737.0, "epoch": 0.03220644128825765, "frac_reward_zero_std": 0.0, "grad_norm": 2.575819086441216, "kl": 0.0009403228759765625, "learning_rate": 3.2e-07, "loss": 0.007, "num_tokens": 7025012.0, "reward": -1.1175870895385742e-08, "reward_std": 1.0658233165740967, "rewards/wordcountpos_reward/mean": -1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10180951553021601, "rewards/wordcountpos_reward/raw_geo/std": 0.08388929266352826, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026004, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 161 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1127.0, "completions/max_terminated_length": 1127.0, "completions/mean_length": 1049.75, "completions/mean_terminated_length": 1049.75, "completions/min_length": 1003.0, "completions/min_terminated_length": 1003.0, "epoch": 0.032406481296259254, "frac_reward_zero_std": 0.0, "grad_norm": 2.162613193517412, "kl": 0.00119781494140625, "learning_rate": 3.22e-07, "loss": -0.0038, "num_tokens": 7059800.0, "reward": 0.0, "reward_std": 0.9184061884880066, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.035449787671120364, "rewards/wordcountpos_reward/raw_geo/std": 0.1655324974661436, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.107496769977314, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 162 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1186.0, "completions/max_terminated_length": 1186.0, "completions/mean_length": 1038.25, "completions/mean_terminated_length": 1038.25, "completions/min_length": 871.0, "completions/min_terminated_length": 871.0, "epoch": 0.03260652130426085, "frac_reward_zero_std": 0.0, "grad_norm": 1.726300481116887, "kl": 0.0005960464477539062, "learning_rate": 3.24e-07, "loss": -0.0106, "num_tokens": 7102684.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5779698491096497, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07824030467868008, "rewards/wordcountpos_reward/raw_geo/std": 0.1217785746553571, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476836, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 163 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1051.0, "completions/max_terminated_length": 1051.0, "completions/mean_length": 914.1875, "completions/mean_terminated_length": 914.1875, "completions/min_length": 819.0, "completions/min_terminated_length": 819.0, "epoch": 0.03280656131226245, "frac_reward_zero_std": 0.0, "grad_norm": 1.8882417408775412, "kl": 0.00045561790466308594, "learning_rate": 3.26e-07, "loss": -0.001, "num_tokens": 7129199.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6361270546913147, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.019847480950444092, "rewards/wordcountpos_reward/raw_geo/std": 0.05674052777081613, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823631, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 164 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1228.0, "completions/mean_length": 1230.8125, "completions/mean_terminated_length": 1108.45458984375, "completions/min_length": 894.0, "completions/min_terminated_length": 894.0, "epoch": 0.033006601320264055, "frac_reward_zero_std": 0.0, "grad_norm": 3.2836735737424263, "kl": 0.00185394287109375, "learning_rate": 3.28e-07, "loss": 0.002, "num_tokens": 7183932.0, "reward": 0.0, "reward_std": 0.4567263424396515, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0749356086203115, "rewards/wordcountpos_reward/raw_geo/std": 0.15793615083402637, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823633, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 165 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1425.0, "completions/mean_length": 1028.0625, "completions/mean_terminated_length": 960.6428833007812, "completions/min_length": 584.0, "completions/min_terminated_length": 584.0, "epoch": 0.03320664132826565, "frac_reward_zero_std": 0.0, "grad_norm": 3.414077559439006, "kl": 0.002269744873046875, "learning_rate": 3.3e-07, "loss": -0.0331, "num_tokens": 7226757.0, "reward": 0.0, "reward_std": 0.8215432167053223, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.15710710270210468, "rewards/wordcountpos_reward/raw_geo/std": 0.15804379716155278, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09583937179043481, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 166 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1016.0, "completions/max_terminated_length": 1016.0, "completions/mean_length": 804.0625, "completions/mean_terminated_length": 804.0625, "completions/min_length": 687.0, "completions/min_terminated_length": 687.0, "epoch": 0.03340668133626725, "frac_reward_zero_std": 0.0, "grad_norm": 3.841176918343631, "kl": 0.00176239013671875, "learning_rate": 3.32e-07, "loss": -0.0192, "num_tokens": 7253814.0, "reward": 0.0, "reward_std": 0.9507547616958618, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.015403673599066161, "rewards/wordcountpos_reward/raw_geo/std": 0.07322449785572387, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 167 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1409.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 931.5625, "completions/mean_terminated_length": 931.5625, "completions/min_length": 689.0, "completions/min_terminated_length": 689.0, "epoch": 0.033606721344268856, "frac_reward_zero_std": 0.0, "grad_norm": 4.114088148721571, "kl": 0.003040313720703125, "learning_rate": 3.34e-07, "loss": -0.0069, "num_tokens": 7284879.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9934753179550171, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.023421842390939075, "rewards/wordcountpos_reward/raw_geo/std": 0.031201541495178103, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1258305739211792, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 168 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1483.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1175.1875, "completions/mean_terminated_length": 1175.1875, "completions/min_length": 800.0, "completions/min_terminated_length": 800.0, "epoch": 0.033806761352270454, "frac_reward_zero_std": 0.0, "grad_norm": 2.692831376482583, "kl": 0.0016002655029296875, "learning_rate": 3.36e-07, "loss": -0.0321, "num_tokens": 7325962.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8348179459571838, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0040702192219965, "rewards/wordcountpos_reward/raw_geo/std": 0.07941516655990437, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 169 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1348.0, "completions/mean_length": 1035.125, "completions/mean_terminated_length": 1004.1333618164062, "completions/min_length": 678.0, "completions/min_terminated_length": 678.0, "epoch": 0.03400680136027205, "frac_reward_zero_std": 0.0, "grad_norm": 3.6407281749099107, "kl": 0.00213623046875, "learning_rate": 3.38e-07, "loss": -0.0169, "num_tokens": 7376980.0, "reward": 0.0, "reward_std": 0.3154091238975525, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0761640152028266, "rewards/wordcountpos_reward/raw_geo/std": 0.3022664167575318, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.75, "rewards/wordcountpos_reward/raw_rule/std": 0.17126976771553507, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 170 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1381.0, "completions/mean_length": 1150.0, "completions/mean_terminated_length": 1126.666748046875, "completions/min_length": 878.0, "completions/min_terminated_length": 878.0, "epoch": 0.034206841368273656, "frac_reward_zero_std": 0.0, "grad_norm": 3.458684178068551, "kl": 0.002552032470703125, "learning_rate": 3.4000000000000003e-07, "loss": 0.0312, "num_tokens": 7422108.0, "reward": -7.450580596923828e-09, "reward_std": 1.017713189125061, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.003107375680797565, "rewards/wordcountpos_reward/raw_geo/std": 0.13157277958765579, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7041666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.141878925953186, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 171 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1343.0, "completions/max_terminated_length": 1343.0, "completions/mean_length": 993.875, "completions/mean_terminated_length": 993.875, "completions/min_length": 710.0, "completions/min_terminated_length": 710.0, "epoch": 0.034406881376275254, "frac_reward_zero_std": 0.0, "grad_norm": 2.878871516473372, "kl": 0.0018367767333984375, "learning_rate": 3.42e-07, "loss": -0.0647, "num_tokens": 7468522.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0539871454238892, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10589587437566766, "rewards/wordcountpos_reward/raw_geo/std": 0.10047178629357259, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.1520233900132184, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 172 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1401.0, "completions/mean_length": 1118.5625, "completions/mean_terminated_length": 1093.1334228515625, "completions/min_length": 864.0, "completions/min_terminated_length": 864.0, "epoch": 0.03460692138427685, "frac_reward_zero_std": 0.0, "grad_norm": 3.1081152431224957, "kl": 0.0016803741455078125, "learning_rate": 3.4399999999999996e-07, "loss": -0.0216, "num_tokens": 7508555.0, "reward": -5.960464477539063e-08, "reward_std": 0.7262141108512878, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.015564385874576205, "rewards/wordcountpos_reward/raw_geo/std": 0.15203653049736218, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 173 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1462.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1229.4375, "completions/mean_terminated_length": 1229.4375, "completions/min_length": 1052.0, "completions/min_terminated_length": 1052.0, "epoch": 0.03480696139227846, "frac_reward_zero_std": 0.0, "grad_norm": 3.0440136356529304, "kl": 0.0018520355224609375, "learning_rate": 3.4599999999999995e-07, "loss": -0.005, "num_tokens": 7555594.0, "reward": 0.0, "reward_std": 0.7898181676864624, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.017639132492135577, "rewards/wordcountpos_reward/raw_geo/std": 0.23086481665634997, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10390522747338703, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 174 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1282.0, "completions/mean_length": 1298.6875, "completions/mean_terminated_length": 1097.375, "completions/min_length": 917.0, "completions/min_terminated_length": 917.0, "epoch": 0.035007001400280055, "frac_reward_zero_std": 0.0, "grad_norm": 2.113065970436417, "kl": 0.0011692047119140625, "learning_rate": 3.4799999999999994e-07, "loss": -0.04, "num_tokens": 7613973.0, "reward": 0.0, "reward_std": 0.8858467936515808, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07545944925166885, "rewards/wordcountpos_reward/raw_geo/std": 0.167531101117853, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195308, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 175 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1319.0, "completions/max_terminated_length": 1319.0, "completions/mean_length": 1053.3125, "completions/mean_terminated_length": 1053.3125, "completions/min_length": 801.0, "completions/min_terminated_length": 801.0, "epoch": 0.03520704140828165, "frac_reward_zero_std": 0.0, "grad_norm": 3.249743672170572, "kl": 0.0018157958984375, "learning_rate": 3.5e-07, "loss": -0.0234, "num_tokens": 7654746.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9871081113815308, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.18222974852849785, "rewards/wordcountpos_reward/raw_geo/std": 0.2153333859668719, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07187952884282611, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 176 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1374.25, "completions/mean_terminated_length": 1298.800048828125, "completions/min_length": 1132.0, "completions/min_terminated_length": 1132.0, "epoch": 0.03540708141628326, "frac_reward_zero_std": 0.0, "grad_norm": 2.9997439451733117, "kl": 0.002349853515625, "learning_rate": 3.52e-07, "loss": 0.0008, "num_tokens": 7708446.0, "reward": -2.9802322387695312e-08, "reward_std": 0.49946585297584534, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.040972830221256785, "rewards/wordcountpos_reward/raw_geo/std": 0.07527936855353783, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.12931443160847217, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 177 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1311.0, "completions/mean_length": 1099.125, "completions/mean_terminated_length": 1072.4000244140625, "completions/min_length": 862.0, "completions/min_terminated_length": 862.0, "epoch": 0.035607121424284856, "frac_reward_zero_std": 0.0, "grad_norm": 2.8033839667518556, "kl": 0.001506805419921875, "learning_rate": 3.5399999999999997e-07, "loss": 0.0316, "num_tokens": 7749808.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8020362854003906, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12622919683024994, "rewards/wordcountpos_reward/raw_geo/std": 0.11355599317983416, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1080980350662545, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 178 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1438.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1139.6875, "completions/mean_terminated_length": 1139.6875, "completions/min_length": 863.0, "completions/min_terminated_length": 863.0, "epoch": 0.03580716143228646, "frac_reward_zero_std": 0.0, "grad_norm": 3.511053791986106, "kl": 0.002407073974609375, "learning_rate": 3.5599999999999996e-07, "loss": -0.022, "num_tokens": 7785219.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9724830389022827, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.006258823342861898, "rewards/wordcountpos_reward/raw_geo/std": 0.031881013999506956, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.12931443160847217, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 179 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1191.0, "completions/max_terminated_length": 1191.0, "completions/mean_length": 1044.125, "completions/mean_terminated_length": 1044.125, "completions/min_length": 813.0, "completions/min_terminated_length": 813.0, "epoch": 0.03600720144028806, "frac_reward_zero_std": 0.0, "grad_norm": 3.792621189945623, "kl": 0.002574920654296875, "learning_rate": 3.5799999999999995e-07, "loss": 0.0226, "num_tokens": 7826149.0, "reward": 0.0, "reward_std": 0.4066470265388489, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.21186225242447967, "rewards/wordcountpos_reward/raw_geo/std": 0.22223985717674158, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13871099718746435, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 180 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1163.0, "completions/max_terminated_length": 1163.0, "completions/mean_length": 962.625, "completions/mean_terminated_length": 962.625, "completions/min_length": 864.0, "completions/min_terminated_length": 864.0, "epoch": 0.036207241448289657, "frac_reward_zero_std": 0.0, "grad_norm": 3.6343550815169596, "kl": 0.0024318695068359375, "learning_rate": 3.6e-07, "loss": -0.0037, "num_tokens": 7857279.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4665229618549347, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04970994981291875, "rewards/wordcountpos_reward/raw_geo/std": 0.07474689532909116, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12852438880818892, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 181 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1186.0, "completions/max_terminated_length": 1186.0, "completions/mean_length": 1016.625, "completions/mean_terminated_length": 1016.625, "completions/min_length": 837.0, "completions/min_terminated_length": 837.0, "epoch": 0.03640728145629126, "frac_reward_zero_std": 0.0, "grad_norm": 3.0631302802851623, "kl": 0.001911163330078125, "learning_rate": 3.62e-07, "loss": -0.0151, "num_tokens": 7893177.0, "reward": 0.0, "reward_std": 0.753804087638855, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06338939390482173, "rewards/wordcountpos_reward/raw_geo/std": 0.09367060952409818, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7708333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.12405196043952266, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 182 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1167.0, "completions/max_terminated_length": 1167.0, "completions/mean_length": 948.75, "completions/mean_terminated_length": 948.75, "completions/min_length": 721.0, "completions/min_terminated_length": 721.0, "epoch": 0.03660732146429286, "frac_reward_zero_std": 0.0, "grad_norm": 3.292156999717403, "kl": 0.0016918182373046875, "learning_rate": 3.64e-07, "loss": -0.0134, "num_tokens": 7923349.0, "reward": 0.0, "reward_std": 1.0503833293914795, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08098727139230245, "rewards/wordcountpos_reward/raw_geo/std": 0.044604403994638525, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10532137766186214, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 183 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1195.0, "completions/max_terminated_length": 1195.0, "completions/mean_length": 963.5, "completions/mean_terminated_length": 963.5, "completions/min_length": 810.0, "completions/min_terminated_length": 810.0, "epoch": 0.03680736147229446, "frac_reward_zero_std": 0.0, "grad_norm": 3.95966920283632, "kl": 0.00257110595703125, "learning_rate": 3.6599999999999997e-07, "loss": -0.0042, "num_tokens": 7956021.0, "reward": -4.470348358154297e-08, "reward_std": 0.9801254272460938, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.029633090662191715, "rewards/wordcountpos_reward/raw_geo/std": 0.10525538767250749, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.1261979632400061, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 184 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1127.0, "completions/max_terminated_length": 1127.0, "completions/mean_length": 896.125, "completions/mean_terminated_length": 896.125, "completions/min_length": 727.0, "completions/min_terminated_length": 727.0, "epoch": 0.03700740148029606, "frac_reward_zero_std": 0.0, "grad_norm": 3.5878131532814628, "kl": 0.0019073486328125, "learning_rate": 3.6799999999999996e-07, "loss": 0.056, "num_tokens": 8002015.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5900082588195801, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10677080929386158, "rewards/wordcountpos_reward/raw_geo/std": 0.1142294824476264, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.07698003589195013, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 185 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1238.0, "completions/max_terminated_length": 1238.0, "completions/mean_length": 794.1875, "completions/mean_terminated_length": 794.1875, "completions/min_length": 629.0, "completions/min_terminated_length": 629.0, "epoch": 0.03720744148829766, "frac_reward_zero_std": 0.0, "grad_norm": 3.117803911303747, "kl": 0.0014848709106445312, "learning_rate": 3.7e-07, "loss": -0.0534, "num_tokens": 8026906.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6441549062728882, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.15024412523999706, "rewards/wordcountpos_reward/raw_geo/std": 0.10655149461960717, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07391185942027818, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 186 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1225.0, "completions/max_terminated_length": 1225.0, "completions/mean_length": 978.6875, "completions/mean_terminated_length": 978.6875, "completions/min_length": 814.0, "completions/min_terminated_length": 814.0, "epoch": 0.03740748149629926, "frac_reward_zero_std": 0.0, "grad_norm": 3.6281579413008287, "kl": 0.002536773681640625, "learning_rate": 3.72e-07, "loss": -0.0011, "num_tokens": 8063797.0, "reward": 0.0, "reward_std": 0.20660829544067383, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.012286760964341002, "rewards/wordcountpos_reward/raw_geo/std": 0.12717403274496453, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1517795672580372, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 187 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1080.0, "completions/max_terminated_length": 1080.0, "completions/mean_length": 919.25, "completions/mean_terminated_length": 919.25, "completions/min_length": 577.0, "completions/min_terminated_length": 577.0, "epoch": 0.03760752150430086, "frac_reward_zero_std": 0.0, "grad_norm": 3.1910180791571126, "kl": 0.0019450187683105469, "learning_rate": 3.74e-07, "loss": -0.0311, "num_tokens": 8113321.0, "reward": 0.0, "reward_std": 0.9163142442703247, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06298861282831102, "rewards/wordcountpos_reward/raw_geo/std": 0.05211119526252271, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746354, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 188 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1485.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1122.6875, "completions/mean_terminated_length": 1122.6875, "completions/min_length": 785.0, "completions/min_terminated_length": 785.0, "epoch": 0.03780756151230246, "frac_reward_zero_std": 0.0, "grad_norm": 3.5635114672816797, "kl": 0.00274658203125, "learning_rate": 3.76e-07, "loss": 0.0284, "num_tokens": 8154004.0, "reward": 0.0, "reward_std": 0.9957304000854492, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.14741367005867437, "rewards/wordcountpos_reward/raw_geo/std": 0.1260090567269409, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.09098229375970789, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 189 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1184.0, "completions/max_terminated_length": 1184.0, "completions/mean_length": 897.875, "completions/mean_terminated_length": 897.875, "completions/min_length": 769.0, "completions/min_terminated_length": 769.0, "epoch": 0.03800760152030406, "frac_reward_zero_std": 0.0, "grad_norm": 3.6803461718858737, "kl": 0.0024261474609375, "learning_rate": 3.7799999999999997e-07, "loss": 0.0043, "num_tokens": 8176850.0, "reward": 5.960464477539063e-08, "reward_std": 0.735680341720581, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0179335721239762, "rewards/wordcountpos_reward/raw_geo/std": 0.03944499043521186, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238704, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 190 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1186.0, "completions/max_terminated_length": 1186.0, "completions/mean_length": 1020.75, "completions/mean_terminated_length": 1020.75, "completions/min_length": 852.0, "completions/min_terminated_length": 852.0, "epoch": 0.038207641528305664, "frac_reward_zero_std": 0.0, "grad_norm": 3.4050753674672176, "kl": 0.0019073486328125, "learning_rate": 3.7999999999999996e-07, "loss": -0.0176, "num_tokens": 8218310.0, "reward": 2.9802322387695312e-08, "reward_std": 0.830951988697052, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03690125483256228, "rewards/wordcountpos_reward/raw_geo/std": 0.14765704633450594, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12102953419784838, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 191 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1152.0, "completions/max_terminated_length": 1152.0, "completions/mean_length": 943.625, "completions/mean_terminated_length": 943.625, "completions/min_length": 714.0, "completions/min_terminated_length": 714.0, "epoch": 0.03840768153630726, "frac_reward_zero_std": 0.0, "grad_norm": 3.050901350685848, "kl": 0.001934051513671875, "learning_rate": 3.82e-07, "loss": -0.0076, "num_tokens": 8259848.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0370866060256958, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02772329477845906, "rewards/wordcountpos_reward/raw_geo/std": 0.13147103932548143, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.725, "rewards/wordcountpos_reward/raw_rule/std": 0.12382783747337808, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 192 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1356.0, "completions/max_terminated_length": 1356.0, "completions/mean_length": 1116.625, "completions/mean_terminated_length": 1116.625, "completions/min_length": 929.0, "completions/min_terminated_length": 929.0, "epoch": 0.03860772154430886, "frac_reward_zero_std": 0.0, "grad_norm": 2.705519281577542, "kl": 0.0014190673828125, "learning_rate": 3.84e-07, "loss": 0.0136, "num_tokens": 8310986.0, "reward": 0.0, "reward_std": 1.0658533573150635, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11494210881142039, "rewards/wordcountpos_reward/raw_geo/std": 0.09234127052439448, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08944271909999157, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 193 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1483.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1223.4375, "completions/mean_terminated_length": 1223.4375, "completions/min_length": 982.0, "completions/min_terminated_length": 982.0, "epoch": 0.038807761552310464, "frac_reward_zero_std": 0.0, "grad_norm": 3.279743374009339, "kl": 0.002254486083984375, "learning_rate": 3.86e-07, "loss": -0.0402, "num_tokens": 8364265.0, "reward": -2.9802322387695312e-08, "reward_std": 0.698399543762207, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.006329398525197531, "rewards/wordcountpos_reward/raw_geo/std": 0.18347327662619942, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1308094458023239, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 194 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 1129.4375, "completions/mean_terminated_length": 1104.7333984375, "completions/min_length": 906.0, "completions/min_terminated_length": 906.0, "epoch": 0.03900780156031206, "frac_reward_zero_std": 0.0, "grad_norm": 3.0135102651744936, "kl": 0.001689910888671875, "learning_rate": 3.88e-07, "loss": 0.0392, "num_tokens": 8406576.0, "reward": 5.960464477539063e-08, "reward_std": 0.8149147033691406, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13263655748177008, "rewards/wordcountpos_reward/raw_geo/std": 0.0760463669434398, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.14981470036162822, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 195 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1351.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 1027.6875, "completions/mean_terminated_length": 1027.6875, "completions/min_length": 766.0, "completions/min_terminated_length": 766.0, "epoch": 0.03920784156831366, "frac_reward_zero_std": 0.0, "grad_norm": 3.1361277714137272, "kl": 0.002071380615234375, "learning_rate": 3.8999999999999997e-07, "loss": -0.0091, "num_tokens": 8442395.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8963784575462341, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0759340655854741, "rewards/wordcountpos_reward/raw_geo/std": 0.08140668260798975, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.10027739304327551, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 196 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1151.1875, "completions/mean_terminated_length": 1127.933349609375, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.039407881576315265, "frac_reward_zero_std": 0.0, "grad_norm": 3.727651298566592, "kl": 0.00257110595703125, "learning_rate": 3.92e-07, "loss": -0.0052, "num_tokens": 8480838.0, "reward": 0.0, "reward_std": 0.9072641134262085, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06784537601579145, "rewards/wordcountpos_reward/raw_geo/std": 0.13219449132676328, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14240006242195885, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 197 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1213.3125, "completions/mean_terminated_length": 1194.2000732421875, "completions/min_length": 990.0, "completions/min_terminated_length": 990.0, "epoch": 0.03960792158431686, "frac_reward_zero_std": 0.0, "grad_norm": 2.2825428905305376, "kl": 0.0008411407470703125, "learning_rate": 3.94e-07, "loss": -0.0021, "num_tokens": 8524347.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9728500843048096, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.012441524437029899, "rewards/wordcountpos_reward/raw_geo/std": 0.09089370630767515, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 198 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1351.9375, "completions/mean_terminated_length": 1317.769287109375, "completions/min_length": 813.0, "completions/min_terminated_length": 813.0, "epoch": 0.03980796159231846, "frac_reward_zero_std": 0.0, "grad_norm": 2.973005495849857, "kl": 0.0021762847900390625, "learning_rate": 3.96e-07, "loss": -0.0021, "num_tokens": 8580450.0, "reward": 0.0, "reward_std": 0.5499659776687622, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04566220374326516, "rewards/wordcountpos_reward/raw_geo/std": 0.06935933060320619, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.14700718047466632, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 199 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1079.0, "completions/max_terminated_length": 1079.0, "completions/mean_length": 920.625, "completions/mean_terminated_length": 920.625, "completions/min_length": 750.0, "completions/min_terminated_length": 750.0, "epoch": 0.040008001600320066, "frac_reward_zero_std": 0.0, "grad_norm": 3.727080114938833, "kl": 0.00251007080078125, "learning_rate": 3.98e-07, "loss": -0.022, "num_tokens": 8620556.0, "reward": -7.450580596923828e-09, "reward_std": 1.0630748271942139, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.07240710981674245, "rewards/wordcountpos_reward/raw_geo/std": 0.15219866094924211, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.07888106377466154, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 200 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1467.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 1074.3125, "completions/mean_terminated_length": 1074.3125, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.040208041608321664, "frac_reward_zero_std": 0.0, "grad_norm": 3.2654280009746417, "kl": 0.002960205078125, "learning_rate": 4e-07, "loss": -0.1168, "num_tokens": 8669105.0, "reward": 0.0, "reward_std": 0.6544532775878906, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08119934346316145, "rewards/wordcountpos_reward/raw_geo/std": 0.06678370897685837, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.6916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14782371884055634, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 201 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1432.0, "completions/mean_length": 1182.25, "completions/mean_terminated_length": 1161.0667724609375, "completions/min_length": 994.0, "completions/min_terminated_length": 994.0, "epoch": 0.04040808161632326, "frac_reward_zero_std": 0.0, "grad_norm": 3.340283569297992, "kl": 0.00260162353515625, "learning_rate": 4.02e-07, "loss": -0.0475, "num_tokens": 8713269.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8794863224029541, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.16211102509859834, "rewards/wordcountpos_reward/raw_geo/std": 0.08923403825898198, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13601470508735444, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 202 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1290.375, "completions/mean_terminated_length": 1164.5999755859375, "completions/min_length": 970.0, "completions/min_terminated_length": 970.0, "epoch": 0.04060812162432487, "frac_reward_zero_std": 0.0, "grad_norm": 3.1223416405331483, "kl": 0.002468109130859375, "learning_rate": 4.04e-07, "loss": -0.0256, "num_tokens": 8762155.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6992528438568115, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.026972375505145148, "rewards/wordcountpos_reward/raw_geo/std": 0.1517567111786578, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.0969917904124231, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 203 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1264.6875, "completions/mean_terminated_length": 1157.727294921875, "completions/min_length": 840.0, "completions/min_terminated_length": 840.0, "epoch": 0.040808161632326465, "frac_reward_zero_std": 0.0, "grad_norm": 2.502828108217289, "kl": 0.001190185546875, "learning_rate": 4.06e-07, "loss": -0.0334, "num_tokens": 8812862.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7799094319343567, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0060468740045577386, "rewards/wordcountpos_reward/raw_geo/std": 0.1342806864506098, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.08333333333333333, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 204 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1331.0, "completions/max_terminated_length": 1331.0, "completions/mean_length": 1025.0, "completions/mean_terminated_length": 1025.0, "completions/min_length": 680.0, "completions/min_terminated_length": 680.0, "epoch": 0.04100820164032806, "frac_reward_zero_std": 0.0, "grad_norm": 3.8722147766190846, "kl": 0.002590179443359375, "learning_rate": 4.0799999999999995e-07, "loss": 0.0362, "num_tokens": 8852110.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0247706174850464, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07381078917153294, "rewards/wordcountpos_reward/raw_geo/std": 0.0953837459139895, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194866, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 205 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1314.25, "completions/mean_terminated_length": 1301.86669921875, "completions/min_length": 1001.0, "completions/min_terminated_length": 1001.0, "epoch": 0.04120824164832967, "frac_reward_zero_std": 0.0, "grad_norm": 2.5362502325345195, "kl": 0.0015392303466796875, "learning_rate": 4.0999999999999994e-07, "loss": -0.0439, "num_tokens": 8898666.0, "reward": 0.0, "reward_std": 0.9100049734115601, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12093206963526529, "rewards/wordcountpos_reward/raw_geo/std": 0.08009363976804447, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.13763881881375054, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 206 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1272.8125, "completions/mean_terminated_length": 1197.0833740234375, "completions/min_length": 1057.0, "completions/min_terminated_length": 1057.0, "epoch": 0.041408281656331265, "frac_reward_zero_std": 0.0, "grad_norm": 2.5468386215078263, "kl": 0.0012402534484863281, "learning_rate": 4.12e-07, "loss": -0.0095, "num_tokens": 8949423.0, "reward": 0.0, "reward_std": 0.9750853776931763, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.003554812335247671, "rewards/wordcountpos_reward/raw_geo/std": 0.19559240150971294, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237131, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 207 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1414.0, "completions/max_terminated_length": 1414.0, "completions/mean_length": 1176.5625, "completions/mean_terminated_length": 1176.5625, "completions/min_length": 951.0, "completions/min_terminated_length": 951.0, "epoch": 0.04160832166433286, "frac_reward_zero_std": 0.0, "grad_norm": 3.3752780921817847, "kl": 0.002216339111328125, "learning_rate": 4.14e-07, "loss": -0.0072, "num_tokens": 8988568.0, "reward": 0.0, "reward_std": 0.871620774269104, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.114779931210139, "rewards/wordcountpos_reward/raw_geo/std": 0.09476311625242048, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07290277645477446, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 208 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1208.375, "completions/mean_terminated_length": 1075.8182373046875, "completions/min_length": 615.0, "completions/min_terminated_length": 615.0, "epoch": 0.04180836167233447, "frac_reward_zero_std": 0.0, "grad_norm": 2.845883479966067, "kl": 0.0020427703857421875, "learning_rate": 4.1599999999999997e-07, "loss": -0.03, "num_tokens": 9036454.0, "reward": 0.0, "reward_std": 0.560102105140686, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04502701724073032, "rewards/wordcountpos_reward/raw_geo/std": 0.13194859264475114, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503963, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 209 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 933.0, "completions/max_terminated_length": 933.0, "completions/mean_length": 844.6875, "completions/mean_terminated_length": 844.6875, "completions/min_length": 627.0, "completions/min_terminated_length": 627.0, "epoch": 0.042008401680336066, "frac_reward_zero_std": 0.0, "grad_norm": 1.2202481999915225, "kl": 0.0003336668014526367, "learning_rate": 4.1799999999999996e-07, "loss": -0.0047, "num_tokens": 9074257.0, "reward": 0.0, "reward_std": 0.8350205421447754, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.016202605595714716, "rewards/wordcountpos_reward/raw_geo/std": 0.0665817288609625, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.05692750425533111, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 210 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1429.0, "completions/mean_length": 1150.5, "completions/mean_terminated_length": 1127.2000732421875, "completions/min_length": 859.0, "completions/min_terminated_length": 859.0, "epoch": 0.04220844168833767, "frac_reward_zero_std": 0.0, "grad_norm": 2.8463190345853056, "kl": 0.0017261505126953125, "learning_rate": 4.1999999999999995e-07, "loss": 0.0259, "num_tokens": 9125953.0, "reward": 0.0, "reward_std": 0.9862473607063293, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03530538009471619, "rewards/wordcountpos_reward/raw_geo/std": 0.05835435454978616, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13526380260918403, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 211 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1136.25, "completions/mean_terminated_length": 1112.0, "completions/min_length": 674.0, "completions/min_terminated_length": 674.0, "epoch": 0.04240848169633927, "frac_reward_zero_std": 0.0, "grad_norm": 2.8117754547957317, "kl": 0.001636505126953125, "learning_rate": 4.2199999999999994e-07, "loss": -0.0816, "num_tokens": 9175245.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9156209230422974, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02290077514995597, "rewards/wordcountpos_reward/raw_geo/std": 0.33724278352130804, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11894598836509011, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 212 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1145.0, "completions/max_terminated_length": 1145.0, "completions/mean_length": 1021.625, "completions/mean_terminated_length": 1021.625, "completions/min_length": 870.0, "completions/min_terminated_length": 870.0, "epoch": 0.04260852170434087, "frac_reward_zero_std": 0.0, "grad_norm": 3.170167923664273, "kl": 0.00215911865234375, "learning_rate": 4.24e-07, "loss": -0.0064, "num_tokens": 9215983.0, "reward": 0.0, "reward_std": 0.6473881602287292, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1045517076384495, "rewards/wordcountpos_reward/raw_geo/std": 0.15889560604122613, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.16953094331342802, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 213 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1433.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1024.25, "completions/mean_terminated_length": 1024.25, "completions/min_length": 751.0, "completions/min_terminated_length": 751.0, "epoch": 0.04280856171234247, "frac_reward_zero_std": 0.0, "grad_norm": 2.938371293387922, "kl": 0.001605987548828125, "learning_rate": 4.26e-07, "loss": -0.0286, "num_tokens": 9257243.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5818344354629517, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06563991627814246, "rewards/wordcountpos_reward/raw_geo/std": 0.11607894553999797, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.14168300559373406, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 214 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1096.5, "completions/mean_terminated_length": 1038.857177734375, "completions/min_length": 694.0, "completions/min_terminated_length": 694.0, "epoch": 0.04300860172034407, "frac_reward_zero_std": 0.0, "grad_norm": 3.5631244307040775, "kl": 0.002407073974609375, "learning_rate": 4.2799999999999997e-07, "loss": -0.1163, "num_tokens": 9301571.0, "reward": 1.4901161193847656e-08, "reward_std": 0.949698805809021, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10345276668112834, "rewards/wordcountpos_reward/raw_geo/std": 0.12313624374139397, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.75, "rewards/wordcountpos_reward/raw_rule/std": 0.11800816042090449, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 215 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 989.0, "completions/max_terminated_length": 989.0, "completions/mean_length": 799.8125, "completions/mean_terminated_length": 799.8125, "completions/min_length": 491.0, "completions/min_terminated_length": 491.0, "epoch": 0.04320864172834567, "frac_reward_zero_std": 0.0, "grad_norm": 3.769714070681134, "kl": 0.002651214599609375, "learning_rate": 4.2999999999999996e-07, "loss": -0.0269, "num_tokens": 9336920.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9266948699951172, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0498792099029989, "rewards/wordcountpos_reward/raw_geo/std": 0.06668889560042873, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.6666666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10327955589886446, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 216 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1173.3125, "completions/mean_terminated_length": 1151.533447265625, "completions/min_length": 848.0, "completions/min_terminated_length": 848.0, "epoch": 0.04340868173634727, "frac_reward_zero_std": 0.0, "grad_norm": 2.642290304005401, "kl": 0.000865936279296875, "learning_rate": 4.3199999999999995e-07, "loss": 0.0067, "num_tokens": 9370333.0, "reward": 1.862645149230957e-09, "reward_std": 0.9393295049667358, "rewards/wordcountpos_reward/mean": 1.862645149230957e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.15558568939274872, "rewards/wordcountpos_reward/raw_geo/std": 0.29255120503185744, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.12464765155042849, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 217 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1252.0, "completions/max_terminated_length": 1252.0, "completions/mean_length": 874.5, "completions/mean_terminated_length": 874.5, "completions/min_length": 711.0, "completions/min_terminated_length": 711.0, "epoch": 0.04360872174434887, "frac_reward_zero_std": 0.0, "grad_norm": 2.221006878283574, "kl": 0.0013418197631835938, "learning_rate": 4.34e-07, "loss": 0.0133, "num_tokens": 9406805.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9754551649093628, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07210750638126699, "rewards/wordcountpos_reward/raw_geo/std": 0.023868796326206483, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.11925695879998881, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 218 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1242.4375, "completions/mean_terminated_length": 1225.2667236328125, "completions/min_length": 987.0, "completions/min_terminated_length": 987.0, "epoch": 0.04380876175235047, "frac_reward_zero_std": 0.0, "grad_norm": 1.962652254850243, "kl": 0.000919342041015625, "learning_rate": 4.36e-07, "loss": -0.0226, "num_tokens": 9466236.0, "reward": 0.0, "reward_std": 0.8539537191390991, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.010749596747872818, "rewards/wordcountpos_reward/raw_geo/std": 0.11901292224974022, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7625, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409449, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 219 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1333.0, "completions/max_terminated_length": 1333.0, "completions/mean_length": 1082.375, "completions/mean_terminated_length": 1082.375, "completions/min_length": 837.0, "completions/min_terminated_length": 837.0, "epoch": 0.04400880176035207, "frac_reward_zero_std": 0.0, "grad_norm": 3.2792461401361157, "kl": 0.00229644775390625, "learning_rate": 4.38e-07, "loss": 0.0009, "num_tokens": 9512418.0, "reward": -2.9802322387695312e-08, "reward_std": 0.770158052444458, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.33078046995944205, "rewards/wordcountpos_reward/raw_geo/std": 0.23102998210724113, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.11417984514369006, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 220 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1089.0, "completions/max_terminated_length": 1089.0, "completions/mean_length": 930.375, "completions/mean_terminated_length": 930.375, "completions/min_length": 817.0, "completions/min_terminated_length": 817.0, "epoch": 0.04420884176835367, "frac_reward_zero_std": 0.0, "grad_norm": 3.6569481673546904, "kl": 0.00208282470703125, "learning_rate": 4.3999999999999997e-07, "loss": 0.0179, "num_tokens": 9560944.0, "reward": -2.9802322387695312e-08, "reward_std": 0.4487355053424835, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05540984573235712, "rewards/wordcountpos_reward/raw_geo/std": 0.06647964140307619, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.1517795672580372, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 221 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1031.0, "completions/max_terminated_length": 1031.0, "completions/mean_length": 869.5625, "completions/mean_terminated_length": 869.5625, "completions/min_length": 614.0, "completions/min_terminated_length": 614.0, "epoch": 0.04440888177635527, "frac_reward_zero_std": 0.0, "grad_norm": 4.361008163837978, "kl": 0.003299713134765625, "learning_rate": 4.4199999999999996e-07, "loss": -0.0178, "num_tokens": 9594473.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0673775672912598, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.14855897842667443, "rewards/wordcountpos_reward/raw_geo/std": 0.19004606810878735, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.625, "rewards/wordcountpos_reward/raw_rule/std": 0.22949219304078008, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 222 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1387.0, "completions/max_terminated_length": 1387.0, "completions/mean_length": 1136.5, "completions/mean_terminated_length": 1136.5, "completions/min_length": 920.0, "completions/min_terminated_length": 920.0, "epoch": 0.044608921784356874, "frac_reward_zero_std": 0.0, "grad_norm": 3.5246563719784905, "kl": 0.00261688232421875, "learning_rate": 4.44e-07, "loss": -0.0045, "num_tokens": 9644649.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8183039426803589, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2731578987364741, "rewards/wordcountpos_reward/raw_geo/std": 0.34677137497788774, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09339283817414602, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 223 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1234.0, "completions/mean_length": 918.8125, "completions/mean_terminated_length": 880.0667114257812, "completions/min_length": 676.0, "completions/min_terminated_length": 676.0, "epoch": 0.04480896179235847, "frac_reward_zero_std": 0.0, "grad_norm": 2.5983836740091224, "kl": 0.00209808349609375, "learning_rate": 4.46e-07, "loss": 0.054, "num_tokens": 9691310.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8674355745315552, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08417420603100946, "rewards/wordcountpos_reward/raw_geo/std": 0.31090114155806386, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.6458333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.21322045624883876, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 224 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1443.0, "completions/max_terminated_length": 1443.0, "completions/mean_length": 1293.5, "completions/mean_terminated_length": 1293.5, "completions/min_length": 1100.0, "completions/min_terminated_length": 1100.0, "epoch": 0.04500900180036007, "frac_reward_zero_std": 0.0, "grad_norm": 2.5619520266167988, "kl": 0.001697540283203125, "learning_rate": 4.48e-07, "loss": -0.0331, "num_tokens": 9734798.0, "reward": 0.0, "reward_std": 0.6951881647109985, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.13601809935061862, "rewards/wordcountpos_reward/raw_geo/std": 0.2861767604950857, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818418, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 225 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1292.0, "completions/max_terminated_length": 1292.0, "completions/mean_length": 1065.9375, "completions/mean_terminated_length": 1065.9375, "completions/min_length": 893.0, "completions/min_terminated_length": 893.0, "epoch": 0.045209041808361675, "frac_reward_zero_std": 0.0, "grad_norm": 3.0972722370830534, "kl": 0.002086639404296875, "learning_rate": 4.5e-07, "loss": -0.0294, "num_tokens": 9776397.0, "reward": 0.0, "reward_std": 0.37369710206985474, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07690115437370365, "rewards/wordcountpos_reward/raw_geo/std": 0.17665170899241697, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1519624710005487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 226 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1443.0, "completions/mean_length": 1194.6875, "completions/mean_terminated_length": 1174.3333740234375, "completions/min_length": 908.0, "completions/min_terminated_length": 908.0, "epoch": 0.04540908181636327, "frac_reward_zero_std": 0.0, "grad_norm": 2.6818268509468783, "kl": 0.001720428466796875, "learning_rate": 4.5199999999999997e-07, "loss": -0.0321, "num_tokens": 9825224.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7789763808250427, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14412644640263858, "rewards/wordcountpos_reward/raw_geo/std": 0.19802116435283593, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12464765155042849, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 227 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1362.0, "completions/max_terminated_length": 1362.0, "completions/mean_length": 1071.5, "completions/mean_terminated_length": 1071.5, "completions/min_length": 800.0, "completions/min_terminated_length": 800.0, "epoch": 0.04560912182436487, "frac_reward_zero_std": 0.0, "grad_norm": 3.4297091146136736, "kl": 0.0022220611572265625, "learning_rate": 4.54e-07, "loss": -0.0474, "num_tokens": 9865968.0, "reward": 0.0, "reward_std": 0.7685360908508301, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09570066720474164, "rewards/wordcountpos_reward/raw_geo/std": 0.09805851963071717, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7, "rewards/wordcountpos_reward/raw_rule/std": 0.1192569587999888, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 228 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1272.0, "completions/mean_length": 1061.375, "completions/mean_terminated_length": 1032.1334228515625, "completions/min_length": 835.0, "completions/min_terminated_length": 835.0, "epoch": 0.045809161832366475, "frac_reward_zero_std": 0.0, "grad_norm": 3.3764199769472514, "kl": 0.0021114349365234375, "learning_rate": 4.56e-07, "loss": 0.0192, "num_tokens": 9908422.0, "reward": -1.1175870895385742e-08, "reward_std": 1.0675699710845947, "rewards/wordcountpos_reward/mean": -1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03232322866633074, "rewards/wordcountpos_reward/raw_geo/std": 0.052356298330070904, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.12141145226353545, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 229 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1304.0, "completions/max_terminated_length": 1304.0, "completions/mean_length": 1045.5625, "completions/mean_terminated_length": 1045.5625, "completions/min_length": 834.0, "completions/min_terminated_length": 834.0, "epoch": 0.04600920184036807, "frac_reward_zero_std": 0.0, "grad_norm": 3.6358476431972786, "kl": 0.002521514892578125, "learning_rate": 4.58e-07, "loss": 0.0086, "num_tokens": 9949439.0, "reward": 0.0, "reward_std": 0.7558891177177429, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11293083128848397, "rewards/wordcountpos_reward/raw_geo/std": 0.26460484652038063, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13817594795257457, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 230 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1178.0, "completions/max_terminated_length": 1178.0, "completions/mean_length": 979.375, "completions/mean_terminated_length": 979.375, "completions/min_length": 861.0, "completions/min_terminated_length": 861.0, "epoch": 0.04620924184836967, "frac_reward_zero_std": 0.0, "grad_norm": 2.9432527026487554, "kl": 0.00164031982421875, "learning_rate": 4.6e-07, "loss": -0.0151, "num_tokens": 9987453.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7914766073226929, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2776319812833346, "rewards/wordcountpos_reward/raw_geo/std": 0.2091878370836691, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8250000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.1, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 231 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1124.0, "completions/max_terminated_length": 1124.0, "completions/mean_length": 935.5, "completions/mean_terminated_length": 935.5, "completions/min_length": 789.0, "completions/min_terminated_length": 789.0, "epoch": 0.046409281856371276, "frac_reward_zero_std": 0.0, "grad_norm": 3.501816611091303, "kl": 0.0021820068359375, "learning_rate": 4.62e-07, "loss": 0.0098, "num_tokens": 10026053.0, "reward": 7.450580596923828e-09, "reward_std": 1.061112642288208, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.0009388975839314043, "rewards/wordcountpos_reward/raw_geo/std": 0.25690021770429855, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639732, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 232 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1400.0, "completions/mean_length": 1156.75, "completions/mean_terminated_length": 1133.86669921875, "completions/min_length": 940.0, "completions/min_terminated_length": 940.0, "epoch": 0.046609321864372874, "frac_reward_zero_std": 0.0, "grad_norm": 3.197590654024074, "kl": 0.002071380615234375, "learning_rate": 4.64e-07, "loss": 0.0031, "num_tokens": 10074593.0, "reward": 0.0, "reward_std": 0.9700093865394592, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05754481591005366, "rewards/wordcountpos_reward/raw_geo/std": 0.131811020957611, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11800816042090448, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 233 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1141.0, "completions/max_terminated_length": 1141.0, "completions/mean_length": 955.4375, "completions/mean_terminated_length": 955.4375, "completions/min_length": 736.0, "completions/min_terminated_length": 736.0, "epoch": 0.04680936187237447, "frac_reward_zero_std": 0.0, "grad_norm": 3.417303615929551, "kl": 0.0023059844970703125, "learning_rate": 4.66e-07, "loss": 0.0006, "num_tokens": 10101864.0, "reward": 7.450580596923828e-09, "reward_std": 1.053598403930664, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.0794532384301168, "rewards/wordcountpos_reward/raw_geo/std": 0.0891815204166084, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09259629622222518, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 234 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1011.0, "completions/max_terminated_length": 1011.0, "completions/mean_length": 917.25, "completions/mean_terminated_length": 917.25, "completions/min_length": 859.0, "completions/min_terminated_length": 859.0, "epoch": 0.04700940188037608, "frac_reward_zero_std": 0.0, "grad_norm": 2.5029887243756614, "kl": 0.0009889602661132812, "learning_rate": 4.68e-07, "loss": -0.0065, "num_tokens": 10144820.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4073413610458374, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13657683348336547, "rewards/wordcountpos_reward/raw_geo/std": 0.11450675754461333, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7708333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1287403358472941, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 235 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1328.5, "completions/mean_terminated_length": 1225.5999755859375, "completions/min_length": 927.0, "completions/min_terminated_length": 927.0, "epoch": 0.047209441888377675, "frac_reward_zero_std": 0.0, "grad_norm": 3.126877234182285, "kl": 0.002269744873046875, "learning_rate": 4.6999999999999995e-07, "loss": -0.0216, "num_tokens": 10191940.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7219676971435547, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1363078346597265, "rewards/wordcountpos_reward/raw_geo/std": 0.3528697059083918, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715323, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 236 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1263.0, "completions/max_terminated_length": 1263.0, "completions/mean_length": 1000.25, "completions/mean_terminated_length": 1000.25, "completions/min_length": 621.0, "completions/min_terminated_length": 621.0, "epoch": 0.04740948189637927, "frac_reward_zero_std": 0.0, "grad_norm": 3.6674717025459933, "kl": 0.002590179443359375, "learning_rate": 4.7199999999999994e-07, "loss": -0.0247, "num_tokens": 10228712.0, "reward": 0.0, "reward_std": 0.7236359119415283, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.21741912367146687, "rewards/wordcountpos_reward/raw_geo/std": 0.05995470328566925, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1365582225578092, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 237 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1173.0, "completions/max_terminated_length": 1173.0, "completions/mean_length": 957.125, "completions/mean_terminated_length": 957.125, "completions/min_length": 669.0, "completions/min_terminated_length": 669.0, "epoch": 0.04760952190438088, "frac_reward_zero_std": 0.0, "grad_norm": 3.5578471776175284, "kl": 0.002086639404296875, "learning_rate": 4.7399999999999993e-07, "loss": 0.0516, "num_tokens": 10277978.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0124543905258179, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09208478336285762, "rewards/wordcountpos_reward/raw_geo/std": 0.08017650590902214, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.10036968702787749, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 238 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1146.5, "completions/mean_terminated_length": 1122.933349609375, "completions/min_length": 844.0, "completions/min_terminated_length": 844.0, "epoch": 0.047809561912382476, "frac_reward_zero_std": 0.0, "grad_norm": 2.8573036743864857, "kl": 0.0019283294677734375, "learning_rate": 4.76e-07, "loss": -0.0165, "num_tokens": 10330346.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8940658569335938, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2515121205060752, "rewards/wordcountpos_reward/raw_geo/std": 0.1708536332043649, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820636, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 239 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1108.625, "completions/mean_terminated_length": 1082.533447265625, "completions/min_length": 795.0, "completions/min_terminated_length": 795.0, "epoch": 0.048009601920384073, "frac_reward_zero_std": 0.0, "grad_norm": 3.2899874989285482, "kl": 0.0022525787353515625, "learning_rate": 4.779999999999999e-07, "loss": -0.0253, "num_tokens": 10377028.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7602491974830627, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14163505990739514, "rewards/wordcountpos_reward/raw_geo/std": 0.11600805262689726, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13214750456578045, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 240 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1079.0, "completions/max_terminated_length": 1079.0, "completions/mean_length": 1003.625, "completions/mean_terminated_length": 1003.625, "completions/min_length": 892.0, "completions/min_terminated_length": 892.0, "epoch": 0.04820964192838568, "frac_reward_zero_std": 0.0, "grad_norm": 2.781234983223836, "kl": 0.0013475418090820312, "learning_rate": 4.8e-07, "loss": 0.0168, "num_tokens": 10419478.0, "reward": 0.0, "reward_std": 0.6488606333732605, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.16687250177532015, "rewards/wordcountpos_reward/raw_geo/std": 0.19042451899573043, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820635, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 241 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1217.0, "completions/max_terminated_length": 1217.0, "completions/mean_length": 954.75, "completions/mean_terminated_length": 954.75, "completions/min_length": 819.0, "completions/min_terminated_length": 819.0, "epoch": 0.048409681936387276, "frac_reward_zero_std": 0.0, "grad_norm": 3.769629689363409, "kl": 0.002178192138671875, "learning_rate": 4.82e-07, "loss": -0.0437, "num_tokens": 10461786.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8474140167236328, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.16588000018033014, "rewards/wordcountpos_reward/raw_geo/std": 0.3314221813736554, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.75, "rewards/wordcountpos_reward/raw_rule/std": 0.10470416879457554, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 242 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1013.0, "completions/max_terminated_length": 1013.0, "completions/mean_length": 820.0, "completions/mean_terminated_length": 820.0, "completions/min_length": 675.0, "completions/min_terminated_length": 675.0, "epoch": 0.04860972194438888, "frac_reward_zero_std": 0.0, "grad_norm": 3.1142124243308373, "kl": 0.0012178421020507812, "learning_rate": 4.839999999999999e-07, "loss": 0.0013, "num_tokens": 10493034.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7323605418205261, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05774076177500897, "rewards/wordcountpos_reward/raw_geo/std": 0.13265386704689375, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476839, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 243 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1122.0, "completions/max_terminated_length": 1122.0, "completions/mean_length": 1028.375, "completions/mean_terminated_length": 1028.375, "completions/min_length": 921.0, "completions/min_terminated_length": 921.0, "epoch": 0.04880976195239048, "frac_reward_zero_std": 0.0, "grad_norm": 1.7371766949160146, "kl": 0.00049591064453125, "learning_rate": 4.86e-07, "loss": 0.0089, "num_tokens": 10536736.0, "reward": 0.0, "reward_std": 1.009200096130371, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.010877500520318774, "rewards/wordcountpos_reward/raw_geo/std": 0.05590197255036356, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.10327955589886448, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 244 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1230.375, "completions/mean_terminated_length": 1140.5, "completions/min_length": 664.0, "completions/min_terminated_length": 664.0, "epoch": 0.04900980196039208, "frac_reward_zero_std": 0.0, "grad_norm": 2.9596381596370973, "kl": 0.0020198822021484375, "learning_rate": 4.879999999999999e-07, "loss": -0.0026, "num_tokens": 10584798.0, "reward": 0.0, "reward_std": 0.908205509185791, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07778093230453198, "rewards/wordcountpos_reward/raw_geo/std": 0.0701129405116386, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.1520233900132184, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 245 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1439.0, "completions/mean_length": 1163.3125, "completions/mean_terminated_length": 1140.86669921875, "completions/min_length": 888.0, "completions/min_terminated_length": 888.0, "epoch": 0.04920984196839368, "frac_reward_zero_std": 0.0, "grad_norm": 2.339328394757649, "kl": 0.0009527206420898438, "learning_rate": 4.9e-07, "loss": -0.0075, "num_tokens": 10624059.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0123875141143799, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.031379049980629146, "rewards/wordcountpos_reward/raw_geo/std": 0.11357900524634221, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639735, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 246 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1261.0, "completions/mean_length": 1064.8125, "completions/mean_terminated_length": 1035.800048828125, "completions/min_length": 487.0, "completions/min_terminated_length": 487.0, "epoch": 0.04940988197639528, "frac_reward_zero_std": 0.0, "grad_norm": 2.562922333983465, "kl": 0.0017910003662109375, "learning_rate": 4.92e-07, "loss": -0.0635, "num_tokens": 10658624.0, "reward": 0.0, "reward_std": 0.6656528115272522, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0886025617532563, "rewards/wordcountpos_reward/raw_geo/std": 0.10955627817292048, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11147163731607215, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 247 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1311.0, "completions/max_terminated_length": 1311.0, "completions/mean_length": 1065.9375, "completions/mean_terminated_length": 1065.9375, "completions/min_length": 886.0, "completions/min_terminated_length": 886.0, "epoch": 0.04960992198439688, "frac_reward_zero_std": 0.0, "grad_norm": 3.4725363438468877, "kl": 0.0019683837890625, "learning_rate": 4.94e-07, "loss": 0.0172, "num_tokens": 10706671.0, "reward": 0.0, "reward_std": 0.7576044797897339, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.012261243461389371, "rewards/wordcountpos_reward/raw_geo/std": 0.09734405789336316, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.12881223774390613, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 248 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1439.0, "completions/mean_length": 1201.25, "completions/mean_terminated_length": 1158.571533203125, "completions/min_length": 876.0, "completions/min_terminated_length": 876.0, "epoch": 0.04980996199239848, "frac_reward_zero_std": 0.0, "grad_norm": 2.9682384067334424, "kl": 0.0017547607421875, "learning_rate": 4.96e-07, "loss": -0.0114, "num_tokens": 10745723.0, "reward": 1.30385160446167e-08, "reward_std": 0.997305154800415, "rewards/wordcountpos_reward/mean": 1.30385160446167e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.41643560115602263, "rewards/wordcountpos_reward/raw_geo/std": 0.053809601866069846, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.09803627446568497, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 249 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1322.375, "completions/mean_terminated_length": 1263.166748046875, "completions/min_length": 880.0, "completions/min_terminated_length": 880.0, "epoch": 0.05001000200040008, "frac_reward_zero_std": 0.0, "grad_norm": 2.498170494033239, "kl": 0.0016222000122070312, "learning_rate": 4.979999999999999e-07, "loss": -0.0182, "num_tokens": 10798585.0, "reward": 0.0, "reward_std": 0.7340776324272156, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.049307774680816895, "rewards/wordcountpos_reward/raw_geo/std": 0.06508276053191246, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.048495895206211566, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 250 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1374.0, "completions/mean_length": 1026.8125, "completions/mean_terminated_length": 995.2667236328125, "completions/min_length": 633.0, "completions/min_terminated_length": 633.0, "epoch": 0.05021004200840168, "frac_reward_zero_std": 0.0, "grad_norm": 3.082007906039928, "kl": 0.0015439987182617188, "learning_rate": 5e-07, "loss": -0.0155, "num_tokens": 10833454.0, "reward": 0.0, "reward_std": 0.5358977317810059, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.009103681466487323, "rewards/wordcountpos_reward/raw_geo/std": 0.15289171081351013, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 251 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1322.0, "completions/max_terminated_length": 1322.0, "completions/mean_length": 956.875, "completions/mean_terminated_length": 956.875, "completions/min_length": 686.0, "completions/min_terminated_length": 686.0, "epoch": 0.05041008201640328, "frac_reward_zero_std": 0.0, "grad_norm": 2.580286635495049, "kl": 0.0013780593872070312, "learning_rate": 5.02e-07, "loss": 0.0091, "num_tokens": 10878548.0, "reward": 0.0, "reward_std": 0.9386855363845825, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14375805416449897, "rewards/wordcountpos_reward/raw_geo/std": 0.0712602892053773, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 252 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1224.0, "completions/mean_terminated_length": 1205.60009765625, "completions/min_length": 893.0, "completions/min_terminated_length": 893.0, "epoch": 0.05061012202440488, "frac_reward_zero_std": 0.0, "grad_norm": 1.9276289839627134, "kl": 0.0009131431579589844, "learning_rate": 5.04e-07, "loss": 0.0239, "num_tokens": 10924924.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0571565628051758, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1659473574763683, "rewards/wordcountpos_reward/raw_geo/std": 0.18580689257688182, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.03442651863295481, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 253 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 1219.0625, "completions/mean_terminated_length": 1178.9285888671875, "completions/min_length": 993.0, "completions/min_terminated_length": 993.0, "epoch": 0.05081016203240648, "frac_reward_zero_std": 0.0, "grad_norm": 2.922856681083572, "kl": 0.0017871856689453125, "learning_rate": 5.06e-07, "loss": 0.004, "num_tokens": 10970213.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7152448892593384, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07250068745291727, "rewards/wordcountpos_reward/raw_geo/std": 0.17110556123388107, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460886, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 254 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1382.0, "completions/mean_length": 1252.0625, "completions/mean_terminated_length": 1139.3636474609375, "completions/min_length": 839.0, "completions/min_terminated_length": 839.0, "epoch": 0.051010202040408084, "frac_reward_zero_std": 0.0, "grad_norm": 2.9540837028324844, "kl": 0.001926422119140625, "learning_rate": 5.079999999999999e-07, "loss": 0.0086, "num_tokens": 11020030.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9681185483932495, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.00919053917813432, "rewards/wordcountpos_reward/raw_geo/std": 0.04797649848379129, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.10886621079036349, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 255 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1313.125, "completions/mean_terminated_length": 1250.8333740234375, "completions/min_length": 791.0, "completions/min_terminated_length": 791.0, "epoch": 0.05121024204840968, "frac_reward_zero_std": 0.0, "grad_norm": 3.0333475841007367, "kl": 0.00246429443359375, "learning_rate": 5.1e-07, "loss": -0.032, "num_tokens": 11077224.0, "reward": 0.0, "reward_std": 0.6521957516670227, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.01088874880562371, "rewards/wordcountpos_reward/raw_geo/std": 0.19445062691212478, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715323, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 256 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1261.8125, "completions/mean_terminated_length": 1206.84619140625, "completions/min_length": 1047.0, "completions/min_terminated_length": 1047.0, "epoch": 0.05141028205641128, "frac_reward_zero_std": 0.0, "grad_norm": 3.1676015906229225, "kl": 0.00247955322265625, "learning_rate": 5.12e-07, "loss": -0.0159, "num_tokens": 11131957.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0640606880187988, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.017548991635790653, "rewards/wordcountpos_reward/raw_geo/std": 0.11961394967139573, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7125, "rewards/wordcountpos_reward/raw_rule/std": 0.09953596037316068, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 257 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 964.5, "completions/mean_terminated_length": 888.0000610351562, "completions/min_length": 568.0, "completions/min_terminated_length": 568.0, "epoch": 0.051610322064412885, "frac_reward_zero_std": 0.0, "grad_norm": 2.949807245318231, "kl": 0.00179290771484375, "learning_rate": 5.14e-07, "loss": -0.0029, "num_tokens": 11171661.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7342426776885986, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03695731594706882, "rewards/wordcountpos_reward/raw_geo/std": 0.16550425037823152, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460884, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 258 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1403.0, "completions/mean_length": 1097.75, "completions/mean_terminated_length": 1070.933349609375, "completions/min_length": 871.0, "completions/min_terminated_length": 871.0, "epoch": 0.05181036207241448, "frac_reward_zero_std": 0.0, "grad_norm": 3.2597310261177537, "kl": 0.0020351409912109375, "learning_rate": 5.16e-07, "loss": 0.0015, "num_tokens": 11213953.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9189243316650391, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0337706933685308, "rewards/wordcountpos_reward/raw_geo/std": 0.05898891676744919, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10174405069512348, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 259 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1153.0, "completions/mean_length": 1029.1875, "completions/mean_terminated_length": 997.800048828125, "completions/min_length": 795.0, "completions/min_terminated_length": 795.0, "epoch": 0.05201040208041608, "frac_reward_zero_std": 0.0, "grad_norm": 3.500006324284666, "kl": 0.002288818359375, "learning_rate": 5.18e-07, "loss": -0.0266, "num_tokens": 11259436.0, "reward": 1.4901161193847656e-08, "reward_std": 1.016369104385376, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09808513341577466, "rewards/wordcountpos_reward/raw_geo/std": 0.0511581479541605, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1258305739211792, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 260 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1129.0, "completions/max_terminated_length": 1129.0, "completions/mean_length": 921.25, "completions/mean_terminated_length": 921.25, "completions/min_length": 694.0, "completions/min_terminated_length": 694.0, "epoch": 0.052210442088417686, "frac_reward_zero_std": 0.0, "grad_norm": 3.3527336673424566, "kl": 0.0023040771484375, "learning_rate": 5.2e-07, "loss": -0.0288, "num_tokens": 11308728.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9681817889213562, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.23922034870195008, "rewards/wordcountpos_reward/raw_geo/std": 0.4189218739249153, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7291666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.0925962962222252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 261 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1281.0, "completions/max_terminated_length": 1281.0, "completions/mean_length": 1043.375, "completions/mean_terminated_length": 1043.375, "completions/min_length": 785.0, "completions/min_terminated_length": 785.0, "epoch": 0.052410482096419284, "frac_reward_zero_std": 0.0, "grad_norm": 3.789090010532537, "kl": 0.002643585205078125, "learning_rate": 5.22e-07, "loss": -0.022, "num_tokens": 11355198.0, "reward": 0.0, "reward_std": 0.6457654237747192, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05679817341659235, "rewards/wordcountpos_reward/raw_geo/std": 0.05172502433684633, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08681611046941139, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 262 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1155.0, "completions/max_terminated_length": 1155.0, "completions/mean_length": 876.5625, "completions/mean_terminated_length": 876.5625, "completions/min_length": 693.0, "completions/min_terminated_length": 693.0, "epoch": 0.05261052210442088, "frac_reward_zero_std": 0.0, "grad_norm": 4.348528439154449, "kl": 0.00286865234375, "learning_rate": 5.24e-07, "loss": 0.0522, "num_tokens": 11393047.0, "reward": 0.0, "reward_std": 0.6818137168884277, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.009604823762648262, "rewards/wordcountpos_reward/raw_geo/std": 0.0930052670633696, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590965, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 263 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1274.8125, "completions/mean_terminated_length": 1259.800048828125, "completions/min_length": 864.0, "completions/min_terminated_length": 864.0, "epoch": 0.052810562112422486, "frac_reward_zero_std": 0.0, "grad_norm": 2.6796853844591833, "kl": 0.0017719268798828125, "learning_rate": 5.26e-07, "loss": -0.0074, "num_tokens": 11441044.0, "reward": -7.450580596923828e-09, "reward_std": 0.9428989887237549, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.12655737946537823, "rewards/wordcountpos_reward/raw_geo/std": 0.143888059477041, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1102186379345533, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 264 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1499.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1066.375, "completions/mean_terminated_length": 1066.375, "completions/min_length": 869.0, "completions/min_terminated_length": 869.0, "epoch": 0.053010602120424084, "frac_reward_zero_std": 0.0, "grad_norm": 3.2411456624804784, "kl": 0.002246856689453125, "learning_rate": 5.28e-07, "loss": -0.0297, "num_tokens": 11486642.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6332656145095825, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1801774123872189, "rewards/wordcountpos_reward/raw_geo/std": 0.12452352156407756, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 265 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1467.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 1189.125, "completions/mean_terminated_length": 1189.125, "completions/min_length": 708.0, "completions/min_terminated_length": 708.0, "epoch": 0.05321064212842568, "frac_reward_zero_std": 0.0, "grad_norm": 3.36758558767966, "kl": 0.00261688232421875, "learning_rate": 5.3e-07, "loss": -0.0447, "num_tokens": 11535940.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9821509122848511, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03995858950419156, "rewards/wordcountpos_reward/raw_geo/std": 0.0922515695501585, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1413558682244267, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 266 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1439.0, "completions/mean_length": 1353.6875, "completions/mean_terminated_length": 1265.9000244140625, "completions/min_length": 1054.0, "completions/min_terminated_length": 1054.0, "epoch": 0.05341068213642729, "frac_reward_zero_std": 0.0, "grad_norm": 2.69135869048864, "kl": 0.00201416015625, "learning_rate": 5.32e-07, "loss": -0.0154, "num_tokens": 11594359.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0331776142120361, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04879070979342799, "rewards/wordcountpos_reward/raw_geo/std": 0.060374116114327976, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725114, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 267 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1187.0, "completions/max_terminated_length": 1187.0, "completions/mean_length": 956.1875, "completions/mean_terminated_length": 956.1875, "completions/min_length": 749.0, "completions/min_terminated_length": 749.0, "epoch": 0.053610722144428885, "frac_reward_zero_std": 0.0, "grad_norm": 3.565425749584275, "kl": 0.00189971923828125, "learning_rate": 5.34e-07, "loss": -0.0061, "num_tokens": 11630762.0, "reward": -9.313225746154785e-09, "reward_std": 0.9770306944847107, "rewards/wordcountpos_reward/mean": -9.313225746154785e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.009862841180942467, "rewards/wordcountpos_reward/raw_geo/std": 0.10174287325257836, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.072520750542581, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 268 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1040.0, "completions/max_terminated_length": 1040.0, "completions/mean_length": 811.25, "completions/mean_terminated_length": 811.25, "completions/min_length": 549.0, "completions/min_terminated_length": 549.0, "epoch": 0.05381076215243048, "frac_reward_zero_std": 0.0, "grad_norm": 3.5042565406928774, "kl": 0.001926422119140625, "learning_rate": 5.36e-07, "loss": 0.0255, "num_tokens": 11655262.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8735646605491638, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01877535841962073, "rewards/wordcountpos_reward/raw_geo/std": 0.08614063247566738, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 269 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1140.4375, "completions/mean_terminated_length": 1116.4666748046875, "completions/min_length": 595.0, "completions/min_terminated_length": 595.0, "epoch": 0.05401080216043209, "frac_reward_zero_std": 0.0, "grad_norm": 3.4586928748034422, "kl": 0.002590179443359375, "learning_rate": 5.38e-07, "loss": -0.1136, "num_tokens": 11708469.0, "reward": 0.0, "reward_std": 1.0621317625045776, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.12225851999280535, "rewards/wordcountpos_reward/raw_geo/std": 0.07800859857537935, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7625, "rewards/wordcountpos_reward/raw_rule/std": 0.13763881881375054, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 270 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1335.0, "completions/max_terminated_length": 1335.0, "completions/mean_length": 957.8125, "completions/mean_terminated_length": 957.8125, "completions/min_length": 763.0, "completions/min_terminated_length": 763.0, "epoch": 0.054210842168433686, "frac_reward_zero_std": 0.0, "grad_norm": 2.738653175944796, "kl": 0.0013217926025390625, "learning_rate": 5.4e-07, "loss": -0.0103, "num_tokens": 11741914.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8998227715492249, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.025634164800156116, "rewards/wordcountpos_reward/raw_geo/std": 0.06844079930998706, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891874, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 271 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1276.0, "completions/mean_length": 1090.375, "completions/mean_terminated_length": 1063.0667724609375, "completions/min_length": 814.0, "completions/min_terminated_length": 814.0, "epoch": 0.054410882176435284, "frac_reward_zero_std": 0.0, "grad_norm": 3.6735008716436566, "kl": 0.00262451171875, "learning_rate": 5.420000000000001e-07, "loss": 0.0283, "num_tokens": 11784144.0, "reward": 0.0, "reward_std": 1.0356578826904297, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.017954077836690237, "rewards/wordcountpos_reward/raw_geo/std": 0.13486503139722647, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1253144193766372, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 272 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1176.0, "completions/max_terminated_length": 1176.0, "completions/mean_length": 1016.9375, "completions/mean_terminated_length": 1016.9375, "completions/min_length": 746.0, "completions/min_terminated_length": 746.0, "epoch": 0.05461092218443689, "frac_reward_zero_std": 0.0, "grad_norm": 4.078931473141189, "kl": 0.003021240234375, "learning_rate": 5.44e-07, "loss": -0.0128, "num_tokens": 11823663.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4060839116573334, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1317357823676234, "rewards/wordcountpos_reward/raw_geo/std": 0.13562679679250417, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11674600476945511, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 273 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1208.0, "completions/max_terminated_length": 1208.0, "completions/mean_length": 979.4375, "completions/mean_terminated_length": 979.4375, "completions/min_length": 775.0, "completions/min_terminated_length": 775.0, "epoch": 0.05481096219243849, "frac_reward_zero_std": 0.0, "grad_norm": 2.7091414346538256, "kl": 0.00147247314453125, "learning_rate": 5.46e-07, "loss": -0.0071, "num_tokens": 11872710.0, "reward": 0.0, "reward_std": 0.904438853263855, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2436913189742776, "rewards/wordcountpos_reward/raw_geo/std": 0.17202725688187753, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.06206328908341753, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 274 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1149.1875, "completions/mean_terminated_length": 1125.800048828125, "completions/min_length": 899.0, "completions/min_terminated_length": 899.0, "epoch": 0.05501100220044009, "frac_reward_zero_std": 0.0, "grad_norm": 3.3131370072683772, "kl": 0.00223541259765625, "learning_rate": 5.48e-07, "loss": 0.0336, "num_tokens": 11914897.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8671855926513672, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.00042998981095781173, "rewards/wordcountpos_reward/raw_geo/std": 0.16643644225434123, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06763190130459203, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 275 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1165.0, "completions/max_terminated_length": 1165.0, "completions/mean_length": 958.5, "completions/mean_terminated_length": 958.5, "completions/min_length": 815.0, "completions/min_terminated_length": 815.0, "epoch": 0.05521104220844169, "frac_reward_zero_std": 0.0, "grad_norm": 3.33414768511492, "kl": 0.0020503997802734375, "learning_rate": 5.5e-07, "loss": -0.0201, "num_tokens": 11950849.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0278414487838745, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1112860828530976, "rewards/wordcountpos_reward/raw_geo/std": 0.13677217120618296, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10749676997731401, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 276 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1219.375, "completions/mean_terminated_length": 1125.8333740234375, "completions/min_length": 921.0, "completions/min_terminated_length": 921.0, "epoch": 0.05541108221644329, "frac_reward_zero_std": 0.0, "grad_norm": 3.194524162908702, "kl": 0.002140045166015625, "learning_rate": 5.520000000000001e-07, "loss": 0.0264, "num_tokens": 12002615.0, "reward": 0.0, "reward_std": 0.733389139175415, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04092493020791305, "rewards/wordcountpos_reward/raw_geo/std": 0.25423442561951226, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09418264367902597, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 277 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1236.0, "completions/max_terminated_length": 1236.0, "completions/mean_length": 967.625, "completions/mean_terminated_length": 967.625, "completions/min_length": 715.0, "completions/min_terminated_length": 715.0, "epoch": 0.05561112222444489, "frac_reward_zero_std": 0.0, "grad_norm": 3.7100092503320248, "kl": 0.00249481201171875, "learning_rate": 5.54e-07, "loss": 0.0401, "num_tokens": 12045769.0, "reward": 2.9802322387695312e-08, "reward_std": 0.62291020154953, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02627827228191887, "rewards/wordcountpos_reward/raw_geo/std": 0.06679695132261204, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575908, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 278 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1228.0, "completions/max_terminated_length": 1228.0, "completions/mean_length": 1075.9375, "completions/mean_terminated_length": 1075.9375, "completions/min_length": 955.0, "completions/min_terminated_length": 955.0, "epoch": 0.05581116223244649, "frac_reward_zero_std": 0.0, "grad_norm": 3.1001417425462474, "kl": 0.0018062591552734375, "learning_rate": 5.560000000000001e-07, "loss": -0.0128, "num_tokens": 12082104.0, "reward": 0.0, "reward_std": 0.6159245371818542, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0631568710393361, "rewards/wordcountpos_reward/raw_geo/std": 0.07380088512307473, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.10292032157252812, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 279 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 1019.4375, "completions/mean_terminated_length": 987.4000244140625, "completions/min_length": 570.0, "completions/min_terminated_length": 570.0, "epoch": 0.05601120224044809, "frac_reward_zero_std": 0.0, "grad_norm": 3.090708545740446, "kl": 0.00196075439453125, "learning_rate": 5.58e-07, "loss": -0.0039, "num_tokens": 12115935.0, "reward": 0.0, "reward_std": 0.612042248249054, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09689570793110974, "rewards/wordcountpos_reward/raw_geo/std": 0.10598693754127003, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.75, "rewards/wordcountpos_reward/raw_rule/std": 0.13221755360572016, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 280 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1279.0, "completions/max_terminated_length": 1279.0, "completions/mean_length": 931.875, "completions/mean_terminated_length": 931.875, "completions/min_length": 694.0, "completions/min_terminated_length": 694.0, "epoch": 0.05621124224844969, "frac_reward_zero_std": 0.0, "grad_norm": 3.363495670737204, "kl": 0.0018367767333984375, "learning_rate": 5.6e-07, "loss": 0.0292, "num_tokens": 12152613.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8437509536743164, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09482659079586239, "rewards/wordcountpos_reward/raw_geo/std": 0.13114599876635355, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14548768561863465, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 281 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1302.4375, "completions/mean_terminated_length": 1274.21435546875, "completions/min_length": 1027.0, "completions/min_terminated_length": 1027.0, "epoch": 0.05641128225645129, "frac_reward_zero_std": 0.0, "grad_norm": 2.8831623863197833, "kl": 0.002315521240234375, "learning_rate": 5.620000000000001e-07, "loss": -0.0359, "num_tokens": 12205908.0, "reward": 0.0, "reward_std": 1.027881145477295, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09447188890396592, "rewards/wordcountpos_reward/raw_geo/std": 0.15564244232480584, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14807405554629052, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 282 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1383.0, "completions/mean_length": 1172.5625, "completions/mean_terminated_length": 1097.0, "completions/min_length": 892.0, "completions/min_terminated_length": 892.0, "epoch": 0.05661132226445289, "frac_reward_zero_std": 0.0, "grad_norm": 3.1855622622614663, "kl": 0.0025482177734375, "learning_rate": 5.639999999999999e-07, "loss": -0.0679, "num_tokens": 12249109.0, "reward": 0.0, "reward_std": 0.8259780406951904, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10498265392435348, "rewards/wordcountpos_reward/raw_geo/std": 0.1863217193440607, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 283 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1470.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1024.375, "completions/mean_terminated_length": 1024.375, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.056811362272454494, "frac_reward_zero_std": 0.0, "grad_norm": 2.812524829164937, "kl": 0.0015053749084472656, "learning_rate": 5.66e-07, "loss": -0.0081, "num_tokens": 12292467.0, "reward": -7.450580596923828e-09, "reward_std": 1.0298956632614136, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.14568746959091, "rewards/wordcountpos_reward/raw_geo/std": 0.0523996791957048, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1308094458023239, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 284 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1109.125, "completions/mean_terminated_length": 1083.0667724609375, "completions/min_length": 866.0, "completions/min_terminated_length": 866.0, "epoch": 0.05701140228045609, "frac_reward_zero_std": 0.0, "grad_norm": 4.0071112440453645, "kl": 0.003082275390625, "learning_rate": 5.679999999999999e-07, "loss": -0.0033, "num_tokens": 12341581.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0290765762329102, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.058311324661011944, "rewards/wordcountpos_reward/raw_geo/std": 0.0708403946332691, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10036968702787749, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 285 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1457.25, "completions/mean_terminated_length": 1386.0, "completions/min_length": 1210.0, "completions/min_terminated_length": 1210.0, "epoch": 0.05721144228845769, "frac_reward_zero_std": 0.0, "grad_norm": 2.2016616153199116, "kl": 0.001537322998046875, "learning_rate": 5.699999999999999e-07, "loss": 0.0183, "num_tokens": 12395065.0, "reward": 0.0, "reward_std": 0.6284165382385254, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06762073027489378, "rewards/wordcountpos_reward/raw_geo/std": 0.1083948509433297, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.19302657656203526, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 286 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1425.0, "completions/max_terminated_length": 1425.0, "completions/mean_length": 1148.1875, "completions/mean_terminated_length": 1148.1875, "completions/min_length": 933.0, "completions/min_terminated_length": 933.0, "epoch": 0.057411482296459294, "frac_reward_zero_std": 0.0, "grad_norm": 3.2005057734895987, "kl": 0.002338409423828125, "learning_rate": 5.719999999999999e-07, "loss": -0.0073, "num_tokens": 12447532.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9242644906044006, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2421782276404428, "rewards/wordcountpos_reward/raw_geo/std": 0.36510859624452024, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 287 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1420.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 999.1875, "completions/mean_terminated_length": 999.1875, "completions/min_length": 620.0, "completions/min_terminated_length": 620.0, "epoch": 0.05761152230446089, "frac_reward_zero_std": 0.0, "grad_norm": 3.001088548172376, "kl": 0.0018463134765625, "learning_rate": 5.739999999999999e-07, "loss": -0.0142, "num_tokens": 12485719.0, "reward": 0.0, "reward_std": 1.0114140510559082, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.039533985923892255, "rewards/wordcountpos_reward/raw_geo/std": 0.06518250967437134, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.6875, "rewards/wordcountpos_reward/raw_rule/std": 0.13709958532503408, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 288 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1370.0, "completions/mean_length": 1215.0, "completions/mean_terminated_length": 1196.0001220703125, "completions/min_length": 1051.0, "completions/min_terminated_length": 1051.0, "epoch": 0.05781156231246249, "frac_reward_zero_std": 0.0, "grad_norm": 2.4212077809990387, "kl": 0.0014495849609375, "learning_rate": 5.76e-07, "loss": 0.0129, "num_tokens": 12533639.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9643208384513855, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.010432199316583488, "rewards/wordcountpos_reward/raw_geo/std": 0.2838686951503673, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.14950535726806533, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 289 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1337.0, "completions/max_terminated_length": 1337.0, "completions/mean_length": 1108.1875, "completions/mean_terminated_length": 1108.1875, "completions/min_length": 777.0, "completions/min_terminated_length": 777.0, "epoch": 0.058011602320464095, "frac_reward_zero_std": 0.0, "grad_norm": 3.5708859830894153, "kl": 0.00247955322265625, "learning_rate": 5.779999999999999e-07, "loss": 0.0293, "num_tokens": 12574658.0, "reward": 0.0, "reward_std": 0.8100683689117432, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.20566863818592698, "rewards/wordcountpos_reward/raw_geo/std": 0.11447752648502987, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 290 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1171.0, "completions/mean_length": 994.1875, "completions/mean_terminated_length": 960.4667358398438, "completions/min_length": 682.0, "completions/min_terminated_length": 682.0, "epoch": 0.05821164232846569, "frac_reward_zero_std": 0.0, "grad_norm": 3.123399244318877, "kl": 0.0014438629150390625, "learning_rate": 5.8e-07, "loss": -0.0178, "num_tokens": 12608757.0, "reward": 5.960464477539063e-08, "reward_std": 0.7506067752838135, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.032369080666875855, "rewards/wordcountpos_reward/raw_geo/std": 0.1782802644436956, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298362, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 291 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1169.0, "completions/max_terminated_length": 1169.0, "completions/mean_length": 897.25, "completions/mean_terminated_length": 897.25, "completions/min_length": 693.0, "completions/min_terminated_length": 693.0, "epoch": 0.05841168233646729, "frac_reward_zero_std": 0.0, "grad_norm": 4.121676792863123, "kl": 0.002887725830078125, "learning_rate": 5.819999999999999e-07, "loss": 0.0138, "num_tokens": 12648025.0, "reward": -1.4901161193847656e-08, "reward_std": 0.918125331401825, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08942325223787094, "rewards/wordcountpos_reward/raw_geo/std": 0.1267665447355549, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1264911064067352, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 292 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1309.0, "completions/mean_length": 1209.6875, "completions/mean_terminated_length": 1112.916748046875, "completions/min_length": 896.0, "completions/min_terminated_length": 896.0, "epoch": 0.058611722344468896, "frac_reward_zero_std": 0.0, "grad_norm": 3.3944720467872944, "kl": 0.002704620361328125, "learning_rate": 5.839999999999999e-07, "loss": -0.0203, "num_tokens": 12694924.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9822285175323486, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07775060471764175, "rewards/wordcountpos_reward/raw_geo/std": 0.09679048885033613, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.1246476515504285, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 293 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1267.0, "completions/max_terminated_length": 1267.0, "completions/mean_length": 1084.375, "completions/mean_terminated_length": 1084.375, "completions/min_length": 861.0, "completions/min_terminated_length": 861.0, "epoch": 0.058811762352470494, "frac_reward_zero_std": 0.0, "grad_norm": 3.6419277625909783, "kl": 0.0022735595703125, "learning_rate": 5.86e-07, "loss": -0.0243, "num_tokens": 12739842.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0385416746139526, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11827043702999762, "rewards/wordcountpos_reward/raw_geo/std": 0.12328475187178402, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11287488977066927, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 294 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1236.0, "completions/max_terminated_length": 1236.0, "completions/mean_length": 932.0625, "completions/mean_terminated_length": 932.0625, "completions/min_length": 706.0, "completions/min_terminated_length": 706.0, "epoch": 0.05901180236047209, "frac_reward_zero_std": 0.0, "grad_norm": 3.7067288939495304, "kl": 0.0022945404052734375, "learning_rate": 5.879999999999999e-07, "loss": -0.0416, "num_tokens": 12786907.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8422307968139648, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0790643612082375, "rewards/wordcountpos_reward/raw_geo/std": 0.08532462775981063, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238707, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 295 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1110.0, "completions/max_terminated_length": 1110.0, "completions/mean_length": 832.625, "completions/mean_terminated_length": 832.625, "completions/min_length": 665.0, "completions/min_terminated_length": 665.0, "epoch": 0.0592118423684737, "frac_reward_zero_std": 0.0, "grad_norm": 2.9964245781817285, "kl": 0.001468658447265625, "learning_rate": 5.9e-07, "loss": -0.002, "num_tokens": 12817349.0, "reward": -2.9802322387695312e-08, "reward_std": 0.572007417678833, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03612498848301205, "rewards/wordcountpos_reward/raw_geo/std": 0.08841184925567475, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078612, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 296 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1214.125, "completions/mean_terminated_length": 1195.0667724609375, "completions/min_length": 915.0, "completions/min_terminated_length": 915.0, "epoch": 0.059411882376475295, "frac_reward_zero_std": 0.0, "grad_norm": 3.0757043152235926, "kl": 0.00231170654296875, "learning_rate": 5.919999999999999e-07, "loss": 0.0112, "num_tokens": 12869775.0, "reward": 0.0, "reward_std": 1.0303146839141846, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06435223181714003, "rewards/wordcountpos_reward/raw_geo/std": 0.11032298804130312, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.13158576980363348, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 297 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1188.1875, "completions/mean_terminated_length": 1167.4000244140625, "completions/min_length": 893.0, "completions/min_terminated_length": 893.0, "epoch": 0.05961192238447689, "frac_reward_zero_std": 0.0, "grad_norm": 3.3795733599482705, "kl": 0.002716064453125, "learning_rate": 5.939999999999999e-07, "loss": 0.0292, "num_tokens": 12913442.0, "reward": 0.0, "reward_std": 0.8517546653747559, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.19127794010239385, "rewards/wordcountpos_reward/raw_geo/std": 0.2392408971160161, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0697880388775209, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 298 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1271.0, "completions/max_terminated_length": 1271.0, "completions/mean_length": 1043.875, "completions/mean_terminated_length": 1043.875, "completions/min_length": 787.0, "completions/min_terminated_length": 787.0, "epoch": 0.0598119623924785, "frac_reward_zero_std": 0.0, "grad_norm": 3.1264651625284445, "kl": 0.0018444061279296875, "learning_rate": 5.96e-07, "loss": -0.0202, "num_tokens": 12956928.0, "reward": -2.9802322387695312e-08, "reward_std": 0.780985951423645, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06315532973992809, "rewards/wordcountpos_reward/raw_geo/std": 0.08005901049505422, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.11013459778666117, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 299 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1313.75, "completions/mean_terminated_length": 1287.1429443359375, "completions/min_length": 971.0, "completions/min_terminated_length": 971.0, "epoch": 0.060012002400480095, "frac_reward_zero_std": 0.0, "grad_norm": 2.5101882857081543, "kl": 0.0015964508056640625, "learning_rate": 5.979999999999999e-07, "loss": -0.0166, "num_tokens": 13010348.0, "reward": 0.0, "reward_std": 0.9337519407272339, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12569856781501984, "rewards/wordcountpos_reward/raw_geo/std": 0.18251893088022128, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13957607775504183, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 300 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1163.0, "completions/max_terminated_length": 1163.0, "completions/mean_length": 892.25, "completions/mean_terminated_length": 892.25, "completions/min_length": 623.0, "completions/min_terminated_length": 623.0, "epoch": 0.06021204240848169, "frac_reward_zero_std": 0.0, "grad_norm": 3.8346163247245273, "kl": 0.0027618408203125, "learning_rate": 6e-07, "loss": -0.0223, "num_tokens": 13060680.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9565110802650452, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03965529497944607, "rewards/wordcountpos_reward/raw_geo/std": 0.25520347327466575, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11538983843829066, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 301 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1433.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1126.875, "completions/mean_terminated_length": 1126.875, "completions/min_length": 861.0, "completions/min_terminated_length": 861.0, "epoch": 0.0604120824164833, "frac_reward_zero_std": 0.0, "grad_norm": 2.6806723016782477, "kl": 0.0012826919555664062, "learning_rate": 6.019999999999999e-07, "loss": -0.0367, "num_tokens": 13104510.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5853835940361023, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08667054525418348, "rewards/wordcountpos_reward/raw_geo/std": 0.0888390598040084, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10000000000000003, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 302 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1296.0, "completions/max_terminated_length": 1296.0, "completions/mean_length": 975.25, "completions/mean_terminated_length": 975.25, "completions/min_length": 814.0, "completions/min_terminated_length": 814.0, "epoch": 0.060612122424484896, "frac_reward_zero_std": 0.0, "grad_norm": 2.8437785743985784, "kl": 0.001529693603515625, "learning_rate": 6.04e-07, "loss": 0.0088, "num_tokens": 13138474.0, "reward": 0.0, "reward_std": 0.36437976360321045, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06299645475646642, "rewards/wordcountpos_reward/raw_geo/std": 0.06302040927071395, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.12758439472669758, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 303 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1301.0, "completions/max_terminated_length": 1301.0, "completions/mean_length": 1020.4375, "completions/mean_terminated_length": 1020.4375, "completions/min_length": 744.0, "completions/min_terminated_length": 744.0, "epoch": 0.060812162432486494, "frac_reward_zero_std": 0.0, "grad_norm": 2.4869444550853714, "kl": 0.0012989044189453125, "learning_rate": 6.06e-07, "loss": 0.0257, "num_tokens": 13175265.0, "reward": 0.0, "reward_std": 0.7430821657180786, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0005488080137889088, "rewards/wordcountpos_reward/raw_geo/std": 0.07019403386774242, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09098229375970787, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 304 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 1178.25, "completions/mean_terminated_length": 1156.800048828125, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.0610122024404881, "frac_reward_zero_std": 0.0, "grad_norm": 2.6258813079891294, "kl": 0.0019683837890625, "learning_rate": 6.079999999999999e-07, "loss": -0.1066, "num_tokens": 13222661.0, "reward": 1.4901161193847656e-08, "reward_std": 1.029001235961914, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05724283744699999, "rewards/wordcountpos_reward/raw_geo/std": 0.08013325872368318, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.16638865702079933, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 305 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1237.5, "completions/mean_terminated_length": 1220.0001220703125, "completions/min_length": 927.0, "completions/min_terminated_length": 927.0, "epoch": 0.0612122424484897, "frac_reward_zero_std": 0.0, "grad_norm": 2.7390794375804473, "kl": 0.0018596649169921875, "learning_rate": 6.1e-07, "loss": -0.0221, "num_tokens": 13266333.0, "reward": 0.0, "reward_std": 0.8190957307815552, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.005076199972937165, "rewards/wordcountpos_reward/raw_geo/std": 0.16572168512538063, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12382783747337808, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 306 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1330.0, "completions/max_terminated_length": 1330.0, "completions/mean_length": 1056.625, "completions/mean_terminated_length": 1056.625, "completions/min_length": 923.0, "completions/min_terminated_length": 923.0, "epoch": 0.0614122824564913, "frac_reward_zero_std": 0.0, "grad_norm": 3.063044264731715, "kl": 0.0019969940185546875, "learning_rate": 6.119999999999999e-07, "loss": 0.0309, "num_tokens": 13307983.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9251764416694641, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04105501676273889, "rewards/wordcountpos_reward/raw_geo/std": 0.13178643108276034, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08850612031567837, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 307 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1241.0, "completions/max_terminated_length": 1241.0, "completions/mean_length": 1010.0, "completions/mean_terminated_length": 1010.0, "completions/min_length": 863.0, "completions/min_terminated_length": 863.0, "epoch": 0.0616123224644929, "frac_reward_zero_std": 0.0, "grad_norm": 3.9874463752278735, "kl": 0.002811431884765625, "learning_rate": 6.14e-07, "loss": 0.0082, "num_tokens": 13342799.0, "reward": 2.9802322387695312e-08, "reward_std": 0.887786865234375, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.025254785697083126, "rewards/wordcountpos_reward/raw_geo/std": 0.036471777473698085, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.75, "rewards/wordcountpos_reward/raw_rule/std": 0.1128748897706693, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 308 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1327.0, "completions/max_terminated_length": 1327.0, "completions/mean_length": 1009.1875, "completions/mean_terminated_length": 1009.1875, "completions/min_length": 713.0, "completions/min_terminated_length": 713.0, "epoch": 0.0618123624724945, "frac_reward_zero_std": 0.0, "grad_norm": 2.8693445513047005, "kl": 0.0018749237060546875, "learning_rate": 6.16e-07, "loss": 0.0035, "num_tokens": 13391522.0, "reward": 0.0, "reward_std": 0.8305159211158752, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11150393212404293, "rewards/wordcountpos_reward/raw_geo/std": 0.06834906851606187, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0666666666666667, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 309 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1221.3125, "completions/mean_terminated_length": 1202.7333984375, "completions/min_length": 978.0, "completions/min_terminated_length": 978.0, "epoch": 0.0620124024804961, "frac_reward_zero_std": 0.0, "grad_norm": 2.303923800082003, "kl": 0.001476287841796875, "learning_rate": 6.18e-07, "loss": -0.0188, "num_tokens": 13444119.0, "reward": 7.450580596923828e-09, "reward_std": 0.9877969026565552, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.10394082649393052, "rewards/wordcountpos_reward/raw_geo/std": 0.08866684990643865, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.75, "rewards/wordcountpos_reward/raw_rule/std": 0.15682025568335423, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 310 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1198.0, "completions/max_terminated_length": 1198.0, "completions/mean_length": 1079.875, "completions/mean_terminated_length": 1079.875, "completions/min_length": 937.0, "completions/min_terminated_length": 937.0, "epoch": 0.0622124424884977, "frac_reward_zero_std": 0.0, "grad_norm": 3.3611722832709807, "kl": 0.0022983551025390625, "learning_rate": 6.2e-07, "loss": 0.0016, "num_tokens": 13476485.0, "reward": 0.0, "reward_std": 1.0434529781341553, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.14645681153285914, "rewards/wordcountpos_reward/raw_geo/std": 0.05453470913693115, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0806225774829855, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 311 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1336.0, "completions/mean_length": 1185.4375, "completions/mean_terminated_length": 1112.84619140625, "completions/min_length": 826.0, "completions/min_terminated_length": 826.0, "epoch": 0.0624124824964993, "frac_reward_zero_std": 0.0, "grad_norm": 3.3905020408211195, "kl": 0.002315521240234375, "learning_rate": 6.219999999999999e-07, "loss": 0.0245, "num_tokens": 13527916.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0176523923873901, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11392709757098199, "rewards/wordcountpos_reward/raw_geo/std": 0.17001836166721562, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10741060020797315, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 312 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1194.0, "completions/max_terminated_length": 1194.0, "completions/mean_length": 1016.0, "completions/mean_terminated_length": 1016.0, "completions/min_length": 847.0, "completions/min_terminated_length": 847.0, "epoch": 0.0626125225045009, "frac_reward_zero_std": 0.0, "grad_norm": 2.764912834215363, "kl": 0.0015354156494140625, "learning_rate": 6.24e-07, "loss": -0.0016, "num_tokens": 13576060.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9084645509719849, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07193952034221997, "rewards/wordcountpos_reward/raw_geo/std": 0.05538298695013117, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066471, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 313 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1235.0, "completions/mean_length": 1041.625, "completions/mean_terminated_length": 1011.0667114257812, "completions/min_length": 644.0, "completions/min_terminated_length": 644.0, "epoch": 0.0628125625125025, "frac_reward_zero_std": 0.0, "grad_norm": 3.208809453598079, "kl": 0.00234222412109375, "learning_rate": 6.26e-07, "loss": 0.0445, "num_tokens": 13631350.0, "reward": 0.0, "reward_std": 0.9452207684516907, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1717315844490716, "rewards/wordcountpos_reward/raw_geo/std": 0.248546937009435, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7291666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.19085577257690145, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 314 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1499.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1147.4375, "completions/mean_terminated_length": 1147.4375, "completions/min_length": 924.0, "completions/min_terminated_length": 924.0, "epoch": 0.0630126025205041, "frac_reward_zero_std": 0.0, "grad_norm": 3.182743019031256, "kl": 0.0020084381103515625, "learning_rate": 6.28e-07, "loss": -0.0422, "num_tokens": 13668245.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8133895397186279, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09711728312407862, "rewards/wordcountpos_reward/raw_geo/std": 0.06648308475473783, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10000000000000002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 315 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1490.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1149.0625, "completions/mean_terminated_length": 1149.0625, "completions/min_length": 725.0, "completions/min_terminated_length": 725.0, "epoch": 0.0632126425285057, "frac_reward_zero_std": 0.0, "grad_norm": 3.371215938064058, "kl": 0.0027313232421875, "learning_rate": 6.3e-07, "loss": -0.0185, "num_tokens": 13722822.0, "reward": 0.0, "reward_std": 0.8048403263092041, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05137935671907337, "rewards/wordcountpos_reward/raw_geo/std": 0.14063821767116533, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11979921473804349, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 316 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1429.0, "completions/max_terminated_length": 1429.0, "completions/mean_length": 1108.4375, "completions/mean_terminated_length": 1108.4375, "completions/min_length": 883.0, "completions/min_terminated_length": 883.0, "epoch": 0.0634126825365073, "frac_reward_zero_std": 0.0, "grad_norm": 3.2660824124838803, "kl": 0.002685546875, "learning_rate": 6.319999999999999e-07, "loss": -0.0136, "num_tokens": 13764357.0, "reward": 2.9802322387695312e-08, "reward_std": 0.808626115322113, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.059376022695172254, "rewards/wordcountpos_reward/raw_geo/std": 0.1811806562000528, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.046943622609505783, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 317 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1039.25, "completions/mean_terminated_length": 1008.5333862304688, "completions/min_length": 650.0, "completions/min_terminated_length": 650.0, "epoch": 0.0636127225445089, "frac_reward_zero_std": 0.0, "grad_norm": 3.169479100437204, "kl": 0.0019283294677734375, "learning_rate": 6.34e-07, "loss": 0.0095, "num_tokens": 13805513.0, "reward": 0.0, "reward_std": 1.0416817665100098, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09504080041498822, "rewards/wordcountpos_reward/raw_geo/std": 0.07216381431360824, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.12405196043952266, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 318 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1157.0, "completions/max_terminated_length": 1157.0, "completions/mean_length": 952.8125, "completions/mean_terminated_length": 952.8125, "completions/min_length": 684.0, "completions/min_terminated_length": 684.0, "epoch": 0.0638127625525105, "frac_reward_zero_std": 0.0, "grad_norm": 3.3554670934085853, "kl": 0.0021762847900390625, "learning_rate": 6.36e-07, "loss": -0.0313, "num_tokens": 13847878.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9850356578826904, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08900518555554442, "rewards/wordcountpos_reward/raw_geo/std": 0.049632250821620906, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000306, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 319 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1460.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1112.4375, "completions/mean_terminated_length": 1112.4375, "completions/min_length": 733.0, "completions/min_terminated_length": 733.0, "epoch": 0.0640128025605121, "frac_reward_zero_std": 0.0, "grad_norm": 3.150213494732458, "kl": 0.001941680908203125, "learning_rate": 6.38e-07, "loss": -0.0559, "num_tokens": 13884597.0, "reward": 0.0, "reward_std": 0.6624261140823364, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.027774763845650578, "rewards/wordcountpos_reward/raw_geo/std": 0.22926687131285664, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.10886621079036347, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 320 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1137.0, "completions/mean_terminated_length": 1085.1429443359375, "completions/min_length": 864.0, "completions/min_terminated_length": 864.0, "epoch": 0.0642128425685137, "frac_reward_zero_std": 0.0, "grad_norm": 2.7949010862492543, "kl": 0.0018482208251953125, "learning_rate": 6.4e-07, "loss": 0.0362, "num_tokens": 13930397.0, "reward": 0.0, "reward_std": 0.9132857918739319, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0517072507827321, "rewards/wordcountpos_reward/raw_geo/std": 0.10795766044018207, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.6875, "rewards/wordcountpos_reward/raw_rule/std": 0.06763190130459203, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 321 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1312.0, "completions/max_terminated_length": 1312.0, "completions/mean_length": 1057.3125, "completions/mean_terminated_length": 1057.3125, "completions/min_length": 814.0, "completions/min_terminated_length": 814.0, "epoch": 0.0644128825765153, "frac_reward_zero_std": 0.0, "grad_norm": 3.8088891084344714, "kl": 0.002841949462890625, "learning_rate": 6.42e-07, "loss": -0.0485, "num_tokens": 13979578.0, "reward": 0.0, "reward_std": 0.7923277616500854, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.26652284433631424, "rewards/wordcountpos_reward/raw_geo/std": 0.29583048435687, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 322 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1277.0, "completions/max_terminated_length": 1277.0, "completions/mean_length": 1087.125, "completions/mean_terminated_length": 1087.125, "completions/min_length": 825.0, "completions/min_terminated_length": 825.0, "epoch": 0.06461292258451691, "frac_reward_zero_std": 0.0, "grad_norm": 3.628668319526969, "kl": 0.002689361572265625, "learning_rate": 6.44e-07, "loss": -0.0469, "num_tokens": 14022820.0, "reward": 3.3527612686157227e-08, "reward_std": 1.0566508769989014, "rewards/wordcountpos_reward/mean": 3.3527612686157227e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.053744711475414986, "rewards/wordcountpos_reward/raw_geo/std": 0.062447366133733515, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.10945995377982529, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 323 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1141.0, "completions/max_terminated_length": 1141.0, "completions/mean_length": 840.875, "completions/mean_terminated_length": 840.875, "completions/min_length": 591.0, "completions/min_terminated_length": 591.0, "epoch": 0.06481296259251851, "frac_reward_zero_std": 0.0, "grad_norm": 4.162774266686213, "kl": 0.0028839111328125, "learning_rate": 6.46e-07, "loss": -0.0581, "num_tokens": 14058098.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9659443497657776, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.017583180819989268, "rewards/wordcountpos_reward/raw_geo/std": 0.040396642014153515, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725114, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 324 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1226.0, "completions/max_terminated_length": 1226.0, "completions/mean_length": 1043.3125, "completions/mean_terminated_length": 1043.3125, "completions/min_length": 790.0, "completions/min_terminated_length": 790.0, "epoch": 0.0650130026005201, "frac_reward_zero_std": 0.0, "grad_norm": 3.3173352056337864, "kl": 0.0025634765625, "learning_rate": 6.48e-07, "loss": 0.0054, "num_tokens": 14099135.0, "reward": 0.0, "reward_std": 0.8442152738571167, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14853364130675664, "rewards/wordcountpos_reward/raw_geo/std": 0.17144873852492, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 325 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1438.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1072.5, "completions/mean_terminated_length": 1072.5, "completions/min_length": 673.0, "completions/min_terminated_length": 673.0, "epoch": 0.0652130426085217, "frac_reward_zero_std": 0.0, "grad_norm": 3.6595747162271466, "kl": 0.00283050537109375, "learning_rate": 6.5e-07, "loss": -0.0566, "num_tokens": 14149071.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7453584671020508, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.012454429072859121, "rewards/wordcountpos_reward/raw_geo/std": 0.07412407372166926, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.775, "rewards/wordcountpos_reward/raw_rule/std": 0.16487930490266264, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 326 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1281.0625, "completions/mean_terminated_length": 1149.7000732421875, "completions/min_length": 1022.0, "completions/min_terminated_length": 1022.0, "epoch": 0.0654130826165233, "frac_reward_zero_std": 0.0, "grad_norm": 2.176983313820909, "kl": 0.00162506103515625, "learning_rate": 6.52e-07, "loss": 0.0172, "num_tokens": 14197480.0, "reward": 0.0, "reward_std": 0.687066912651062, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08468934088941331, "rewards/wordcountpos_reward/raw_geo/std": 0.20035030405853219, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 327 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1452.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1203.0, "completions/mean_terminated_length": 1203.0, "completions/min_length": 1112.0, "completions/min_terminated_length": 1112.0, "epoch": 0.0656131226245249, "frac_reward_zero_std": 0.0, "grad_norm": 2.2508664761846973, "kl": 0.0011320114135742188, "learning_rate": 6.54e-07, "loss": -0.0258, "num_tokens": 14242432.0, "reward": 0.0, "reward_std": 0.7108990550041199, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04138512484886412, "rewards/wordcountpos_reward/raw_geo/std": 0.038863689948215284, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 328 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1268.0, "completions/max_terminated_length": 1268.0, "completions/mean_length": 910.3125, "completions/mean_terminated_length": 910.3125, "completions/min_length": 746.0, "completions/min_terminated_length": 746.0, "epoch": 0.06581316263252651, "frac_reward_zero_std": 0.0, "grad_norm": 11.575300056029576, "kl": 0.010959625244140625, "learning_rate": 6.56e-07, "loss": 0.0436, "num_tokens": 14284277.0, "reward": 0.0, "reward_std": 0.4071442782878876, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.15959212636785292, "rewards/wordcountpos_reward/raw_geo/std": 0.08815790274196343, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7625, "rewards/wordcountpos_reward/raw_rule/std": 0.16504769232176725, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 329 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1478.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1297.0625, "completions/mean_terminated_length": 1297.0625, "completions/min_length": 1124.0, "completions/min_terminated_length": 1124.0, "epoch": 0.06601320264052811, "frac_reward_zero_std": 0.0, "grad_norm": 2.4195217715731987, "kl": 0.001461029052734375, "learning_rate": 6.58e-07, "loss": -0.0004, "num_tokens": 14327758.0, "reward": 0.0, "reward_std": 0.8564111590385437, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04800210085630859, "rewards/wordcountpos_reward/raw_geo/std": 0.08192731407821649, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195305, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 330 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1206.0, "completions/max_terminated_length": 1206.0, "completions/mean_length": 1024.875, "completions/mean_terminated_length": 1024.875, "completions/min_length": 749.0, "completions/min_terminated_length": 749.0, "epoch": 0.06621324264852971, "frac_reward_zero_std": 0.0, "grad_norm": 4.2884484054422245, "kl": 0.002864837646484375, "learning_rate": 6.6e-07, "loss": 0.0125, "num_tokens": 14372228.0, "reward": 0.0, "reward_std": 0.8580461740493774, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.14922614970099476, "rewards/wordcountpos_reward/raw_geo/std": 0.17456040203399587, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11729986896522632, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 331 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1304.0, "completions/max_terminated_length": 1304.0, "completions/mean_length": 1118.375, "completions/mean_terminated_length": 1118.375, "completions/min_length": 874.0, "completions/min_terminated_length": 874.0, "epoch": 0.0664132826565313, "frac_reward_zero_std": 0.0, "grad_norm": 3.347544540598828, "kl": 0.002353668212890625, "learning_rate": 6.62e-07, "loss": -0.0511, "num_tokens": 14419170.0, "reward": 0.0, "reward_std": 0.6000388264656067, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.021246552942310905, "rewards/wordcountpos_reward/raw_geo/std": 0.17621630609971914, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.16187558093703852, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 332 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1375.0, "completions/max_terminated_length": 1375.0, "completions/mean_length": 1119.5, "completions/mean_terminated_length": 1119.5, "completions/min_length": 850.0, "completions/min_terminated_length": 850.0, "epoch": 0.0666133226645329, "frac_reward_zero_std": 0.0, "grad_norm": 3.236978140615024, "kl": 0.0025634765625, "learning_rate": 6.64e-07, "loss": 0.0054, "num_tokens": 14461018.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5933891534805298, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05126863502407506, "rewards/wordcountpos_reward/raw_geo/std": 0.08129981030182694, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 333 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1212.0, "completions/max_terminated_length": 1212.0, "completions/mean_length": 844.4375, "completions/mean_terminated_length": 844.4375, "completions/min_length": 358.0, "completions/min_terminated_length": 358.0, "epoch": 0.0668133626725345, "frac_reward_zero_std": 0.0, "grad_norm": 3.796790200361543, "kl": 0.00252532958984375, "learning_rate": 6.66e-07, "loss": 0.0222, "num_tokens": 14504441.0, "reward": 0.0, "reward_std": 0.578281044960022, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03980048960926316, "rewards/wordcountpos_reward/raw_geo/std": 0.059171702716111954, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1112221667221529, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 334 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1467.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 1083.125, "completions/mean_terminated_length": 1083.125, "completions/min_length": 715.0, "completions/min_terminated_length": 715.0, "epoch": 0.06701340268053611, "frac_reward_zero_std": 0.0, "grad_norm": 2.960862612234057, "kl": 0.001617431640625, "learning_rate": 6.68e-07, "loss": 0.0126, "num_tokens": 14539699.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9998736381530762, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03836378500552864, "rewards/wordcountpos_reward/raw_geo/std": 0.06505059535662829, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.08766518798921945, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 335 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1322.0, "completions/max_terminated_length": 1322.0, "completions/mean_length": 1019.5625, "completions/mean_terminated_length": 1019.5625, "completions/min_length": 718.0, "completions/min_terminated_length": 718.0, "epoch": 0.06721344268853771, "frac_reward_zero_std": 0.0, "grad_norm": 3.044760141416458, "kl": 0.001708984375, "learning_rate": 6.7e-07, "loss": -0.0471, "num_tokens": 14590132.0, "reward": 0.0, "reward_std": 0.7415168285369873, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.048215794580698204, "rewards/wordcountpos_reward/raw_geo/std": 0.21159535757855202, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1253144193766372, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 336 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1459.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 949.5625, "completions/mean_terminated_length": 949.5625, "completions/min_length": 691.0, "completions/min_terminated_length": 691.0, "epoch": 0.06741348269653931, "frac_reward_zero_std": 0.0, "grad_norm": 2.9768790752339065, "kl": 0.0016498565673828125, "learning_rate": 6.72e-07, "loss": -0.0326, "num_tokens": 14641037.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9546054601669312, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06805036725184002, "rewards/wordcountpos_reward/raw_geo/std": 0.1477188160573905, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09583937179043478, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 337 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1362.0, "completions/max_terminated_length": 1362.0, "completions/mean_length": 1066.8125, "completions/mean_terminated_length": 1066.8125, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.06761352270454091, "frac_reward_zero_std": 0.0, "grad_norm": 3.181960438740294, "kl": 0.002201080322265625, "learning_rate": 6.74e-07, "loss": -0.0371, "num_tokens": 14681834.0, "reward": 0.0, "reward_std": 1.0623748302459717, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08070542429791751, "rewards/wordcountpos_reward/raw_geo/std": 0.20169913646235652, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.10318986456114841, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 338 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1432.625, "completions/mean_terminated_length": 1392.2000732421875, "completions/min_length": 1234.0, "completions/min_terminated_length": 1234.0, "epoch": 0.0678135627125425, "frac_reward_zero_std": 0.0, "grad_norm": 2.2765867329539096, "kl": 0.0016117095947265625, "learning_rate": 6.76e-07, "loss": -0.0028, "num_tokens": 14731316.0, "reward": 0.0, "reward_std": 0.46427273750305176, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11775969495363756, "rewards/wordcountpos_reward/raw_geo/std": 0.08700059071223658, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09108400680852977, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 339 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1184.75, "completions/mean_terminated_length": 1163.7333984375, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.0680136027205441, "frac_reward_zero_std": 0.0, "grad_norm": 2.534593453322148, "kl": 0.00226593017578125, "learning_rate": 6.78e-07, "loss": -0.1523, "num_tokens": 14780184.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9950509071350098, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10665595219291132, "rewards/wordcountpos_reward/raw_geo/std": 0.07305267964045026, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.19148542155126763, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 340 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1304.0, "completions/max_terminated_length": 1304.0, "completions/mean_length": 1141.875, "completions/mean_terminated_length": 1141.875, "completions/min_length": 770.0, "completions/min_terminated_length": 770.0, "epoch": 0.06821364272854571, "frac_reward_zero_std": 0.0, "grad_norm": 2.881249157475616, "kl": 0.002140045166015625, "learning_rate": 6.800000000000001e-07, "loss": -0.0323, "num_tokens": 14822270.0, "reward": 0.0, "reward_std": 0.8476592302322388, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.16595827895346643, "rewards/wordcountpos_reward/raw_geo/std": 0.2569588317902804, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036262, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 341 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1396.0, "completions/max_terminated_length": 1396.0, "completions/mean_length": 1034.9375, "completions/mean_terminated_length": 1034.9375, "completions/min_length": 741.0, "completions/min_terminated_length": 741.0, "epoch": 0.06841368273654731, "frac_reward_zero_std": 0.0, "grad_norm": 2.709589589085801, "kl": 0.0016498565673828125, "learning_rate": 6.82e-07, "loss": 0.001, "num_tokens": 14872501.0, "reward": 2.9802322387695312e-08, "reward_std": 0.873496413230896, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12085816525472777, "rewards/wordcountpos_reward/raw_geo/std": 0.08748211994370123, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.14375905768565217, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 342 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1420.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1236.75, "completions/mean_terminated_length": 1236.75, "completions/min_length": 946.0, "completions/min_terminated_length": 946.0, "epoch": 0.06861372274454891, "frac_reward_zero_std": 0.0, "grad_norm": 3.188809501759933, "kl": 0.00220489501953125, "learning_rate": 6.84e-07, "loss": -0.0068, "num_tokens": 14915129.0, "reward": 0.0, "reward_std": 0.708284854888916, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2786993791153533, "rewards/wordcountpos_reward/raw_geo/std": 0.23962569152925248, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 343 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1202.0, "completions/max_terminated_length": 1202.0, "completions/mean_length": 968.625, "completions/mean_terminated_length": 968.625, "completions/min_length": 826.0, "completions/min_terminated_length": 826.0, "epoch": 0.06881376275255051, "frac_reward_zero_std": 0.0, "grad_norm": 2.7840982497592917, "kl": 0.001495361328125, "learning_rate": 6.86e-07, "loss": -0.0259, "num_tokens": 14957419.0, "reward": 0.0, "reward_std": 0.9442850351333618, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.055345984809441334, "rewards/wordcountpos_reward/raw_geo/std": 0.07450376610568207, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725113, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 344 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1317.125, "completions/mean_terminated_length": 1134.25, "completions/min_length": 773.0, "completions/min_terminated_length": 773.0, "epoch": 0.0690138027605521, "frac_reward_zero_std": 0.0, "grad_norm": 2.835135661127783, "kl": 0.002285003662109375, "learning_rate": 6.879999999999999e-07, "loss": 0.0125, "num_tokens": 15011917.0, "reward": 0.0, "reward_std": 0.6531475782394409, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09215414551257015, "rewards/wordcountpos_reward/raw_geo/std": 0.13822771617055088, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.6958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11666666666666668, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 345 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1416.0, "completions/mean_length": 1323.5625, "completions/mean_terminated_length": 1217.7000732421875, "completions/min_length": 980.0, "completions/min_terminated_length": 980.0, "epoch": 0.0692138427685537, "frac_reward_zero_std": 0.0, "grad_norm": 2.8209262766980157, "kl": 0.002353668212890625, "learning_rate": 6.9e-07, "loss": -0.0114, "num_tokens": 15056006.0, "reward": -2.9802322387695312e-08, "reward_std": 0.3590697646141052, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2157673199946833, "rewards/wordcountpos_reward/raw_geo/std": 0.2541211332646892, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11666666666666668, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 346 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1381.0, "completions/mean_length": 1115.625, "completions/mean_terminated_length": 1090.0, "completions/min_length": 924.0, "completions/min_terminated_length": 924.0, "epoch": 0.06941388277655532, "frac_reward_zero_std": 0.0, "grad_norm": 2.6752427537410512, "kl": 0.0013446807861328125, "learning_rate": 6.919999999999999e-07, "loss": 0.0106, "num_tokens": 15100024.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0165367126464844, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06213331587041926, "rewards/wordcountpos_reward/raw_geo/std": 0.0663310457195454, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 347 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1404.0, "completions/mean_length": 1263.6875, "completions/mean_terminated_length": 1209.1539306640625, "completions/min_length": 1063.0, "completions/min_terminated_length": 1063.0, "epoch": 0.06961392278455691, "frac_reward_zero_std": 0.0, "grad_norm": 3.5208534028308445, "kl": 0.002899169921875, "learning_rate": 6.939999999999999e-07, "loss": 0.0, "num_tokens": 15149139.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8045864105224609, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.139632860388533, "rewards/wordcountpos_reward/raw_geo/std": 0.09975457244061715, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7, "rewards/wordcountpos_reward/raw_rule/std": 0.19925788241297684, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 348 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1497.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1255.8125, "completions/mean_terminated_length": 1255.8125, "completions/min_length": 995.0, "completions/min_terminated_length": 995.0, "epoch": 0.06981396279255851, "frac_reward_zero_std": 0.0, "grad_norm": 2.879589260932738, "kl": 0.0022430419921875, "learning_rate": 6.959999999999999e-07, "loss": 0.0049, "num_tokens": 15198080.0, "reward": -1.4901161193847656e-08, "reward_std": 0.998940110206604, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.016722163423016047, "rewards/wordcountpos_reward/raw_geo/std": 0.07768241344881252, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668902, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 349 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1194.625, "completions/mean_terminated_length": 1174.2667236328125, "completions/min_length": 1017.0, "completions/min_terminated_length": 1017.0, "epoch": 0.07001400280056011, "frac_reward_zero_std": 0.0, "grad_norm": 2.947983812382237, "kl": 0.0017719268798828125, "learning_rate": 6.979999999999999e-07, "loss": 0.0094, "num_tokens": 15240754.0, "reward": 0.0, "reward_std": 1.0460045337677002, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06416512868165652, "rewards/wordcountpos_reward/raw_geo/std": 0.11803918568625868, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0969917904124231, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 350 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1155.0, "completions/mean_length": 1203.875, "completions/mean_terminated_length": 907.75, "completions/min_length": 701.0, "completions/min_terminated_length": 701.0, "epoch": 0.07021404280856171, "frac_reward_zero_std": 0.0, "grad_norm": 2.9159886477539008, "kl": 0.00200653076171875, "learning_rate": 7e-07, "loss": -0.0338, "num_tokens": 15284592.0, "reward": 2.60770320892334e-08, "reward_std": 1.0591906309127808, "rewards/wordcountpos_reward/mean": 2.60770320892334e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08274944216271243, "rewards/wordcountpos_reward/raw_geo/std": 0.13913631224809797, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7625, "rewards/wordcountpos_reward/raw_rule/std": 0.11917929226045819, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 351 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1494.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1189.0625, "completions/mean_terminated_length": 1189.0625, "completions/min_length": 922.0, "completions/min_terminated_length": 922.0, "epoch": 0.0704140828165633, "frac_reward_zero_std": 0.0, "grad_norm": 2.402210024944055, "kl": 0.0016193389892578125, "learning_rate": 7.019999999999999e-07, "loss": 0.0035, "num_tokens": 15328881.0, "reward": -7.450580596923828e-09, "reward_std": 1.0541963577270508, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.31289127576000936, "rewards/wordcountpos_reward/raw_geo/std": 0.12024119136861808, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078614, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 352 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1450.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1093.375, "completions/mean_terminated_length": 1093.375, "completions/min_length": 871.0, "completions/min_terminated_length": 871.0, "epoch": 0.07061412282456492, "frac_reward_zero_std": 0.0, "grad_norm": 3.638271287037237, "kl": 0.00286102294921875, "learning_rate": 7.04e-07, "loss": -0.0579, "num_tokens": 15374119.0, "reward": 0.0, "reward_std": 1.0525254011154175, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14673167580737695, "rewards/wordcountpos_reward/raw_geo/std": 0.12173009811252362, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12412657816683505, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 353 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1435.0, "completions/mean_length": 1250.4375, "completions/mean_terminated_length": 1214.7857666015625, "completions/min_length": 1057.0, "completions/min_terminated_length": 1057.0, "epoch": 0.07081416283256652, "frac_reward_zero_std": 0.0, "grad_norm": 2.62637743631162, "kl": 0.0017414093017578125, "learning_rate": 7.059999999999999e-07, "loss": -0.0088, "num_tokens": 15426054.0, "reward": 7.450580596923828e-09, "reward_std": 1.0290229320526123, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.05629722815652811, "rewards/wordcountpos_reward/raw_geo/std": 0.09274705617736778, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087681, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 354 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1044.0, "completions/max_terminated_length": 1044.0, "completions/mean_length": 919.625, "completions/mean_terminated_length": 919.625, "completions/min_length": 816.0, "completions/min_terminated_length": 816.0, "epoch": 0.07101420284056811, "frac_reward_zero_std": 0.0, "grad_norm": 2.6926984871409587, "kl": 0.0010986328125, "learning_rate": 7.079999999999999e-07, "loss": -0.0075, "num_tokens": 15456544.0, "reward": 0.0, "reward_std": 0.8328101634979248, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.003483548583850108, "rewards/wordcountpos_reward/raw_geo/std": 0.060410686429002144, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 355 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1053.1875, "completions/mean_terminated_length": 989.357177734375, "completions/min_length": 691.0, "completions/min_terminated_length": 691.0, "epoch": 0.07121424284856971, "frac_reward_zero_std": 0.0, "grad_norm": 2.719211311166968, "kl": 0.001689910888671875, "learning_rate": 7.1e-07, "loss": 0.0219, "num_tokens": 15507139.0, "reward": 0.0, "reward_std": 0.5637771487236023, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.15861353078116033, "rewards/wordcountpos_reward/raw_geo/std": 0.12625282240985225, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0769800358919501, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 356 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1277.625, "completions/mean_terminated_length": 1262.800048828125, "completions/min_length": 975.0, "completions/min_terminated_length": 975.0, "epoch": 0.07141428285657131, "frac_reward_zero_std": 0.0, "grad_norm": 2.435609043224974, "kl": 0.0015420913696289062, "learning_rate": 7.119999999999999e-07, "loss": -0.0008, "num_tokens": 15555077.0, "reward": 7.450580596923828e-09, "reward_std": 0.9934263229370117, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.08861176358222685, "rewards/wordcountpos_reward/raw_geo/std": 0.05935167698723634, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09803627446568497, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 357 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1262.0, "completions/max_terminated_length": 1262.0, "completions/mean_length": 1033.0, "completions/mean_terminated_length": 1033.0, "completions/min_length": 900.0, "completions/min_terminated_length": 900.0, "epoch": 0.07161432286457292, "frac_reward_zero_std": 0.0, "grad_norm": 3.5375847418865582, "kl": 0.0025787353515625, "learning_rate": 7.14e-07, "loss": -0.0168, "num_tokens": 15605781.0, "reward": -1.4901161193847656e-08, "reward_std": 1.007598876953125, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06353798041978058, "rewards/wordcountpos_reward/raw_geo/std": 0.07467621574831491, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238707, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 358 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1124.0, "completions/max_terminated_length": 1124.0, "completions/mean_length": 952.375, "completions/mean_terminated_length": 952.375, "completions/min_length": 852.0, "completions/min_terminated_length": 852.0, "epoch": 0.07181436287257452, "frac_reward_zero_std": 0.0, "grad_norm": 3.9649064099482034, "kl": 0.002960205078125, "learning_rate": 7.159999999999999e-07, "loss": -0.003, "num_tokens": 15647635.0, "reward": 1.862645149230957e-08, "reward_std": 1.0129902362823486, "rewards/wordcountpos_reward/mean": 1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.4265952735055052, "rewards/wordcountpos_reward/raw_geo/std": 0.25485152846738385, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08933913745655644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 359 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 1132.0625, "completions/mean_terminated_length": 1107.533447265625, "completions/min_length": 838.0, "completions/min_terminated_length": 838.0, "epoch": 0.07201440288057612, "frac_reward_zero_std": 0.0, "grad_norm": 3.2863929244211296, "kl": 0.0025634765625, "learning_rate": 7.179999999999999e-07, "loss": -0.0269, "num_tokens": 15696476.0, "reward": 0.0, "reward_std": 1.0001307725906372, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07299820639434745, "rewards/wordcountpos_reward/raw_geo/std": 0.1415088623491039, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11417984514369006, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 360 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1268.0, "completions/max_terminated_length": 1268.0, "completions/mean_length": 977.6875, "completions/mean_terminated_length": 977.6875, "completions/min_length": 683.0, "completions/min_terminated_length": 683.0, "epoch": 0.07221444288857772, "frac_reward_zero_std": 0.0, "grad_norm": 3.745734395705583, "kl": 0.00186920166015625, "learning_rate": 7.2e-07, "loss": -0.0665, "num_tokens": 15746055.0, "reward": 0.0, "reward_std": 0.8177444934844971, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.4038995998701616, "rewards/wordcountpos_reward/raw_geo/std": 0.13189988564737254, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.18614013040757266, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 361 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1465.0, "completions/mean_length": 1182.0625, "completions/mean_terminated_length": 1136.6429443359375, "completions/min_length": 874.0, "completions/min_terminated_length": 874.0, "epoch": 0.07241448289657931, "frac_reward_zero_std": 0.0, "grad_norm": 2.723404620398981, "kl": 0.0018215179443359375, "learning_rate": 7.219999999999999e-07, "loss": 0.0398, "num_tokens": 15788336.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7788569331169128, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08275675925236076, "rewards/wordcountpos_reward/raw_geo/std": 0.17988156817836473, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11417984514369006, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 362 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1359.0, "completions/max_terminated_length": 1359.0, "completions/mean_length": 1134.4375, "completions/mean_terminated_length": 1134.4375, "completions/min_length": 854.0, "completions/min_terminated_length": 854.0, "epoch": 0.07261452290458091, "frac_reward_zero_std": 0.0, "grad_norm": 3.3954514626295818, "kl": 0.00255584716796875, "learning_rate": 7.24e-07, "loss": -0.0147, "num_tokens": 15830903.0, "reward": 7.450580596923828e-09, "reward_std": 1.0275144577026367, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.14644673945227946, "rewards/wordcountpos_reward/raw_geo/std": 0.08761564023917168, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.1299572579307862, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 363 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1351.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 956.9375, "completions/mean_terminated_length": 956.9375, "completions/min_length": 685.0, "completions/min_terminated_length": 685.0, "epoch": 0.07281456291258252, "frac_reward_zero_std": 0.0, "grad_norm": 3.7847189671405994, "kl": 0.002666473388671875, "learning_rate": 7.259999999999999e-07, "loss": -0.0189, "num_tokens": 15860358.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8187500238418579, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2553505085671958, "rewards/wordcountpos_reward/raw_geo/std": 0.08274951313215693, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11155467020454342, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 364 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1330.0, "completions/max_terminated_length": 1330.0, "completions/mean_length": 1169.375, "completions/mean_terminated_length": 1169.375, "completions/min_length": 970.0, "completions/min_terminated_length": 970.0, "epoch": 0.07301460292058412, "frac_reward_zero_std": 0.0, "grad_norm": 3.0581275082954615, "kl": 0.002239227294921875, "learning_rate": 7.28e-07, "loss": 0.0099, "num_tokens": 15905596.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9600626230239868, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06784530403272701, "rewards/wordcountpos_reward/raw_geo/std": 0.06333157851365523, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.775, "rewards/wordcountpos_reward/raw_rule/std": 0.11385500851066223, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 365 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1283.1875, "completions/mean_terminated_length": 1233.1539306640625, "completions/min_length": 962.0, "completions/min_terminated_length": 962.0, "epoch": 0.07321464292858572, "frac_reward_zero_std": 0.0, "grad_norm": 3.226000283354174, "kl": 0.00252532958984375, "learning_rate": 7.3e-07, "loss": -0.0068, "num_tokens": 15950519.0, "reward": 0.0, "reward_std": 0.8436201214790344, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04730227003181049, "rewards/wordcountpos_reward/raw_geo/std": 0.054344553171499776, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 366 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1273.1875, "completions/mean_terminated_length": 1240.7857666015625, "completions/min_length": 1020.0, "completions/min_terminated_length": 1020.0, "epoch": 0.07341468293658732, "frac_reward_zero_std": 0.0, "grad_norm": 2.6199039133576987, "kl": 0.0017242431640625, "learning_rate": 7.319999999999999e-07, "loss": 0.0066, "num_tokens": 15993714.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5935419201850891, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.23694796349751493, "rewards/wordcountpos_reward/raw_geo/std": 0.38556423279124885, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 367 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1292.0, "completions/max_terminated_length": 1292.0, "completions/mean_length": 1179.0, "completions/mean_terminated_length": 1179.0, "completions/min_length": 1008.0, "completions/min_terminated_length": 1008.0, "epoch": 0.07361472294458891, "frac_reward_zero_std": 0.0, "grad_norm": 1.6552059955084237, "kl": 0.0009813308715820312, "learning_rate": 7.34e-07, "loss": -0.0057, "num_tokens": 16030034.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9800270199775696, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.013932727343423858, "rewards/wordcountpos_reward/raw_geo/std": 0.09059217439165791, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 368 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 1189.625, "completions/mean_terminated_length": 1118.0, "completions/min_length": 973.0, "completions/min_terminated_length": 973.0, "epoch": 0.07381476295259051, "frac_reward_zero_std": 0.0, "grad_norm": 2.464919564534785, "kl": 0.001739501953125, "learning_rate": 7.359999999999999e-07, "loss": 0.0331, "num_tokens": 16081180.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9230073690414429, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.215495302357515, "rewards/wordcountpos_reward/raw_geo/std": 0.14489154310100222, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.16865480854231357, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 369 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 844.0, "completions/max_terminated_length": 844.0, "completions/mean_length": 729.6875, "completions/mean_terminated_length": 729.6875, "completions/min_length": 563.0, "completions/min_terminated_length": 563.0, "epoch": 0.07401480296059212, "frac_reward_zero_std": 0.0, "grad_norm": 2.7724744810969266, "kl": 0.0010919570922851562, "learning_rate": 7.38e-07, "loss": -0.0378, "num_tokens": 16107423.0, "reward": -2.60770320892334e-08, "reward_std": 1.0266315937042236, "rewards/wordcountpos_reward/mean": -2.60770320892334e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.004095394133707565, "rewards/wordcountpos_reward/raw_geo/std": 0.10111930300139639, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.6833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.107496769977314, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 370 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1307.0, "completions/max_terminated_length": 1307.0, "completions/mean_length": 1082.0, "completions/mean_terminated_length": 1082.0, "completions/min_length": 867.0, "completions/min_terminated_length": 867.0, "epoch": 0.07421484296859372, "frac_reward_zero_std": 0.0, "grad_norm": 3.337922187398566, "kl": 0.00258636474609375, "learning_rate": 7.4e-07, "loss": -0.0056, "num_tokens": 16146495.0, "reward": 7.450580596923828e-09, "reward_std": 0.9355493187904358, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.08420984558230026, "rewards/wordcountpos_reward/raw_geo/std": 0.07092232973915294, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13158576980363346, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 371 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1424.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1154.4375, "completions/mean_terminated_length": 1154.4375, "completions/min_length": 955.0, "completions/min_terminated_length": 955.0, "epoch": 0.07441488297659532, "frac_reward_zero_std": 0.0, "grad_norm": 3.4966790057141486, "kl": 0.0025482177734375, "learning_rate": 7.42e-07, "loss": -0.0283, "num_tokens": 16179726.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8922228813171387, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.16021923976249017, "rewards/wordcountpos_reward/raw_geo/std": 0.16667407594414502, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09098229375970787, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 372 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1354.0, "completions/max_terminated_length": 1354.0, "completions/mean_length": 1206.1875, "completions/mean_terminated_length": 1206.1875, "completions/min_length": 936.0, "completions/min_terminated_length": 936.0, "epoch": 0.07461492298459692, "frac_reward_zero_std": 0.5, "grad_norm": 2.4432796827890972, "kl": 0.0020771026611328125, "learning_rate": 7.44e-07, "loss": -0.0247, "num_tokens": 16222353.0, "reward": 0.0, "reward_std": 0.17276452481746674, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/std": 0.0, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.17299111516469837, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 373 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1332.0, "completions/mean_length": 1091.9375, "completions/mean_terminated_length": 1064.7333984375, "completions/min_length": 775.0, "completions/min_terminated_length": 775.0, "epoch": 0.07481496299259852, "frac_reward_zero_std": 0.0, "grad_norm": 3.5947584572137843, "kl": 0.0023651123046875, "learning_rate": 7.459999999999999e-07, "loss": -0.0623, "num_tokens": 16261392.0, "reward": 0.0, "reward_std": 0.694343090057373, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03110712424574219, "rewards/wordcountpos_reward/raw_geo/std": 0.07291467283236489, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 374 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1297.0, "completions/max_terminated_length": 1297.0, "completions/mean_length": 1103.25, "completions/mean_terminated_length": 1103.25, "completions/min_length": 922.0, "completions/min_terminated_length": 922.0, "epoch": 0.07501500300060011, "frac_reward_zero_std": 0.0, "grad_norm": 3.6920906931805635, "kl": 0.002742767333984375, "learning_rate": 7.48e-07, "loss": 0.0046, "num_tokens": 16301604.0, "reward": -7.450580596923828e-09, "reward_std": 1.005752682685852, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.0416080213652988, "rewards/wordcountpos_reward/raw_geo/std": 0.16309424007497925, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.0885061203156784, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 375 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1203.625, "completions/mean_terminated_length": 1183.86669921875, "completions/min_length": 922.0, "completions/min_terminated_length": 922.0, "epoch": 0.07521504300860173, "frac_reward_zero_std": 0.0, "grad_norm": 3.3545317210242334, "kl": 0.0030364990234375, "learning_rate": 7.5e-07, "loss": -0.0152, "num_tokens": 16355902.0, "reward": 0.0, "reward_std": 0.5474947690963745, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1279259424335897, "rewards/wordcountpos_reward/raw_geo/std": 0.05761335791497305, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7625, "rewards/wordcountpos_reward/raw_rule/std": 0.13977495139343474, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 376 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1027.0, "completions/max_terminated_length": 1027.0, "completions/mean_length": 836.5, "completions/mean_terminated_length": 836.5, "completions/min_length": 479.0, "completions/min_terminated_length": 479.0, "epoch": 0.07541508301660332, "frac_reward_zero_std": 0.0, "grad_norm": 3.6670445297461014, "kl": 0.002223968505859375, "learning_rate": 7.52e-07, "loss": -0.059, "num_tokens": 16406710.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8515207171440125, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.013081117936095111, "rewards/wordcountpos_reward/raw_geo/std": 0.20368873903382947, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1355373393953503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 377 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1415.0, "completions/max_terminated_length": 1415.0, "completions/mean_length": 1107.5, "completions/mean_terminated_length": 1107.5, "completions/min_length": 854.0, "completions/min_terminated_length": 854.0, "epoch": 0.07561512302460492, "frac_reward_zero_std": 0.0, "grad_norm": 2.3727977004602265, "kl": 0.00145721435546875, "learning_rate": 7.54e-07, "loss": 0.0302, "num_tokens": 16442646.0, "reward": 5.960464477539063e-08, "reward_std": 0.5576227903366089, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03322198957390813, "rewards/wordcountpos_reward/raw_geo/std": 0.05738959065309491, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965646, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 378 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 927.0, "completions/max_terminated_length": 927.0, "completions/mean_length": 859.875, "completions/mean_terminated_length": 859.875, "completions/min_length": 699.0, "completions/min_terminated_length": 699.0, "epoch": 0.07581516303260652, "frac_reward_zero_std": 0.0, "grad_norm": 3.697805428091899, "kl": 0.002292633056640625, "learning_rate": 7.559999999999999e-07, "loss": -0.0114, "num_tokens": 16481564.0, "reward": 7.450580596923828e-09, "reward_std": 1.017039179801941, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.05201075129225997, "rewards/wordcountpos_reward/raw_geo/std": 0.13288906275971712, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09269623828717428, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 379 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1283.0, "completions/max_terminated_length": 1283.0, "completions/mean_length": 1062.8125, "completions/mean_terminated_length": 1062.8125, "completions/min_length": 886.0, "completions/min_terminated_length": 886.0, "epoch": 0.07601520304060812, "frac_reward_zero_std": 0.0, "grad_norm": 3.5022729325806448, "kl": 0.0028076171875, "learning_rate": 7.58e-07, "loss": 0.0008, "num_tokens": 16530209.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0048627853393555, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.2779941087806745, "rewards/wordcountpos_reward/raw_geo/std": 0.15260954899620252, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.10878112581387148, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 380 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1198.0, "completions/max_terminated_length": 1198.0, "completions/mean_length": 962.25, "completions/mean_terminated_length": 962.25, "completions/min_length": 818.0, "completions/min_terminated_length": 818.0, "epoch": 0.07621524304860972, "frac_reward_zero_std": 0.0, "grad_norm": 2.9682405188518723, "kl": 0.001377105712890625, "learning_rate": 7.599999999999999e-07, "loss": -0.0374, "num_tokens": 16565373.0, "reward": -3.725290298461914e-09, "reward_std": 1.0429153442382812, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.15955278935526343, "rewards/wordcountpos_reward/raw_geo/std": 0.2711222972079778, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.11417984514369006, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 381 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1280.0, "completions/max_terminated_length": 1280.0, "completions/mean_length": 995.6875, "completions/mean_terminated_length": 995.6875, "completions/min_length": 747.0, "completions/min_terminated_length": 747.0, "epoch": 0.07641528305661133, "frac_reward_zero_std": 0.0, "grad_norm": 4.051261841121727, "kl": 0.002964019775390625, "learning_rate": 7.62e-07, "loss": -0.013, "num_tokens": 16604600.0, "reward": 2.9802322387695312e-08, "reward_std": 0.733407735824585, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.030587188553242783, "rewards/wordcountpos_reward/raw_geo/std": 0.14480982093759714, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7375, "rewards/wordcountpos_reward/raw_rule/std": 0.1567612007930345, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 382 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1328.0, "completions/mean_length": 1141.625, "completions/mean_terminated_length": 1058.923095703125, "completions/min_length": 846.0, "completions/min_terminated_length": 846.0, "epoch": 0.07661532306461293, "frac_reward_zero_std": 0.0, "grad_norm": 3.1291709283741174, "kl": 0.002391815185546875, "learning_rate": 7.64e-07, "loss": 0.0193, "num_tokens": 16649978.0, "reward": -1.4901161193847656e-08, "reward_std": 1.06006920337677, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.19834771329243037, "rewards/wordcountpos_reward/raw_geo/std": 0.051849302348137095, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08850612031567838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 383 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1326.0, "completions/max_terminated_length": 1326.0, "completions/mean_length": 1071.1875, "completions/mean_terminated_length": 1071.1875, "completions/min_length": 936.0, "completions/min_terminated_length": 936.0, "epoch": 0.07681536307261452, "frac_reward_zero_std": 0.0, "grad_norm": 3.155979852848612, "kl": 0.00244903564453125, "learning_rate": 7.66e-07, "loss": -0.0006, "num_tokens": 16693333.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9485359191894531, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08430169140353466, "rewards/wordcountpos_reward/raw_geo/std": 0.08878377427896136, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.10292032157252809, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1277.9375, "completions/mean_terminated_length": 1226.6923828125, "completions/min_length": 1042.0, "completions/min_terminated_length": 1042.0, "epoch": 0.07701540308061612, "frac_reward_zero_std": 0.0, "grad_norm": 3.4938680489508194, "kl": 0.00286102294921875, "learning_rate": 7.68e-07, "loss": -0.0001, "num_tokens": 16741652.0, "reward": 0.0, "reward_std": 0.49249571561813354, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.016940986172604366, "rewards/wordcountpos_reward/raw_geo/std": 0.14560916546917568, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1881193474602995, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 385 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1225.4375, "completions/mean_terminated_length": 1207.1334228515625, "completions/min_length": 999.0, "completions/min_terminated_length": 999.0, "epoch": 0.07721544308861772, "frac_reward_zero_std": 0.0, "grad_norm": 3.2066611808788124, "kl": 0.00226593017578125, "learning_rate": 7.699999999999999e-07, "loss": 0.0304, "num_tokens": 16779883.0, "reward": -1.1175870895385742e-08, "reward_std": 1.0592622756958008, "rewards/wordcountpos_reward/mean": -1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0827675471542851, "rewards/wordcountpos_reward/raw_geo/std": 0.123529620953116, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0620632890834175, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 386 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1239.0, "completions/mean_length": 1060.75, "completions/mean_terminated_length": 1031.4666748046875, "completions/min_length": 777.0, "completions/min_terminated_length": 777.0, "epoch": 0.07741548309661932, "frac_reward_zero_std": 0.0, "grad_norm": 2.9810427562465636, "kl": 0.0017032623291015625, "learning_rate": 7.72e-07, "loss": -0.0, "num_tokens": 16825695.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9820472002029419, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.010793569474357844, "rewards/wordcountpos_reward/raw_geo/std": 0.05924348082856902, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.09259629622222519, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 387 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1431.0, "completions/max_terminated_length": 1431.0, "completions/mean_length": 1031.1875, "completions/mean_terminated_length": 1031.1875, "completions/min_length": 795.0, "completions/min_terminated_length": 795.0, "epoch": 0.07761552310462093, "frac_reward_zero_std": 0.0, "grad_norm": 2.554974134488524, "kl": 0.0013971328735351562, "learning_rate": 7.74e-07, "loss": -0.0251, "num_tokens": 16862946.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0310511589050293, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.026319768042349218, "rewards/wordcountpos_reward/raw_geo/std": 0.036495620418145504, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593313, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 388 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1263.0, "completions/max_terminated_length": 1263.0, "completions/mean_length": 1071.125, "completions/mean_terminated_length": 1071.125, "completions/min_length": 699.0, "completions/min_terminated_length": 699.0, "epoch": 0.07781556311262253, "frac_reward_zero_std": 0.0, "grad_norm": 2.593683306278425, "kl": 0.0015869140625, "learning_rate": 7.76e-07, "loss": 0.0171, "num_tokens": 16897148.0, "reward": -2.9802322387695312e-08, "reward_std": 0.48925772309303284, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.013115534997961788, "rewards/wordcountpos_reward/raw_geo/std": 0.16101667523872967, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09108400680852978, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 389 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1301.0, "completions/max_terminated_length": 1301.0, "completions/mean_length": 1036.25, "completions/mean_terminated_length": 1036.25, "completions/min_length": 885.0, "completions/min_terminated_length": 885.0, "epoch": 0.07801560312062412, "frac_reward_zero_std": 0.0, "grad_norm": 3.419498859244468, "kl": 0.00218963623046875, "learning_rate": 7.78e-07, "loss": -0.0414, "num_tokens": 16938296.0, "reward": 0.0, "reward_std": 0.6891583204269409, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09224928843248108, "rewards/wordcountpos_reward/raw_geo/std": 0.0882766204011879, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078612, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 390 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1305.6875, "completions/mean_terminated_length": 1277.9285888671875, "completions/min_length": 1070.0, "completions/min_terminated_length": 1070.0, "epoch": 0.07821564312862572, "frac_reward_zero_std": 0.0, "grad_norm": 2.156767519126892, "kl": 0.0014171600341796875, "learning_rate": 7.799999999999999e-07, "loss": 0.0272, "num_tokens": 16978179.0, "reward": -2.9802322387695312e-08, "reward_std": 0.3249605894088745, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.007760756467150007, "rewards/wordcountpos_reward/raw_geo/std": 0.11473095785869254, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.775, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036264, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 391 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1296.6875, "completions/mean_terminated_length": 1249.769287109375, "completions/min_length": 1096.0, "completions/min_terminated_length": 1096.0, "epoch": 0.07841568313662732, "frac_reward_zero_std": 0.0, "grad_norm": 3.141433864872489, "kl": 0.002628326416015625, "learning_rate": 7.82e-07, "loss": -0.0033, "num_tokens": 17023382.0, "reward": 0.0, "reward_std": 0.8168051242828369, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.13975184520123504, "rewards/wordcountpos_reward/raw_geo/std": 0.1701501706722421, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988657014, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 392 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1218.0, "completions/max_terminated_length": 1218.0, "completions/mean_length": 954.5, "completions/mean_terminated_length": 954.5, "completions/min_length": 676.0, "completions/min_terminated_length": 676.0, "epoch": 0.07861572314462893, "frac_reward_zero_std": 0.0, "grad_norm": 3.4281220819388367, "kl": 0.0023212432861328125, "learning_rate": 7.84e-07, "loss": -0.0243, "num_tokens": 17062486.0, "reward": 0.0, "reward_std": 1.0324459075927734, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14868802691353358, "rewards/wordcountpos_reward/raw_geo/std": 0.06756757576121707, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10532137766186217, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 393 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1361.0, "completions/mean_length": 1189.3125, "completions/mean_terminated_length": 1168.60009765625, "completions/min_length": 833.0, "completions/min_terminated_length": 833.0, "epoch": 0.07881576315263053, "frac_reward_zero_std": 0.0, "grad_norm": 3.4411843215430924, "kl": 0.002674102783203125, "learning_rate": 7.86e-07, "loss": 0.0152, "num_tokens": 17115011.0, "reward": -2.9802322387695312e-08, "reward_std": 1.044392704963684, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.010901298165659726, "rewards/wordcountpos_reward/raw_geo/std": 0.070686649989382, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.16903867626692443, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 394 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1331.0, "completions/mean_length": 1070.375, "completions/mean_terminated_length": 1041.7333984375, "completions/min_length": 880.0, "completions/min_terminated_length": 880.0, "epoch": 0.07901580316063213, "frac_reward_zero_std": 0.0, "grad_norm": 3.698409341036159, "kl": 0.002765655517578125, "learning_rate": 7.88e-07, "loss": 0.0133, "num_tokens": 17153169.0, "reward": 2.2351741790771484e-08, "reward_std": 1.011796474456787, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.15778116853439067, "rewards/wordcountpos_reward/raw_geo/std": 0.1135791091307533, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7125, "rewards/wordcountpos_reward/raw_rule/std": 0.11603000888978234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 395 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1391.0, "completions/max_terminated_length": 1391.0, "completions/mean_length": 962.0625, "completions/mean_terminated_length": 962.0625, "completions/min_length": 650.0, "completions/min_terminated_length": 650.0, "epoch": 0.07921584316863373, "frac_reward_zero_std": 0.0, "grad_norm": 4.266535853506466, "kl": 0.003360748291015625, "learning_rate": 7.9e-07, "loss": -0.0682, "num_tokens": 17202370.0, "reward": 0.0, "reward_std": 0.9920775294303894, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.32518239042982766, "rewards/wordcountpos_reward/raw_geo/std": 0.09599094127420793, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503963, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 396 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1354.0, "completions/max_terminated_length": 1354.0, "completions/mean_length": 1198.8125, "completions/mean_terminated_length": 1198.8125, "completions/min_length": 934.0, "completions/min_terminated_length": 934.0, "epoch": 0.07941588317663532, "frac_reward_zero_std": 0.0, "grad_norm": 2.7291319741433715, "kl": 0.0017671585083007812, "learning_rate": 7.92e-07, "loss": -0.0227, "num_tokens": 17251183.0, "reward": 0.0, "reward_std": 0.8251668214797974, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11715859467007739, "rewards/wordcountpos_reward/raw_geo/std": 0.3571080445410963, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09737289911202955, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 397 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1265.0, "completions/max_terminated_length": 1265.0, "completions/mean_length": 1059.5625, "completions/mean_terminated_length": 1059.5625, "completions/min_length": 890.0, "completions/min_terminated_length": 890.0, "epoch": 0.07961592318463692, "frac_reward_zero_std": 0.0, "grad_norm": 3.567656293324913, "kl": 0.003063201904296875, "learning_rate": 7.94e-07, "loss": 0.0143, "num_tokens": 17291760.0, "reward": 0.0, "reward_std": 0.802862286567688, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10289364811521635, "rewards/wordcountpos_reward/raw_geo/std": 0.09350648310008419, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11279282877125757, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 398 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1254.0, "completions/max_terminated_length": 1254.0, "completions/mean_length": 1042.6875, "completions/mean_terminated_length": 1042.6875, "completions/min_length": 902.0, "completions/min_terminated_length": 902.0, "epoch": 0.07981596319263853, "frac_reward_zero_std": 0.0, "grad_norm": 2.80989495743674, "kl": 0.0022687911987304688, "learning_rate": 7.96e-07, "loss": -0.0083, "num_tokens": 17340731.0, "reward": 0.0, "reward_std": 0.7733708620071411, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.022759323063843154, "rewards/wordcountpos_reward/raw_geo/std": 0.180026115102723, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.13743685418725538, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 399 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1291.0, "completions/mean_length": 1053.1875, "completions/mean_terminated_length": 1023.4000244140625, "completions/min_length": 746.0, "completions/min_terminated_length": 746.0, "epoch": 0.08001600320064013, "frac_reward_zero_std": 0.0, "grad_norm": 3.144527936613555, "kl": 0.0021305084228515625, "learning_rate": 7.98e-07, "loss": 0.023, "num_tokens": 17385518.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8499571084976196, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11949830124805697, "rewards/wordcountpos_reward/raw_geo/std": 0.2666749306717188, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.08333333333333333, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 400 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1189.0, "completions/max_terminated_length": 1189.0, "completions/mean_length": 979.4375, "completions/mean_terminated_length": 979.4375, "completions/min_length": 620.0, "completions/min_terminated_length": 620.0, "epoch": 0.08021604320864173, "frac_reward_zero_std": 0.0, "grad_norm": 3.508709941120798, "kl": 0.0024261474609375, "learning_rate": 8e-07, "loss": -0.0329, "num_tokens": 17429069.0, "reward": 0.0, "reward_std": 0.43606865406036377, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2479166716961376, "rewards/wordcountpos_reward/raw_geo/std": 0.179032899543126, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.15098442401882486, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 401 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1263.0, "completions/max_terminated_length": 1263.0, "completions/mean_length": 1094.5625, "completions/mean_terminated_length": 1094.5625, "completions/min_length": 851.0, "completions/min_terminated_length": 851.0, "epoch": 0.08041608321664333, "frac_reward_zero_std": 0.0, "grad_norm": 2.327948427968791, "kl": 0.0014133453369140625, "learning_rate": 8.02e-07, "loss": -0.0167, "num_tokens": 17470750.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7718532681465149, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07753804506847807, "rewards/wordcountpos_reward/raw_geo/std": 0.09357421607500979, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.13662601021279466, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 402 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1430.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1106.4375, "completions/mean_terminated_length": 1106.4375, "completions/min_length": 848.0, "completions/min_terminated_length": 848.0, "epoch": 0.08061612322464493, "frac_reward_zero_std": 0.0, "grad_norm": 3.3431244094197554, "kl": 0.0024871826171875, "learning_rate": 8.04e-07, "loss": 0.0214, "num_tokens": 17515269.0, "reward": -5.960464477539063e-08, "reward_std": 0.7134137153625488, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08792860096913051, "rewards/wordcountpos_reward/raw_geo/std": 0.04886506479392634, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.14298407059684812, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 403 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1293.0, "completions/max_terminated_length": 1293.0, "completions/mean_length": 1047.5625, "completions/mean_terminated_length": 1047.5625, "completions/min_length": 908.0, "completions/min_terminated_length": 908.0, "epoch": 0.08081616323264652, "frac_reward_zero_std": 0.0, "grad_norm": 2.168145350955324, "kl": 0.0009412765502929688, "learning_rate": 8.06e-07, "loss": -0.0164, "num_tokens": 17555086.0, "reward": 0.0, "reward_std": 0.6112060546875, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.26590774376568715, "rewards/wordcountpos_reward/raw_geo/std": 0.14494511802659826, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503963, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 404 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1379.0, "completions/max_terminated_length": 1379.0, "completions/mean_length": 899.375, "completions/mean_terminated_length": 899.375, "completions/min_length": 517.0, "completions/min_terminated_length": 517.0, "epoch": 0.08101620324064814, "frac_reward_zero_std": 0.0, "grad_norm": 3.2491245033249765, "kl": 0.0017833709716796875, "learning_rate": 8.08e-07, "loss": 0.0246, "num_tokens": 17584236.0, "reward": 0.0, "reward_std": 0.7021193504333496, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.031133583219200484, "rewards/wordcountpos_reward/raw_geo/std": 0.10159549543044888, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10183501544346311, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 405 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1122.4375, "completions/mean_terminated_length": 1097.2667236328125, "completions/min_length": 870.0, "completions/min_terminated_length": 870.0, "epoch": 0.08121624324864973, "frac_reward_zero_std": 0.0, "grad_norm": 2.6370857377470136, "kl": 0.00182342529296875, "learning_rate": 8.1e-07, "loss": 0.0206, "num_tokens": 17625963.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0228652954101562, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.377113685405611, "rewards/wordcountpos_reward/raw_geo/std": 0.1660329168743078, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 406 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1305.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 1161.9375, "completions/mean_terminated_length": 1161.9375, "completions/min_length": 1063.0, "completions/min_terminated_length": 1063.0, "epoch": 0.08141628325665133, "frac_reward_zero_std": 0.5, "grad_norm": 0.6630523529996382, "kl": 0.0002644062042236328, "learning_rate": 8.12e-07, "loss": -0.0026, "num_tokens": 17670226.0, "reward": 1.862645149230957e-09, "reward_std": 0.7558939456939697, "rewards/wordcountpos_reward/mean": 1.862645149230957e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.07028589862331452, "rewards/wordcountpos_reward/raw_geo/std": 0.08567249499400444, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116194, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 407 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1184.0, "completions/max_terminated_length": 1184.0, "completions/mean_length": 1095.625, "completions/mean_terminated_length": 1095.625, "completions/min_length": 923.0, "completions/min_terminated_length": 923.0, "epoch": 0.08161632326465293, "frac_reward_zero_std": 0.0, "grad_norm": 2.4742911999419115, "kl": 0.0014219284057617188, "learning_rate": 8.14e-07, "loss": -0.014, "num_tokens": 17704148.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9453210830688477, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04634748267184062, "rewards/wordcountpos_reward/raw_geo/std": 0.03990545299577224, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066471, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 408 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1060.875, "completions/mean_terminated_length": 1031.60009765625, "completions/min_length": 687.0, "completions/min_terminated_length": 687.0, "epoch": 0.08181636327265453, "frac_reward_zero_std": 0.0, "grad_norm": 3.4905110186322834, "kl": 0.002685546875, "learning_rate": 8.159999999999999e-07, "loss": -0.0254, "num_tokens": 17759058.0, "reward": 0.0, "reward_std": 0.5542085766792297, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.038401116124592335, "rewards/wordcountpos_reward/raw_geo/std": 0.06764465109504178, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7625, "rewards/wordcountpos_reward/raw_rule/std": 0.15000000000000002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 409 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1303.0, "completions/max_terminated_length": 1303.0, "completions/mean_length": 1107.4375, "completions/mean_terminated_length": 1107.4375, "completions/min_length": 830.0, "completions/min_terminated_length": 830.0, "epoch": 0.08201640328065612, "frac_reward_zero_std": 0.0, "grad_norm": 3.6040708015111025, "kl": 0.003147125244140625, "learning_rate": 8.179999999999999e-07, "loss": 0.0323, "num_tokens": 17799313.0, "reward": 0.0, "reward_std": 0.7635582685470581, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0743550991284492, "rewards/wordcountpos_reward/raw_geo/std": 0.21166073809703154, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.1031898645611484, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 410 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1371.0, "completions/max_terminated_length": 1371.0, "completions/mean_length": 897.9375, "completions/mean_terminated_length": 897.9375, "completions/min_length": 374.0, "completions/min_terminated_length": 374.0, "epoch": 0.08221644328865774, "frac_reward_zero_std": 0.0, "grad_norm": 2.1855816161929686, "kl": 0.001605987548828125, "learning_rate": 8.199999999999999e-07, "loss": -0.0926, "num_tokens": 17847680.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7678371667861938, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.15831153786108407, "rewards/wordcountpos_reward/raw_geo/std": 0.11065331094720791, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7291666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.1641476300299351, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 411 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1223.0, "completions/max_terminated_length": 1223.0, "completions/mean_length": 1030.0625, "completions/mean_terminated_length": 1030.0625, "completions/min_length": 861.0, "completions/min_terminated_length": 861.0, "epoch": 0.08241648329665933, "frac_reward_zero_std": 0.0, "grad_norm": 3.6285630837492997, "kl": 0.0033416748046875, "learning_rate": 8.219999999999999e-07, "loss": -0.021, "num_tokens": 17886993.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0661065578460693, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04436963539968915, "rewards/wordcountpos_reward/raw_geo/std": 0.035825588739241035, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.6875, "rewards/wordcountpos_reward/raw_rule/std": 0.13492110177323527, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 412 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1291.0, "completions/max_terminated_length": 1291.0, "completions/mean_length": 1025.3125, "completions/mean_terminated_length": 1025.3125, "completions/min_length": 690.0, "completions/min_terminated_length": 690.0, "epoch": 0.08261652330466093, "frac_reward_zero_std": 0.0, "grad_norm": 3.7347910278745347, "kl": 0.00293731689453125, "learning_rate": 8.24e-07, "loss": 0.0178, "num_tokens": 17924366.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6692298650741577, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13173742005776823, "rewards/wordcountpos_reward/raw_geo/std": 0.08107770013541364, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13924399049470282, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 413 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1388.0, "completions/max_terminated_length": 1388.0, "completions/mean_length": 1143.6875, "completions/mean_terminated_length": 1143.6875, "completions/min_length": 891.0, "completions/min_terminated_length": 891.0, "epoch": 0.08281656331266253, "frac_reward_zero_std": 0.0, "grad_norm": 2.863739208362909, "kl": 0.00208282470703125, "learning_rate": 8.259999999999999e-07, "loss": 0.0021, "num_tokens": 17967657.0, "reward": -2.9802322387695312e-08, "reward_std": 0.439179390668869, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01590819878941648, "rewards/wordcountpos_reward/raw_geo/std": 0.15021720362942084, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 414 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1147.0, "completions/max_terminated_length": 1147.0, "completions/mean_length": 1010.0625, "completions/mean_terminated_length": 1010.0625, "completions/min_length": 679.0, "completions/min_terminated_length": 679.0, "epoch": 0.08301660332066413, "frac_reward_zero_std": 0.0, "grad_norm": 3.018415832433584, "kl": 0.0016231536865234375, "learning_rate": 8.28e-07, "loss": 0.019, "num_tokens": 18016514.0, "reward": -7.450580596923828e-09, "reward_std": 1.0669922828674316, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.0885631528219577, "rewards/wordcountpos_reward/raw_geo/std": 0.06570832073225384, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258098, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 415 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1113.0, "completions/mean_length": 1300.25, "completions/mean_terminated_length": 1100.5, "completions/min_length": 1081.0, "completions/min_terminated_length": 1081.0, "epoch": 0.08321664332866573, "frac_reward_zero_std": 0.0, "grad_norm": 2.497882683419577, "kl": 0.0019235610961914062, "learning_rate": 8.299999999999999e-07, "loss": -0.0316, "num_tokens": 18059894.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9635406732559204, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.016915762273923017, "rewards/wordcountpos_reward/raw_geo/std": 0.09011074580858566, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.16307235385739852, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 416 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1430.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1115.3125, "completions/mean_terminated_length": 1115.3125, "completions/min_length": 927.0, "completions/min_terminated_length": 927.0, "epoch": 0.08341668333666734, "frac_reward_zero_std": 0.0, "grad_norm": 3.174205009600204, "kl": 0.002552032470703125, "learning_rate": 8.319999999999999e-07, "loss": 0.0017, "num_tokens": 18101019.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0140455961227417, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.036933062777199335, "rewards/wordcountpos_reward/raw_geo/std": 0.06099117174755285, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10174405069512348, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 417 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1334.0, "completions/max_terminated_length": 1334.0, "completions/mean_length": 1081.125, "completions/mean_terminated_length": 1081.125, "completions/min_length": 753.0, "completions/min_terminated_length": 753.0, "epoch": 0.08361672334466894, "frac_reward_zero_std": 0.0, "grad_norm": 2.454527432100527, "kl": 0.0016956329345703125, "learning_rate": 8.34e-07, "loss": -0.0485, "num_tokens": 18137725.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6814285516738892, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09292350969956907, "rewards/wordcountpos_reward/raw_geo/std": 0.16834987222546696, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.17018508443151817, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 418 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1172.125, "completions/mean_terminated_length": 1150.2667236328125, "completions/min_length": 678.0, "completions/min_terminated_length": 678.0, "epoch": 0.08381676335267053, "frac_reward_zero_std": 0.0, "grad_norm": 2.722696306391821, "kl": 0.00156402587890625, "learning_rate": 8.359999999999999e-07, "loss": -0.05, "num_tokens": 18186975.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0589263439178467, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0050790527715392254, "rewards/wordcountpos_reward/raw_geo/std": 0.0694348774419158, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715323, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 419 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1182.625, "completions/mean_terminated_length": 1137.2857666015625, "completions/min_length": 911.0, "completions/min_terminated_length": 911.0, "epoch": 0.08401680336067213, "frac_reward_zero_std": 0.0, "grad_norm": 3.3174118662944974, "kl": 0.00286102294921875, "learning_rate": 8.38e-07, "loss": 0.0217, "num_tokens": 18231665.0, "reward": 0.0, "reward_std": 0.7940347194671631, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09525078176517329, "rewards/wordcountpos_reward/raw_geo/std": 0.10568430602401672, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965647, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 420 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1253.0, "completions/max_terminated_length": 1253.0, "completions/mean_length": 1062.75, "completions/mean_terminated_length": 1062.75, "completions/min_length": 782.0, "completions/min_terminated_length": 782.0, "epoch": 0.08421684336867373, "frac_reward_zero_std": 0.0, "grad_norm": 2.9858728858312538, "kl": 0.0021114349365234375, "learning_rate": 8.399999999999999e-07, "loss": -0.036, "num_tokens": 18282037.0, "reward": -1.30385160446167e-08, "reward_std": 1.0569286346435547, "rewards/wordcountpos_reward/mean": -1.30385160446167e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09440094520792518, "rewards/wordcountpos_reward/raw_geo/std": 0.03961531189437775, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05900408021045224, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 421 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 1157.4375, "completions/mean_terminated_length": 1134.60009765625, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.08441688337667534, "frac_reward_zero_std": 0.0, "grad_norm": 3.1243534879094694, "kl": 0.002193450927734375, "learning_rate": 8.419999999999999e-07, "loss": -0.0037, "num_tokens": 18325220.0, "reward": 0.0, "reward_std": 0.9127192497253418, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04266925463173475, "rewards/wordcountpos_reward/raw_geo/std": 0.056298800110485235, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12524050936172845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 422 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1305.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 1163.1875, "completions/mean_terminated_length": 1163.1875, "completions/min_length": 985.0, "completions/min_terminated_length": 985.0, "epoch": 0.08461692338467694, "frac_reward_zero_std": 0.0, "grad_norm": 2.1314119505993783, "kl": 0.0006833076477050781, "learning_rate": 8.439999999999999e-07, "loss": -0.0299, "num_tokens": 18362383.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7901430130004883, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06132893237799952, "rewards/wordcountpos_reward/raw_geo/std": 0.1512981953109685, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.1, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 423 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1389.0, "completions/mean_length": 1318.0625, "completions/mean_terminated_length": 1257.416748046875, "completions/min_length": 1175.0, "completions/min_terminated_length": 1175.0, "epoch": 0.08481696339267854, "frac_reward_zero_std": 0.0, "grad_norm": 2.1400305966241824, "kl": 0.0016889572143554688, "learning_rate": 8.459999999999999e-07, "loss": -0.018, "num_tokens": 18410776.0, "reward": 0.0, "reward_std": 1.0197169780731201, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.027326407128577597, "rewards/wordcountpos_reward/raw_geo/std": 0.045678254096492704, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 424 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1424.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1087.3125, "completions/mean_terminated_length": 1087.3125, "completions/min_length": 857.0, "completions/min_terminated_length": 857.0, "epoch": 0.08501700340068014, "frac_reward_zero_std": 0.0, "grad_norm": 3.5559784916879056, "kl": 0.003215789794921875, "learning_rate": 8.48e-07, "loss": 0.0016, "num_tokens": 18460037.0, "reward": 2.9802322387695312e-08, "reward_std": 0.620617151260376, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.15150400127886596, "rewards/wordcountpos_reward/raw_geo/std": 0.22420005285980008, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.775, "rewards/wordcountpos_reward/raw_rule/std": 0.16843506277010845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 425 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1079.0, "completions/max_terminated_length": 1079.0, "completions/mean_length": 845.5625, "completions/mean_terminated_length": 845.5625, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.08521704340868173, "frac_reward_zero_std": 0.0, "grad_norm": 4.166213227779742, "kl": 0.00321197509765625, "learning_rate": 8.499999999999999e-07, "loss": -0.0762, "num_tokens": 18495766.0, "reward": 0.0, "reward_std": 0.66336989402771, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0876084842089189, "rewards/wordcountpos_reward/raw_geo/std": 0.15112486218954263, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1308094458023239, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 426 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1267.0, "completions/max_terminated_length": 1267.0, "completions/mean_length": 1103.8125, "completions/mean_terminated_length": 1103.8125, "completions/min_length": 653.0, "completions/min_terminated_length": 653.0, "epoch": 0.08541708341668333, "frac_reward_zero_std": 0.0, "grad_norm": 3.2879679226867595, "kl": 0.002384185791015625, "learning_rate": 8.52e-07, "loss": 0.0132, "num_tokens": 18533379.0, "reward": 0.0, "reward_std": 0.9046326279640198, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0975877822923059, "rewards/wordcountpos_reward/raw_geo/std": 0.2373126395944559, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10461569884316813, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 427 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1320.0, "completions/mean_terminated_length": 1278.4615478515625, "completions/min_length": 1098.0, "completions/min_terminated_length": 1098.0, "epoch": 0.08561712342468494, "frac_reward_zero_std": 0.0, "grad_norm": 3.1621827070877644, "kl": 0.002986907958984375, "learning_rate": 8.539999999999999e-07, "loss": 0.0269, "num_tokens": 18587243.0, "reward": 0.0, "reward_std": 0.956751823425293, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2725917107765537, "rewards/wordcountpos_reward/raw_geo/std": 0.3118636050900945, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194864, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 428 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1479.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1051.875, "completions/mean_terminated_length": 1051.875, "completions/min_length": 570.0, "completions/min_terminated_length": 570.0, "epoch": 0.08581716343268654, "frac_reward_zero_std": 0.0, "grad_norm": 3.30993313583991, "kl": 0.0030975341796875, "learning_rate": 8.559999999999999e-07, "loss": -0.0733, "num_tokens": 18631649.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0577553510665894, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07263579775687691, "rewards/wordcountpos_reward/raw_geo/std": 0.06826025246986418, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11925695879998881, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 429 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1343.0, "completions/mean_length": 1235.6875, "completions/mean_terminated_length": 1147.5833740234375, "completions/min_length": 982.0, "completions/min_terminated_length": 982.0, "epoch": 0.08601720344068814, "frac_reward_zero_std": 0.0, "grad_norm": 3.299305001474713, "kl": 0.00311279296875, "learning_rate": 8.58e-07, "loss": -0.0487, "num_tokens": 18675052.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8397456407546997, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06779078550927821, "rewards/wordcountpos_reward/raw_geo/std": 0.06631056658891393, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.12382783747337808, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 430 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1123.9375, "completions/mean_terminated_length": 1070.21435546875, "completions/min_length": 697.0, "completions/min_terminated_length": 697.0, "epoch": 0.08621724344868974, "frac_reward_zero_std": 0.0, "grad_norm": 3.2689711595423154, "kl": 0.0028076171875, "learning_rate": 8.599999999999999e-07, "loss": -0.0196, "num_tokens": 18714307.0, "reward": 0.0, "reward_std": 0.9198898077011108, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.029487683276100202, "rewards/wordcountpos_reward/raw_geo/std": 0.16314352642400448, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.15682025568335423, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 431 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1298.0, "completions/max_terminated_length": 1298.0, "completions/mean_length": 1083.0, "completions/mean_terminated_length": 1083.0, "completions/min_length": 941.0, "completions/min_terminated_length": 941.0, "epoch": 0.08641728345669134, "frac_reward_zero_std": 0.0, "grad_norm": 2.580099266180246, "kl": 0.001789093017578125, "learning_rate": 8.62e-07, "loss": -0.0264, "num_tokens": 18754699.0, "reward": 0.0, "reward_std": 0.5988175272941589, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0619943848886873, "rewards/wordcountpos_reward/raw_geo/std": 0.190361213697201, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238704, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 432 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1395.0, "completions/mean_length": 1308.4375, "completions/mean_terminated_length": 1193.5, "completions/min_length": 1062.0, "completions/min_terminated_length": 1062.0, "epoch": 0.08661732346469293, "frac_reward_zero_std": 0.0, "grad_norm": 3.3061758103046195, "kl": 0.002994537353515625, "learning_rate": 8.639999999999999e-07, "loss": -0.0086, "num_tokens": 18807722.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8916733264923096, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1431637622634425, "rewards/wordcountpos_reward/raw_geo/std": 0.055878666223157104, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1197992147380435, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 433 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 1209.25, "completions/mean_terminated_length": 1077.0909423828125, "completions/min_length": 911.0, "completions/min_terminated_length": 911.0, "epoch": 0.08681736347269454, "frac_reward_zero_std": 0.0, "grad_norm": 3.3136994035699843, "kl": 0.00252532958984375, "learning_rate": 8.659999999999999e-07, "loss": 0.0123, "num_tokens": 18856574.0, "reward": 0.0, "reward_std": 0.6713405847549438, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03645705260354844, "rewards/wordcountpos_reward/raw_geo/std": 0.08884293456383821, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.6958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.133263870794973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 434 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1428.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 1005.25, "completions/mean_terminated_length": 1005.25, "completions/min_length": 839.0, "completions/min_terminated_length": 839.0, "epoch": 0.08701740348069614, "frac_reward_zero_std": 0.0, "grad_norm": 3.739858934924631, "kl": 0.0022830963134765625, "learning_rate": 8.68e-07, "loss": -0.0203, "num_tokens": 18887154.0, "reward": 0.0, "reward_std": 0.8702331781387329, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.003987106773799781, "rewards/wordcountpos_reward/raw_geo/std": 0.04258197403398292, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09803627446568496, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 435 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1306.4375, "completions/mean_terminated_length": 1261.769287109375, "completions/min_length": 973.0, "completions/min_terminated_length": 973.0, "epoch": 0.08721744348869774, "frac_reward_zero_std": 0.0, "grad_norm": 3.2976868680562763, "kl": 0.003086090087890625, "learning_rate": 8.699999999999999e-07, "loss": -0.0189, "num_tokens": 18939729.0, "reward": 0.0, "reward_std": 1.0671062469482422, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.3183368034364289, "rewards/wordcountpos_reward/raw_geo/std": 0.29246018487084263, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11538983843829065, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 436 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1276.0, "completions/max_terminated_length": 1276.0, "completions/mean_length": 1095.0, "completions/mean_terminated_length": 1095.0, "completions/min_length": 694.0, "completions/min_terminated_length": 694.0, "epoch": 0.08741748349669934, "frac_reward_zero_std": 0.0, "grad_norm": 3.0685868225747104, "kl": 0.0023365020751953125, "learning_rate": 8.72e-07, "loss": -0.0397, "num_tokens": 18988665.0, "reward": -5.960464477539063e-08, "reward_std": 0.3992195129394531, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07773019844889655, "rewards/wordcountpos_reward/raw_geo/std": 0.17176865459540083, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593314, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 437 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1320.0, "completions/mean_length": 1108.625, "completions/mean_terminated_length": 1018.3077392578125, "completions/min_length": 795.0, "completions/min_terminated_length": 795.0, "epoch": 0.08761752350470094, "frac_reward_zero_std": 0.0, "grad_norm": 3.046517645830191, "kl": 0.002349853515625, "learning_rate": 8.739999999999999e-07, "loss": -0.0278, "num_tokens": 19028155.0, "reward": 0.0, "reward_std": 0.9707126617431641, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.037594242635203765, "rewards/wordcountpos_reward/raw_geo/std": 0.11029408568025839, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10532137766186216, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 438 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1271.0, "completions/max_terminated_length": 1271.0, "completions/mean_length": 1063.0625, "completions/mean_terminated_length": 1063.0625, "completions/min_length": 830.0, "completions/min_terminated_length": 830.0, "epoch": 0.08781756351270253, "frac_reward_zero_std": 0.0, "grad_norm": 2.4351577943229126, "kl": 0.0015048980712890625, "learning_rate": 8.76e-07, "loss": -0.0262, "num_tokens": 19071540.0, "reward": 0.0, "reward_std": 0.6890659332275391, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.3749114757417403, "rewards/wordcountpos_reward/raw_geo/std": 0.3589918747478618, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 439 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1461.0, "completions/mean_length": 1210.5, "completions/mean_terminated_length": 1114.0, "completions/min_length": 825.0, "completions/min_terminated_length": 825.0, "epoch": 0.08801760352070415, "frac_reward_zero_std": 0.0, "grad_norm": 3.180666596159949, "kl": 0.002948760986328125, "learning_rate": 8.78e-07, "loss": 0.0119, "num_tokens": 19115948.0, "reward": 0.0, "reward_std": 0.8787262439727783, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.027289230439799333, "rewards/wordcountpos_reward/raw_geo/std": 0.180860792186217, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.1600347184554374, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 440 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1351.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 1135.75, "completions/mean_terminated_length": 1135.75, "completions/min_length": 939.0, "completions/min_terminated_length": 939.0, "epoch": 0.08821764352870574, "frac_reward_zero_std": 0.0, "grad_norm": 3.201098455785099, "kl": 0.003116607666015625, "learning_rate": 8.799999999999999e-07, "loss": -0.0239, "num_tokens": 19166248.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7383455634117126, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10906757176808307, "rewards/wordcountpos_reward/raw_geo/std": 0.10850857774550682, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.11666666666666668, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 441 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1266.6875, "completions/mean_terminated_length": 1188.916748046875, "completions/min_length": 985.0, "completions/min_terminated_length": 985.0, "epoch": 0.08841768353670734, "frac_reward_zero_std": 0.0, "grad_norm": 3.244916872938238, "kl": 0.003330230712890625, "learning_rate": 8.82e-07, "loss": -0.0236, "num_tokens": 19219771.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5060003995895386, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05112165519476513, "rewards/wordcountpos_reward/raw_geo/std": 0.08143172569982593, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11385500851066221, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 442 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1194.0, "completions/max_terminated_length": 1194.0, "completions/mean_length": 987.25, "completions/mean_terminated_length": 987.25, "completions/min_length": 786.0, "completions/min_terminated_length": 786.0, "epoch": 0.08861772354470894, "frac_reward_zero_std": 0.0, "grad_norm": 3.801227701350928, "kl": 0.00263214111328125, "learning_rate": 8.839999999999999e-07, "loss": -0.0038, "num_tokens": 19259527.0, "reward": -5.960464477539063e-08, "reward_std": 0.864219069480896, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.059527251674027566, "rewards/wordcountpos_reward/raw_geo/std": 0.09559034088448276, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.08432740427115681, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 443 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1384.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 930.4375, "completions/mean_terminated_length": 930.4375, "completions/min_length": 516.0, "completions/min_terminated_length": 516.0, "epoch": 0.08881776355271054, "frac_reward_zero_std": 0.0, "grad_norm": 3.4880641149596223, "kl": 0.00276947021484375, "learning_rate": 8.86e-07, "loss": -0.0167, "num_tokens": 19299238.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7947709560394287, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.15473765789872287, "rewards/wordcountpos_reward/raw_geo/std": 0.22052653973512842, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715323, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 444 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1281.9375, "completions/mean_terminated_length": 1231.615478515625, "completions/min_length": 1079.0, "completions/min_terminated_length": 1079.0, "epoch": 0.08901780356071214, "frac_reward_zero_std": 0.0, "grad_norm": 3.4593938441905485, "kl": 0.00312042236328125, "learning_rate": 8.88e-07, "loss": -0.0204, "num_tokens": 19345069.0, "reward": 0.0, "reward_std": 0.7192075252532959, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.060805674299536865, "rewards/wordcountpos_reward/raw_geo/std": 0.1540085555548371, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11474609652039004, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 445 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1262.0, "completions/mean_length": 1106.9375, "completions/mean_terminated_length": 1016.2308349609375, "completions/min_length": 706.0, "completions/min_terminated_length": 706.0, "epoch": 0.08921784356871375, "frac_reward_zero_std": 0.0, "grad_norm": 2.3577051480288223, "kl": 0.0016155242919921875, "learning_rate": 8.9e-07, "loss": 0.0156, "num_tokens": 19393796.0, "reward": 0.0, "reward_std": 0.8608711957931519, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10506878252243318, "rewards/wordcountpos_reward/raw_geo/std": 0.07971488671181962, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 446 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1383.0, "completions/mean_length": 1078.1875, "completions/mean_terminated_length": 1050.0667724609375, "completions/min_length": 638.0, "completions/min_terminated_length": 638.0, "epoch": 0.08941788357671535, "frac_reward_zero_std": 0.0, "grad_norm": 3.7693851524506212, "kl": 0.0038604736328125, "learning_rate": 8.92e-07, "loss": -0.0196, "num_tokens": 19446863.0, "reward": 5.587935447692871e-09, "reward_std": 1.0194907188415527, "rewards/wordcountpos_reward/mean": 5.587935447692871e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.23204758343737142, "rewards/wordcountpos_reward/raw_geo/std": 0.263949380435127, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.14194417264596723, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 447 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1207.3125, "completions/mean_terminated_length": 1109.75, "completions/min_length": 903.0, "completions/min_terminated_length": 903.0, "epoch": 0.08961792358471694, "frac_reward_zero_std": 0.0, "grad_norm": 2.9068800631573892, "kl": 0.00243377685546875, "learning_rate": 8.939999999999999e-07, "loss": 0.0011, "num_tokens": 19499324.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8448344469070435, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1561754058795112, "rewards/wordcountpos_reward/raw_geo/std": 0.09686430726169433, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10532137766186216, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 448 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1286.0, "completions/max_terminated_length": 1286.0, "completions/mean_length": 1077.5625, "completions/mean_terminated_length": 1077.5625, "completions/min_length": 918.0, "completions/min_terminated_length": 918.0, "epoch": 0.08981796359271854, "frac_reward_zero_std": 0.0, "grad_norm": 2.7248304281811837, "kl": 0.0017337799072265625, "learning_rate": 8.96e-07, "loss": 0.0125, "num_tokens": 19548149.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0590662956237793, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.027372537861726533, "rewards/wordcountpos_reward/raw_geo/std": 0.11804690267772838, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.14782371884055637, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 449 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1434.0, "completions/mean_length": 1274.0, "completions/mean_terminated_length": 1258.933349609375, "completions/min_length": 1054.0, "completions/min_terminated_length": 1054.0, "epoch": 0.09001800360072014, "frac_reward_zero_std": 0.0, "grad_norm": 3.318786933750916, "kl": 0.0032501220703125, "learning_rate": 8.98e-07, "loss": 0.0373, "num_tokens": 19596029.0, "reward": 7.450580596923828e-09, "reward_std": 1.0158376693725586, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.11766312978343946, "rewards/wordcountpos_reward/raw_geo/std": 0.09973759725612268, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7708333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09727776191382576, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 450 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1389.0, "completions/max_terminated_length": 1389.0, "completions/mean_length": 1108.375, "completions/mean_terminated_length": 1108.375, "completions/min_length": 718.0, "completions/min_terminated_length": 718.0, "epoch": 0.09021804360872174, "frac_reward_zero_std": 0.0, "grad_norm": 3.0416041909251246, "kl": 0.0021419525146484375, "learning_rate": 9e-07, "loss": -0.052, "num_tokens": 19647387.0, "reward": -1.4901161193847656e-08, "reward_std": 0.946031928062439, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02360611013471328, "rewards/wordcountpos_reward/raw_geo/std": 0.11839692479058546, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952505, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 451 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1407.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 1105.625, "completions/mean_terminated_length": 1105.625, "completions/min_length": 791.0, "completions/min_terminated_length": 791.0, "epoch": 0.09041808361672335, "frac_reward_zero_std": 0.0, "grad_norm": 3.36723731552191, "kl": 0.002902984619140625, "learning_rate": 9.02e-07, "loss": -0.0397, "num_tokens": 19698469.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5664991140365601, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.019582839167614552, "rewards/wordcountpos_reward/raw_geo/std": 0.14939457389724295, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.775, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036263, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 452 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1297.0, "completions/mean_length": 1025.9375, "completions/mean_terminated_length": 994.3333740234375, "completions/min_length": 883.0, "completions/min_terminated_length": 883.0, "epoch": 0.09061812362472495, "frac_reward_zero_std": 0.0, "grad_norm": 3.7303988369400694, "kl": 0.00295257568359375, "learning_rate": 9.039999999999999e-07, "loss": -0.0383, "num_tokens": 19736444.0, "reward": -7.450580596923828e-09, "reward_std": 1.0268616676330566, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.010483479188954837, "rewards/wordcountpos_reward/raw_geo/std": 0.04943226496645019, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.18252346373772008, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 453 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1434.0, "completions/mean_length": 1000.5625, "completions/mean_terminated_length": 929.21435546875, "completions/min_length": 556.0, "completions/min_terminated_length": 556.0, "epoch": 0.09081816363272655, "frac_reward_zero_std": 0.0, "grad_norm": 3.157173861491531, "kl": 0.0021572113037109375, "learning_rate": 9.06e-07, "loss": -0.0327, "num_tokens": 19791541.0, "reward": 0.0, "reward_std": 0.9003146886825562, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04191839017513677, "rewards/wordcountpos_reward/raw_geo/std": 0.10478729870999097, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.6833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09269623828717427, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 454 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1294.0, "completions/mean_length": 1097.0625, "completions/mean_terminated_length": 1070.2000732421875, "completions/min_length": 967.0, "completions/min_terminated_length": 967.0, "epoch": 0.09101820364072814, "frac_reward_zero_std": 0.0, "grad_norm": 3.171057841397273, "kl": 0.0021343231201171875, "learning_rate": 9.08e-07, "loss": -0.0449, "num_tokens": 19830086.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5525245666503906, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09104565016963011, "rewards/wordcountpos_reward/raw_geo/std": 0.088891372838374, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258099, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 455 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1330.0, "completions/max_terminated_length": 1330.0, "completions/mean_length": 1045.875, "completions/mean_terminated_length": 1045.875, "completions/min_length": 724.0, "completions/min_terminated_length": 724.0, "epoch": 0.09121824364872974, "frac_reward_zero_std": 0.0, "grad_norm": 3.580149128499416, "kl": 0.003387451171875, "learning_rate": 9.1e-07, "loss": -0.0217, "num_tokens": 19874276.0, "reward": 0.0, "reward_std": 0.4022776782512665, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.16431213167510778, "rewards/wordcountpos_reward/raw_geo/std": 0.40338450210035404, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13957607775504186, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 456 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1062.4375, "completions/mean_terminated_length": 1033.2667236328125, "completions/min_length": 738.0, "completions/min_terminated_length": 738.0, "epoch": 0.09141828365673135, "frac_reward_zero_std": 0.0, "grad_norm": 2.6523201988007066, "kl": 0.0021839141845703125, "learning_rate": 9.12e-07, "loss": 0.044, "num_tokens": 19915979.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0247694253921509, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.054765778931615196, "rewards/wordcountpos_reward/raw_geo/std": 0.10303805703992713, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891871, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 457 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1200.0, "completions/mean_length": 985.1875, "completions/mean_terminated_length": 950.86669921875, "completions/min_length": 816.0, "completions/min_terminated_length": 816.0, "epoch": 0.09161832366473295, "frac_reward_zero_std": 0.0, "grad_norm": 2.9604431023636972, "kl": 0.001804351806640625, "learning_rate": 9.14e-07, "loss": 0.0146, "num_tokens": 19949886.0, "reward": 0.0, "reward_std": 0.9891246557235718, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.014044014781523294, "rewards/wordcountpos_reward/raw_geo/std": 0.162384230061039, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965646, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 458 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1278.0, "completions/max_terminated_length": 1278.0, "completions/mean_length": 868.25, "completions/mean_terminated_length": 868.25, "completions/min_length": 519.0, "completions/min_terminated_length": 519.0, "epoch": 0.09181836367273455, "frac_reward_zero_std": 0.0, "grad_norm": 3.3286787778973164, "kl": 0.002689361572265625, "learning_rate": 9.16e-07, "loss": -0.0033, "num_tokens": 19988010.0, "reward": -5.960464477539063e-08, "reward_std": 0.8544387817382812, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10540768075397114, "rewards/wordcountpos_reward/raw_geo/std": 0.17074815818788974, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8250000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258099, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 459 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1314.0625, "completions/mean_terminated_length": 1229.5455322265625, "completions/min_length": 917.0, "completions/min_terminated_length": 917.0, "epoch": 0.09201840368073615, "frac_reward_zero_std": 0.0, "grad_norm": 1.683631270482807, "kl": 0.0011749267578125, "learning_rate": 9.18e-07, "loss": 0.0296, "num_tokens": 20030211.0, "reward": 0.0, "reward_std": 0.6370956897735596, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2826937558590972, "rewards/wordcountpos_reward/raw_geo/std": 0.3317353558341128, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.09878896324620104, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 460 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1218.0, "completions/max_terminated_length": 1218.0, "completions/mean_length": 941.125, "completions/mean_terminated_length": 941.125, "completions/min_length": 195.0, "completions/min_terminated_length": 195.0, "epoch": 0.09221844368873774, "frac_reward_zero_std": 0.0, "grad_norm": 3.5927098932667105, "kl": 0.0031414031982421875, "learning_rate": 9.2e-07, "loss": -0.0582, "num_tokens": 20084397.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7087376117706299, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.16634700842841904, "rewards/wordcountpos_reward/raw_geo/std": 0.08169764145115765, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.1189459883650901, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 461 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1282.3125, "completions/mean_terminated_length": 1209.75, "completions/min_length": 870.0, "completions/min_terminated_length": 870.0, "epoch": 0.09241848369673934, "frac_reward_zero_std": 0.0, "grad_norm": 2.9637117047520802, "kl": 0.0028533935546875, "learning_rate": 9.22e-07, "loss": -0.0477, "num_tokens": 20135162.0, "reward": 0.0, "reward_std": 0.9201784133911133, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04087278107322107, "rewards/wordcountpos_reward/raw_geo/std": 0.09916782732043211, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11080513425729777, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 462 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1166.0, "completions/mean_terminated_length": 1014.1818237304688, "completions/min_length": 658.0, "completions/min_terminated_length": 658.0, "epoch": 0.09261852370474095, "frac_reward_zero_std": 0.0, "grad_norm": 3.4496587125355274, "kl": 0.00307464599609375, "learning_rate": 9.24e-07, "loss": 0.0141, "num_tokens": 20186546.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0554344654083252, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06641499834745053, "rewards/wordcountpos_reward/raw_geo/std": 0.06972501744383654, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.10246950765959598, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 463 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1199.0, "completions/max_terminated_length": 1199.0, "completions/mean_length": 1037.0, "completions/mean_terminated_length": 1037.0, "completions/min_length": 850.0, "completions/min_terminated_length": 850.0, "epoch": 0.09281856371274255, "frac_reward_zero_std": 0.0, "grad_norm": 3.4791752076316773, "kl": 0.003498077392578125, "learning_rate": 9.26e-07, "loss": -0.0024, "num_tokens": 20227266.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0228264331817627, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05289641691077268, "rewards/wordcountpos_reward/raw_geo/std": 0.06736946918752376, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 464 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1398.0, "completions/mean_length": 1216.5, "completions/mean_terminated_length": 1197.60009765625, "completions/min_length": 831.0, "completions/min_terminated_length": 831.0, "epoch": 0.09301860372074415, "frac_reward_zero_std": 0.0, "grad_norm": 2.5043722242181445, "kl": 0.0016031265258789062, "learning_rate": 9.28e-07, "loss": 0.0057, "num_tokens": 20271642.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9817145466804504, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12695525480440295, "rewards/wordcountpos_reward/raw_geo/std": 0.06893485379345207, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06070572613176771, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 465 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 929.0, "completions/max_terminated_length": 929.0, "completions/mean_length": 794.8125, "completions/mean_terminated_length": 794.8125, "completions/min_length": 545.0, "completions/min_terminated_length": 545.0, "epoch": 0.09321864372874575, "frac_reward_zero_std": 0.0, "grad_norm": 4.16050644224258, "kl": 0.003055572509765625, "learning_rate": 9.3e-07, "loss": 0.0357, "num_tokens": 20311055.0, "reward": -7.450580596923828e-09, "reward_std": 1.0447560548782349, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.0017178301668119714, "rewards/wordcountpos_reward/raw_geo/std": 0.006871320667247886, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.6666666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.1167460047694551, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 466 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1187.0, "completions/mean_length": 1051.3125, "completions/mean_terminated_length": 1021.4000244140625, "completions/min_length": 906.0, "completions/min_terminated_length": 906.0, "epoch": 0.09341868373674735, "frac_reward_zero_std": 0.0, "grad_norm": 3.2870134690019093, "kl": 0.002498626708984375, "learning_rate": 9.32e-07, "loss": 0.0004, "num_tokens": 20349572.0, "reward": 0.0, "reward_std": 0.7005062699317932, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.011948765149479162, "rewards/wordcountpos_reward/raw_geo/std": 0.04819504353936146, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.725, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026005, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 467 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1295.0, "completions/mean_length": 1018.875, "completions/mean_terminated_length": 986.800048828125, "completions/min_length": 632.0, "completions/min_terminated_length": 632.0, "epoch": 0.09361872374474894, "frac_reward_zero_std": 0.0, "grad_norm": 3.121983313684933, "kl": 0.002147674560546875, "learning_rate": 9.34e-07, "loss": 0.0256, "num_tokens": 20391170.0, "reward": 0.0, "reward_std": 1.006962776184082, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.014171749608429755, "rewards/wordcountpos_reward/raw_geo/std": 0.08439567214228119, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.1088662107903635, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 468 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1342.0, "completions/max_terminated_length": 1342.0, "completions/mean_length": 1192.4375, "completions/mean_terminated_length": 1192.4375, "completions/min_length": 1010.0, "completions/min_terminated_length": 1010.0, "epoch": 0.09381876375275056, "frac_reward_zero_std": 0.0, "grad_norm": 2.684943131551981, "kl": 0.0020389556884765625, "learning_rate": 9.36e-07, "loss": -0.0193, "num_tokens": 20424025.0, "reward": 0.0, "reward_std": 0.4325961470603943, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.027337552314660955, "rewards/wordcountpos_reward/raw_geo/std": 0.07482321122107, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362767, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 469 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1409.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 1236.1875, "completions/mean_terminated_length": 1236.1875, "completions/min_length": 1022.0, "completions/min_terminated_length": 1022.0, "epoch": 0.09401880376075215, "frac_reward_zero_std": 0.0, "grad_norm": 2.095362318853056, "kl": 0.0007042884826660156, "learning_rate": 9.379999999999998e-07, "loss": 0.0008, "num_tokens": 20466852.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9703868627548218, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09096523782005494, "rewards/wordcountpos_reward/raw_geo/std": 0.1597409795726704, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 470 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1373.0, "completions/mean_length": 1198.5, "completions/mean_terminated_length": 1178.4000244140625, "completions/min_length": 986.0, "completions/min_terminated_length": 986.0, "epoch": 0.09421884376875375, "frac_reward_zero_std": 0.0, "grad_norm": 2.4935716459119375, "kl": 0.002117156982421875, "learning_rate": 9.399999999999999e-07, "loss": -0.0348, "num_tokens": 20514764.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8660947680473328, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04001216727402988, "rewards/wordcountpos_reward/raw_geo/std": 0.10160015094173779, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 471 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1376.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 1173.5625, "completions/mean_terminated_length": 1173.5625, "completions/min_length": 916.0, "completions/min_terminated_length": 916.0, "epoch": 0.09441888377675535, "frac_reward_zero_std": 0.0, "grad_norm": 2.5935614888674263, "kl": 0.0017590522766113281, "learning_rate": 9.419999999999999e-07, "loss": 0.0054, "num_tokens": 20557757.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9719048738479614, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06503271457099888, "rewards/wordcountpos_reward/raw_geo/std": 0.14974833025692413, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9875, "rewards/wordcountpos_reward/raw_rule/std": 0.026874192494328493, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 472 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1430.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 983.5, "completions/mean_terminated_length": 983.5, "completions/min_length": 662.0, "completions/min_terminated_length": 662.0, "epoch": 0.09461892378475695, "frac_reward_zero_std": 0.0, "grad_norm": 3.3986497450490534, "kl": 0.002597808837890625, "learning_rate": 9.439999999999999e-07, "loss": 0.0448, "num_tokens": 20606269.0, "reward": 0.0, "reward_std": 0.4908173084259033, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05732191105705363, "rewards/wordcountpos_reward/raw_geo/std": 0.14266987840195366, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.6916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10292032157252812, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 473 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1302.0, "completions/max_terminated_length": 1302.0, "completions/mean_length": 1096.8125, "completions/mean_terminated_length": 1096.8125, "completions/min_length": 853.0, "completions/min_terminated_length": 853.0, "epoch": 0.09481896379275855, "frac_reward_zero_std": 0.0, "grad_norm": 3.0694386003965044, "kl": 0.0021696090698242188, "learning_rate": 9.459999999999999e-07, "loss": -0.0266, "num_tokens": 20652386.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9674227237701416, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15948392931483177, "rewards/wordcountpos_reward/raw_geo/std": 0.14433339806232215, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 474 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1240.0, "completions/max_terminated_length": 1240.0, "completions/mean_length": 932.625, "completions/mean_terminated_length": 932.625, "completions/min_length": 808.0, "completions/min_terminated_length": 808.0, "epoch": 0.09501900380076016, "frac_reward_zero_std": 0.0, "grad_norm": 3.2306023985529873, "kl": 0.001682281494140625, "learning_rate": 9.479999999999999e-07, "loss": -0.052, "num_tokens": 20693556.0, "reward": 0.0, "reward_std": 0.8287612795829773, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05784487664166692, "rewards/wordcountpos_reward/raw_geo/std": 0.1167221257573027, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.12881223774390615, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 475 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1498.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1066.125, "completions/mean_terminated_length": 1066.125, "completions/min_length": 907.0, "completions/min_terminated_length": 907.0, "epoch": 0.09521904380876176, "frac_reward_zero_std": 0.0, "grad_norm": 1.3882257807466178, "kl": 0.0004857778549194336, "learning_rate": 9.499999999999999e-07, "loss": -0.0003, "num_tokens": 20742846.0, "reward": 0.0, "reward_std": 0.9937007427215576, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.028478928645724357, "rewards/wordcountpos_reward/raw_geo/std": 0.0800622126073824, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11855612829185828, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 476 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1325.0, "completions/max_terminated_length": 1325.0, "completions/mean_length": 1134.1875, "completions/mean_terminated_length": 1134.1875, "completions/min_length": 819.0, "completions/min_terminated_length": 819.0, "epoch": 0.09541908381676335, "frac_reward_zero_std": 0.0, "grad_norm": 3.4461237601519192, "kl": 0.003337860107421875, "learning_rate": 9.52e-07, "loss": -0.0145, "num_tokens": 20780905.0, "reward": 0.0, "reward_std": 1.0549769401550293, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0020607187762826987, "rewards/wordcountpos_reward/raw_geo/std": 0.09353046792228852, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09269623828717427, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 477 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1452.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1307.5, "completions/mean_terminated_length": 1307.5, "completions/min_length": 1091.0, "completions/min_terminated_length": 1091.0, "epoch": 0.09561912382476495, "frac_reward_zero_std": 0.0, "grad_norm": 2.610722762470047, "kl": 0.00225067138671875, "learning_rate": 9.539999999999999e-07, "loss": -0.0187, "num_tokens": 20839105.0, "reward": 0.0, "reward_std": 0.759341835975647, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.029143578409760205, "rewards/wordcountpos_reward/raw_geo/std": 0.05348086018287396, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11894598836509011, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 478 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1465.0, "completions/mean_length": 1413.6875, "completions/mean_terminated_length": 1327.375, "completions/min_length": 1168.0, "completions/min_terminated_length": 1168.0, "epoch": 0.09581916383276655, "frac_reward_zero_std": 0.0, "grad_norm": 1.4588838362600443, "kl": 0.0006351470947265625, "learning_rate": 9.559999999999998e-07, "loss": -0.0178, "num_tokens": 20875980.0, "reward": 0.0, "reward_std": 0.9503426551818848, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1072165083130928, "rewards/wordcountpos_reward/raw_geo/std": 0.13808855888773275, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.033333333333333326, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 479 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1426.0, "completions/mean_length": 840.8125, "completions/mean_terminated_length": 796.86669921875, "completions/min_length": 409.0, "completions/min_terminated_length": 409.0, "epoch": 0.09601920384076815, "frac_reward_zero_std": 0.0, "grad_norm": 2.7926570468671876, "kl": 0.0018596649169921875, "learning_rate": 9.58e-07, "loss": -0.0747, "num_tokens": 20905481.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7571004629135132, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0034355670983920433, "rewards/wordcountpos_reward/raw_geo/std": 0.1322509009792835, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238707, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 480 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1443.0, "completions/mean_length": 1219.6875, "completions/mean_terminated_length": 1179.6429443359375, "completions/min_length": 966.0, "completions/min_terminated_length": 966.0, "epoch": 0.09621924384876976, "frac_reward_zero_std": 0.0, "grad_norm": 2.9461889410208433, "kl": 0.002044677734375, "learning_rate": 9.6e-07, "loss": 0.0471, "num_tokens": 20944196.0, "reward": -2.9802322387695312e-08, "reward_std": 0.776721715927124, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.009757609977105022, "rewards/wordcountpos_reward/raw_geo/std": 0.08117345549545481, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0806225774829855, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 481 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1477.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1258.625, "completions/mean_terminated_length": 1258.625, "completions/min_length": 1070.0, "completions/min_terminated_length": 1070.0, "epoch": 0.09641928385677136, "frac_reward_zero_std": 0.0, "grad_norm": 2.4707941232157884, "kl": 0.0020122528076171875, "learning_rate": 9.619999999999999e-07, "loss": 0.0098, "num_tokens": 20981870.0, "reward": 0.0, "reward_std": 1.058112382888794, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0910334419908491, "rewards/wordcountpos_reward/raw_geo/std": 0.048551581882787916, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04367387557118565, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 482 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1332.0, "completions/max_terminated_length": 1332.0, "completions/mean_length": 1013.5625, "completions/mean_terminated_length": 1013.5625, "completions/min_length": 845.0, "completions/min_terminated_length": 845.0, "epoch": 0.09661932386477295, "frac_reward_zero_std": 0.0, "grad_norm": 2.770317252624303, "kl": 0.0017871856689453125, "learning_rate": 9.64e-07, "loss": 0.0096, "num_tokens": 21023535.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0308070182800293, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11234302193644059, "rewards/wordcountpos_reward/raw_geo/std": 0.2621016406184735, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 483 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1279.0, "completions/max_terminated_length": 1279.0, "completions/mean_length": 918.25, "completions/mean_terminated_length": 918.25, "completions/min_length": 640.0, "completions/min_terminated_length": 640.0, "epoch": 0.09681936387277455, "frac_reward_zero_std": 0.0, "grad_norm": 3.847537337325355, "kl": 0.003387451171875, "learning_rate": 9.66e-07, "loss": -0.0322, "num_tokens": 21060539.0, "reward": 0.0, "reward_std": 0.5896586179733276, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06407387064858733, "rewards/wordcountpos_reward/raw_geo/std": 0.13089185255501323, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133131, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 484 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1327.0, "completions/max_terminated_length": 1327.0, "completions/mean_length": 1138.3125, "completions/mean_terminated_length": 1138.3125, "completions/min_length": 930.0, "completions/min_terminated_length": 930.0, "epoch": 0.09701940388077615, "frac_reward_zero_std": 0.0, "grad_norm": 2.1138505375212415, "kl": 0.0018100738525390625, "learning_rate": 9.679999999999999e-07, "loss": -0.0142, "num_tokens": 21104968.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9472931623458862, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07957419468080931, "rewards/wordcountpos_reward/raw_geo/std": 0.08949099042844877, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.1338185615204685, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 485 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1377.0, "completions/mean_length": 1156.3125, "completions/mean_terminated_length": 1133.4000244140625, "completions/min_length": 892.0, "completions/min_terminated_length": 892.0, "epoch": 0.09721944388877776, "frac_reward_zero_std": 0.0, "grad_norm": 3.163610426942226, "kl": 0.003108978271484375, "learning_rate": 9.7e-07, "loss": 0.0222, "num_tokens": 21148709.0, "reward": 0.0, "reward_std": 0.8028128147125244, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09171651298782155, "rewards/wordcountpos_reward/raw_geo/std": 0.27495332105799164, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081414, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 486 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1351.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 1184.875, "completions/mean_terminated_length": 1184.875, "completions/min_length": 919.0, "completions/min_terminated_length": 919.0, "epoch": 0.09741948389677936, "frac_reward_zero_std": 0.0, "grad_norm": 3.440813258319072, "kl": 0.002994537353515625, "learning_rate": 9.72e-07, "loss": 0.0383, "num_tokens": 21200035.0, "reward": 0.0, "reward_std": 0.838242769241333, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08261366674540484, "rewards/wordcountpos_reward/raw_geo/std": 0.07967757495576122, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.25265259415516267, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 487 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1442.0, "completions/max_terminated_length": 1442.0, "completions/mean_length": 1141.5, "completions/mean_terminated_length": 1141.5, "completions/min_length": 913.0, "completions/min_terminated_length": 913.0, "epoch": 0.09761952390478096, "frac_reward_zero_std": 0.0, "grad_norm": 2.6848111587225514, "kl": 0.0020599365234375, "learning_rate": 9.74e-07, "loss": -0.0204, "num_tokens": 21234211.0, "reward": 0.0, "reward_std": 0.5016034841537476, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.01833172982551814, "rewards/wordcountpos_reward/raw_geo/std": 0.13815844082096307, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746354, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 488 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1224.0, "completions/max_terminated_length": 1224.0, "completions/mean_length": 999.375, "completions/mean_terminated_length": 999.375, "completions/min_length": 791.0, "completions/min_terminated_length": 791.0, "epoch": 0.09781956391278256, "frac_reward_zero_std": 0.0, "grad_norm": 3.8610230532854484, "kl": 0.003719329833984375, "learning_rate": 9.759999999999998e-07, "loss": 0.0163, "num_tokens": 21277873.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5087968111038208, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.029772728156827268, "rewards/wordcountpos_reward/raw_geo/std": 0.14740463180699628, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1500617156989701, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 489 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1359.3125, "completions/mean_terminated_length": 1326.84619140625, "completions/min_length": 1168.0, "completions/min_terminated_length": 1168.0, "epoch": 0.09801960392078415, "frac_reward_zero_std": 0.0, "grad_norm": 2.7083956629899415, "kl": 0.002582550048828125, "learning_rate": 9.78e-07, "loss": 0.0157, "num_tokens": 21330934.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9627735614776611, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11160320443813755, "rewards/wordcountpos_reward/raw_geo/std": 0.11353641005806972, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.09496588081262934, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 490 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1200.3125, "completions/mean_terminated_length": 1157.5, "completions/min_length": 773.0, "completions/min_terminated_length": 773.0, "epoch": 0.09821964392878575, "frac_reward_zero_std": 0.0, "grad_norm": 2.876192888327194, "kl": 0.002735137939453125, "learning_rate": 9.8e-07, "loss": -0.0199, "num_tokens": 21386075.0, "reward": -7.450580596923828e-09, "reward_std": 1.0218180418014526, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.10738691415552819, "rewards/wordcountpos_reward/raw_geo/std": 0.06044878387629908, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.6625, "rewards/wordcountpos_reward/raw_rule/std": 0.1796601730428249, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 491 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1418.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1120.625, "completions/mean_terminated_length": 1120.625, "completions/min_length": 912.0, "completions/min_terminated_length": 912.0, "epoch": 0.09841968393678736, "frac_reward_zero_std": 0.0, "grad_norm": 2.532748681692034, "kl": 0.002788543701171875, "learning_rate": 9.819999999999999e-07, "loss": -0.0129, "num_tokens": 21421573.0, "reward": 0.0, "reward_std": 0.8296001553535461, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.028184053989471766, "rewards/wordcountpos_reward/raw_geo/std": 0.10575152905320613, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10671873729054748, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 492 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1195.0, "completions/max_terminated_length": 1195.0, "completions/mean_length": 993.625, "completions/mean_terminated_length": 993.625, "completions/min_length": 791.0, "completions/min_terminated_length": 791.0, "epoch": 0.09861972394478896, "frac_reward_zero_std": 0.0, "grad_norm": 3.7098073803652145, "kl": 0.003170013427734375, "learning_rate": 9.84e-07, "loss": -0.026, "num_tokens": 21459607.0, "reward": 7.450580596923828e-09, "reward_std": 1.065168857574463, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.014650490817114411, "rewards/wordcountpos_reward/raw_geo/std": 0.07698630368220064, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.0596284793999944, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 493 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1381.0, "completions/mean_length": 1193.3125, "completions/mean_terminated_length": 1091.0833740234375, "completions/min_length": 808.0, "completions/min_terminated_length": 808.0, "epoch": 0.09881976395279056, "frac_reward_zero_std": 0.0, "grad_norm": 3.351053765361233, "kl": 0.003322601318359375, "learning_rate": 9.86e-07, "loss": -0.0419, "num_tokens": 21499092.0, "reward": 2.9802322387695312e-08, "reward_std": 0.576299786567688, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09818906152562937, "rewards/wordcountpos_reward/raw_geo/std": 0.11096192058241681, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505424, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 494 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1049.0, "completions/max_terminated_length": 1049.0, "completions/mean_length": 991.3125, "completions/mean_terminated_length": 991.3125, "completions/min_length": 837.0, "completions/min_terminated_length": 837.0, "epoch": 0.09901980396079216, "frac_reward_zero_std": 0.0, "grad_norm": 2.7093974238990075, "kl": 0.0017337799072265625, "learning_rate": 9.88e-07, "loss": 0.0047, "num_tokens": 21533809.0, "reward": -2.9802322387695312e-08, "reward_std": 0.914000391960144, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15058066876217352, "rewards/wordcountpos_reward/raw_geo/std": 0.05065428516809082, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952505, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 495 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1337.0625, "completions/mean_terminated_length": 1239.300048828125, "completions/min_length": 944.0, "completions/min_terminated_length": 944.0, "epoch": 0.09921984396879376, "frac_reward_zero_std": 0.0, "grad_norm": 3.305958590103276, "kl": 0.00327301025390625, "learning_rate": 9.9e-07, "loss": 0.0512, "num_tokens": 21573442.0, "reward": 0.0, "reward_std": 0.8479118347167969, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.17085014681717073, "rewards/wordcountpos_reward/raw_geo/std": 0.23646436101757687, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1060223596263578, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 496 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1401.0, "completions/max_terminated_length": 1401.0, "completions/mean_length": 1079.5625, "completions/mean_terminated_length": 1079.5625, "completions/min_length": 929.0, "completions/min_terminated_length": 929.0, "epoch": 0.09941988397679535, "frac_reward_zero_std": 0.0, "grad_norm": 2.237181195884694, "kl": 0.0020618438720703125, "learning_rate": 9.92e-07, "loss": -0.0268, "num_tokens": 21605995.0, "reward": 0.0, "reward_std": 0.7448483109474182, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.054934752271677906, "rewards/wordcountpos_reward/raw_geo/std": 0.14111626439720662, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.05900408021045224, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 497 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1188.1875, "completions/mean_terminated_length": 1143.6429443359375, "completions/min_length": 873.0, "completions/min_terminated_length": 873.0, "epoch": 0.09961992398479697, "frac_reward_zero_std": 0.0, "grad_norm": 2.4564196381984553, "kl": 0.0016880035400390625, "learning_rate": 9.94e-07, "loss": -0.036, "num_tokens": 21658334.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6954265832901001, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15044035077604337, "rewards/wordcountpos_reward/raw_geo/std": 0.12864148154055016, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026004, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 498 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1115.0, "completions/max_terminated_length": 1115.0, "completions/mean_length": 985.375, "completions/mean_terminated_length": 985.375, "completions/min_length": 836.0, "completions/min_terminated_length": 836.0, "epoch": 0.09981996399279856, "frac_reward_zero_std": 0.0, "grad_norm": 3.2024178687062435, "kl": 0.002910614013671875, "learning_rate": 9.959999999999999e-07, "loss": 0.0009, "num_tokens": 21690084.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0436984300613403, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.053173208857816914, "rewards/wordcountpos_reward/raw_geo/std": 0.059335139890900584, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07685966046898339, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 499 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1138.0, "completions/max_terminated_length": 1138.0, "completions/mean_length": 860.125, "completions/mean_terminated_length": 860.125, "completions/min_length": 613.0, "completions/min_terminated_length": 613.0, "epoch": 0.10002000400080016, "frac_reward_zero_std": 0.0, "grad_norm": 3.589445308970012, "kl": 0.003017425537109375, "learning_rate": 9.98e-07, "loss": 0.0084, "num_tokens": 21719582.0, "reward": 0.0, "reward_std": 0.6890698671340942, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.14452684010703898, "rewards/wordcountpos_reward/raw_geo/std": 0.14872745340920018, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7, "rewards/wordcountpos_reward/raw_rule/std": 0.14194417264596723, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 500 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1170.1875, "completions/mean_terminated_length": 1148.2000732421875, "completions/min_length": 909.0, "completions/min_terminated_length": 909.0, "epoch": 0.10022004400880176, "frac_reward_zero_std": 0.0, "grad_norm": 3.3503278285045908, "kl": 0.003173828125, "learning_rate": 1e-06, "loss": -0.0083, "num_tokens": 21754897.0, "reward": 7.450580596923828e-09, "reward_std": 1.056408166885376, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.05163033368753471, "rewards/wordcountpos_reward/raw_geo/std": 0.04741391081471719, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 501 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1179.0, "completions/max_terminated_length": 1179.0, "completions/mean_length": 967.4375, "completions/mean_terminated_length": 967.4375, "completions/min_length": 740.0, "completions/min_terminated_length": 740.0, "epoch": 0.10042008401680336, "frac_reward_zero_std": 0.0, "grad_norm": 3.3992872991476126, "kl": 0.00319671630859375, "learning_rate": 9.999998902889782e-07, "loss": -0.007, "num_tokens": 21793880.0, "reward": 0.0, "reward_std": 0.5806048512458801, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.027409740286549706, "rewards/wordcountpos_reward/raw_geo/std": 0.15258789571654874, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12524050936172845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 502 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1419.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 1107.375, "completions/mean_terminated_length": 1107.375, "completions/min_length": 894.0, "completions/min_terminated_length": 894.0, "epoch": 0.10062012402480495, "frac_reward_zero_std": 0.0, "grad_norm": 3.468943403079575, "kl": 0.003437042236328125, "learning_rate": 9.999995611559667e-07, "loss": 0.0029, "num_tokens": 21834766.0, "reward": 0.0, "reward_std": 0.9914134740829468, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1520949058432, "rewards/wordcountpos_reward/raw_geo/std": 0.21153564619625878, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08944271909999159, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 503 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1250.0, "completions/max_terminated_length": 1250.0, "completions/mean_length": 1131.1875, "completions/mean_terminated_length": 1131.1875, "completions/min_length": 969.0, "completions/min_terminated_length": 969.0, "epoch": 0.10082016403280657, "frac_reward_zero_std": 0.0, "grad_norm": 3.4549201260688407, "kl": 0.00376129150390625, "learning_rate": 9.999990126011257e-07, "loss": 0.0037, "num_tokens": 21879961.0, "reward": 0.0, "reward_std": 0.8015230298042297, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03451011290989368, "rewards/wordcountpos_reward/raw_geo/std": 0.0662209835672636, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988657, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 504 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1166.25, "completions/mean_terminated_length": 1014.5454711914062, "completions/min_length": 759.0, "completions/min_terminated_length": 759.0, "epoch": 0.10102020404080816, "frac_reward_zero_std": 0.0, "grad_norm": 3.0191254994649896, "kl": 0.003025054931640625, "learning_rate": 9.999982446247225e-07, "loss": 0.0079, "num_tokens": 21928437.0, "reward": 0.0, "reward_std": 0.8550890684127808, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11181711842126814, "rewards/wordcountpos_reward/raw_geo/std": 0.05753556447331552, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.14240006242195885, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 505 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 994.0, "completions/max_terminated_length": 994.0, "completions/mean_length": 828.875, "completions/mean_terminated_length": 828.875, "completions/min_length": 667.0, "completions/min_terminated_length": 667.0, "epoch": 0.10122024404880976, "frac_reward_zero_std": 0.0, "grad_norm": 3.6007722290881596, "kl": 0.0030851364135742188, "learning_rate": 9.999972572271322e-07, "loss": -0.0071, "num_tokens": 21967739.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0402424335479736, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.20852460988398958, "rewards/wordcountpos_reward/raw_geo/std": 0.17636089333781668, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12816366850994057, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 506 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1274.0, "completions/max_terminated_length": 1274.0, "completions/mean_length": 1080.8125, "completions/mean_terminated_length": 1080.8125, "completions/min_length": 925.0, "completions/min_terminated_length": 925.0, "epoch": 0.10142028405681136, "frac_reward_zero_std": 0.0, "grad_norm": 2.58876308164753, "kl": 0.00179290771484375, "learning_rate": 9.999960504088355e-07, "loss": 0.004, "num_tokens": 22016000.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9667521715164185, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10357312940039556, "rewards/wordcountpos_reward/raw_geo/std": 0.14473622461103544, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10246950765959599, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 507 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1175.4375, "completions/mean_terminated_length": 1129.071533203125, "completions/min_length": 832.0, "completions/min_terminated_length": 832.0, "epoch": 0.10162032406481296, "frac_reward_zero_std": 0.0, "grad_norm": 3.1394805052302046, "kl": 0.0032958984375, "learning_rate": 9.999946241704217e-07, "loss": 0.0209, "num_tokens": 22065887.0, "reward": -9.313225746154785e-09, "reward_std": 1.0673675537109375, "rewards/wordcountpos_reward/mean": -9.313225746154785e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.14287042195619049, "rewards/wordcountpos_reward/raw_geo/std": 0.2783586077613992, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725111, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 508 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1299.0, "completions/max_terminated_length": 1299.0, "completions/mean_length": 923.8125, "completions/mean_terminated_length": 923.8125, "completions/min_length": 475.0, "completions/min_terminated_length": 475.0, "epoch": 0.10182036407281456, "frac_reward_zero_std": 0.0, "grad_norm": 3.6332851176440375, "kl": 0.002704620361328125, "learning_rate": 9.999929785125855e-07, "loss": -0.0539, "num_tokens": 22100532.0, "reward": -7.450580596923828e-09, "reward_std": 1.0480988025665283, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.05402005420071991, "rewards/wordcountpos_reward/raw_geo/std": 0.0819726518515701, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.161245154965971, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 509 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1388.0, "completions/mean_length": 1111.375, "completions/mean_terminated_length": 1085.4666748046875, "completions/min_length": 876.0, "completions/min_terminated_length": 876.0, "epoch": 0.10202040408081617, "frac_reward_zero_std": 0.0, "grad_norm": 3.6946424669767075, "kl": 0.00368499755859375, "learning_rate": 9.999911134361297e-07, "loss": -0.0215, "num_tokens": 22150458.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9156558513641357, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.28532424094059317, "rewards/wordcountpos_reward/raw_geo/std": 0.3058888392113991, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.1270024788326182, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 510 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1370.0, "completions/max_terminated_length": 1370.0, "completions/mean_length": 1046.3125, "completions/mean_terminated_length": 1046.3125, "completions/min_length": 780.0, "completions/min_terminated_length": 780.0, "epoch": 0.10222044408881777, "frac_reward_zero_std": 0.0, "grad_norm": 3.9778437702769005, "kl": 0.003681182861328125, "learning_rate": 9.999890289419633e-07, "loss": -0.0145, "num_tokens": 22193959.0, "reward": -7.450580596923828e-09, "reward_std": 1.0465683937072754, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.09833448892075872, "rewards/wordcountpos_reward/raw_geo/std": 0.06227146483146065, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.1458055529095489, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 511 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1380.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1077.1875, "completions/mean_terminated_length": 1077.1875, "completions/min_length": 654.0, "completions/min_terminated_length": 654.0, "epoch": 0.10242048409681936, "frac_reward_zero_std": 0.0, "grad_norm": 3.9496288190588325, "kl": 0.00339508056640625, "learning_rate": 9.999867250311034e-07, "loss": 0.0014, "num_tokens": 22242090.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6210155487060547, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.26405872125492136, "rewards/wordcountpos_reward/raw_geo/std": 0.19703978584717102, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7708333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.23597708614436527, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 512 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1296.0, "completions/max_terminated_length": 1296.0, "completions/mean_length": 1138.875, "completions/mean_terminated_length": 1138.875, "completions/min_length": 850.0, "completions/min_terminated_length": 850.0, "epoch": 0.10262052410482096, "frac_reward_zero_std": 0.0, "grad_norm": 2.34170030045483, "kl": 0.001407623291015625, "learning_rate": 9.999842017046729e-07, "loss": -0.0146, "num_tokens": 22289056.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9713587760925293, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.061435437395757596, "rewards/wordcountpos_reward/raw_geo/std": 0.09058750077536384, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 513 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1178.0, "completions/mean_length": 1064.0, "completions/mean_terminated_length": 1001.71435546875, "completions/min_length": 780.0, "completions/min_terminated_length": 780.0, "epoch": 0.10282056411282256, "frac_reward_zero_std": 0.0, "grad_norm": 3.5959829750472783, "kl": 0.0032806396484375, "learning_rate": 9.999814589639024e-07, "loss": 0.0096, "num_tokens": 22340968.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0442018508911133, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1458997781674531, "rewards/wordcountpos_reward/raw_geo/std": 0.11164110361722793, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.21460558137093164, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 514 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1277.0, "completions/max_terminated_length": 1277.0, "completions/mean_length": 1047.875, "completions/mean_terminated_length": 1047.875, "completions/min_length": 842.0, "completions/min_terminated_length": 842.0, "epoch": 0.10302060412082416, "frac_reward_zero_std": 0.0, "grad_norm": 3.4190556889124637, "kl": 0.002979278564453125, "learning_rate": 9.99978496810129e-07, "loss": -0.0622, "num_tokens": 22386678.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9543663263320923, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05787473445859307, "rewards/wordcountpos_reward/raw_geo/std": 0.06255903084043969, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.12382783747337808, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 515 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1344.0, "completions/max_terminated_length": 1344.0, "completions/mean_length": 1060.0625, "completions/mean_terminated_length": 1060.0625, "completions/min_length": 876.0, "completions/min_terminated_length": 876.0, "epoch": 0.10322064412882577, "frac_reward_zero_std": 0.0, "grad_norm": 2.77388045443082, "kl": 0.002544403076171875, "learning_rate": 9.999753152447975e-07, "loss": 0.0194, "num_tokens": 22423103.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8948169946670532, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.009287352290140091, "rewards/wordcountpos_reward/raw_geo/std": 0.11516730967135275, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.08681611046941136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 516 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1439.0, "completions/mean_length": 1403.3125, "completions/mean_terminated_length": 1279.0, "completions/min_length": 1157.0, "completions/min_terminated_length": 1157.0, "epoch": 0.10342068413682737, "frac_reward_zero_std": 0.0, "grad_norm": 2.5565780589489937, "kl": 0.0024662017822265625, "learning_rate": 9.999719142694587e-07, "loss": 0.0128, "num_tokens": 22472068.0, "reward": 0.0, "reward_std": 0.7774474620819092, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.013580617634790352, "rewards/wordcountpos_reward/raw_geo/std": 0.04766143536972826, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12382783747337808, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 517 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1252.0, "completions/mean_length": 934.0625, "completions/mean_terminated_length": 676.8181762695312, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.10362072414482897, "frac_reward_zero_std": 0.0, "grad_norm": 3.537932239386057, "kl": 0.002445220947265625, "learning_rate": 9.999682938857714e-07, "loss": -0.2467, "num_tokens": 22506293.0, "reward": 2.9802322387695312e-08, "reward_std": 0.20520628988742828, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04070586292796668, "rewards/wordcountpos_reward/raw_geo/std": 0.044918410903867535, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.6875, "rewards/wordcountpos_reward/raw_rule/std": 0.2467192136222276, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 518 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1108.0, "completions/max_terminated_length": 1108.0, "completions/mean_length": 940.0625, "completions/mean_terminated_length": 940.0625, "completions/min_length": 563.0, "completions/min_terminated_length": 563.0, "epoch": 0.10382076415283056, "frac_reward_zero_std": 0.0, "grad_norm": 3.599234499546969, "kl": 0.0029621124267578125, "learning_rate": 9.999644540955006e-07, "loss": 0.0503, "num_tokens": 22545630.0, "reward": 0.0, "reward_std": 0.6917240619659424, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10104033237679752, "rewards/wordcountpos_reward/raw_geo/std": 0.12197133913767746, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.16238956361284543, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 519 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1234.0, "completions/max_terminated_length": 1234.0, "completions/mean_length": 1009.75, "completions/mean_terminated_length": 1009.75, "completions/min_length": 698.0, "completions/min_terminated_length": 698.0, "epoch": 0.10402080416083216, "frac_reward_zero_std": 0.0, "grad_norm": 3.441418261607082, "kl": 0.00318145751953125, "learning_rate": 9.99960394900519e-07, "loss": 0.0482, "num_tokens": 22585258.0, "reward": 0.0, "reward_std": 1.0492933988571167, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0460107454620935, "rewards/wordcountpos_reward/raw_geo/std": 0.08028027634463461, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10470416879457554, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 520 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1207.0, "completions/mean_length": 1124.6875, "completions/mean_terminated_length": 1099.666748046875, "completions/min_length": 1004.0, "completions/min_terminated_length": 1004.0, "epoch": 0.10422084416883377, "frac_reward_zero_std": 0.0, "grad_norm": 3.5554381204838372, "kl": 0.004390716552734375, "learning_rate": 9.999561163028054e-07, "loss": 0.0093, "num_tokens": 22626725.0, "reward": 7.450580596923828e-09, "reward_std": 1.0660068988800049, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.015194260349199755, "rewards/wordcountpos_reward/raw_geo/std": 0.09218172898444489, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.14194417264596723, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 521 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1451.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1224.5625, "completions/mean_terminated_length": 1224.5625, "completions/min_length": 937.0, "completions/min_terminated_length": 937.0, "epoch": 0.10442088417683537, "frac_reward_zero_std": 0.0, "grad_norm": 3.3437539621392323, "kl": 0.00363922119140625, "learning_rate": 9.999516183044463e-07, "loss": -0.056, "num_tokens": 22675174.0, "reward": 5.960464477539063e-08, "reward_std": 1.0049105882644653, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0551602976857556, "rewards/wordcountpos_reward/raw_geo/std": 0.07104750172665891, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036262, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 522 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1153.0, "completions/max_terminated_length": 1153.0, "completions/mean_length": 999.5625, "completions/mean_terminated_length": 999.5625, "completions/min_length": 807.0, "completions/min_terminated_length": 807.0, "epoch": 0.10462092418483697, "frac_reward_zero_std": 0.0, "grad_norm": 3.4160380768407776, "kl": 0.00359344482421875, "learning_rate": 9.99946900907635e-07, "loss": 0.0147, "num_tokens": 22716367.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8914186954498291, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.21583604101445772, "rewards/wordcountpos_reward/raw_geo/std": 0.1352627516273477, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101766, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 523 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1367.0, "completions/max_terminated_length": 1367.0, "completions/mean_length": 1132.4375, "completions/mean_terminated_length": 1132.4375, "completions/min_length": 890.0, "completions/min_terminated_length": 890.0, "epoch": 0.10482096419283857, "frac_reward_zero_std": 0.0, "grad_norm": 3.0421652743310243, "kl": 0.00336456298828125, "learning_rate": 9.999419641146717e-07, "loss": -0.0232, "num_tokens": 22763950.0, "reward": 0.0, "reward_std": 0.7214622497558594, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1031496434668357, "rewards/wordcountpos_reward/raw_geo/std": 0.3003652912898537, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11729986896522632, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 524 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1397.0, "completions/mean_length": 1162.4375, "completions/mean_terminated_length": 1139.933349609375, "completions/min_length": 814.0, "completions/min_terminated_length": 814.0, "epoch": 0.10502100420084017, "frac_reward_zero_std": 0.0, "grad_norm": 3.2985774537755397, "kl": 0.00292205810546875, "learning_rate": 9.999368079279633e-07, "loss": 0.0276, "num_tokens": 22804541.0, "reward": 0.0, "reward_std": 0.6324073076248169, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.11208570193050091, "rewards/wordcountpos_reward/raw_geo/std": 0.10217915285146209, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.10036968702787748, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 525 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1341.0, "completions/max_terminated_length": 1341.0, "completions/mean_length": 965.0, "completions/mean_terminated_length": 965.0, "completions/min_length": 654.0, "completions/min_terminated_length": 654.0, "epoch": 0.10522104420884176, "frac_reward_zero_std": 0.0, "grad_norm": 2.7334234126256525, "kl": 0.002666473388671875, "learning_rate": 9.999314323500245e-07, "loss": 0.0143, "num_tokens": 22847605.0, "reward": 1.4901161193847656e-08, "reward_std": 1.004686951637268, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15369471949204236, "rewards/wordcountpos_reward/raw_geo/std": 0.22092310022499412, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988657, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 526 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1408.0, "completions/mean_length": 933.9375, "completions/mean_terminated_length": 896.2000732421875, "completions/min_length": 502.0, "completions/min_terminated_length": 502.0, "epoch": 0.10542108421684337, "frac_reward_zero_std": 0.0, "grad_norm": 3.3299581380307455, "kl": 0.003543853759765625, "learning_rate": 9.99925837383476e-07, "loss": 0.0432, "num_tokens": 22878260.0, "reward": 0.0, "reward_std": 0.1896817535161972, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04412942377587502, "rewards/wordcountpos_reward/raw_geo/std": 0.04875373897272202, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1865376538297196, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 527 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1325.0, "completions/max_terminated_length": 1325.0, "completions/mean_length": 896.125, "completions/mean_terminated_length": 896.125, "completions/min_length": 736.0, "completions/min_terminated_length": 736.0, "epoch": 0.10562112422484497, "frac_reward_zero_std": 0.0, "grad_norm": 2.9815122049620904, "kl": 0.002323150634765625, "learning_rate": 9.999200230310464e-07, "loss": -0.0269, "num_tokens": 22905574.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5550601482391357, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.020235067684602828, "rewards/wordcountpos_reward/raw_geo/std": 0.10951413785802963, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891873, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 528 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1204.0, "completions/mean_length": 1311.25, "completions/mean_terminated_length": 1122.5, "completions/min_length": 995.0, "completions/min_terminated_length": 995.0, "epoch": 0.10582116423284657, "frac_reward_zero_std": 0.0, "grad_norm": 2.8255191501162926, "kl": 0.0035400390625, "learning_rate": 9.999139892955702e-07, "loss": -0.001, "num_tokens": 22961714.0, "reward": 0.0, "reward_std": 0.493524432182312, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.134572415716046, "rewards/wordcountpos_reward/raw_geo/std": 0.11200140348467674, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503964, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 529 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1340.6875, "completions/mean_terminated_length": 1303.923095703125, "completions/min_length": 926.0, "completions/min_terminated_length": 926.0, "epoch": 0.10602120424084817, "frac_reward_zero_std": 0.0, "grad_norm": 2.6923523057866765, "kl": 0.00286102294921875, "learning_rate": 9.999077361799901e-07, "loss": 0.0378, "num_tokens": 23015805.0, "reward": 0.0, "reward_std": 0.8922038674354553, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.006554401549943335, "rewards/wordcountpos_reward/raw_geo/std": 0.27449323057238395, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11409872268574493, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 530 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1441.0, "completions/mean_length": 1131.25, "completions/mean_terminated_length": 1078.571533203125, "completions/min_length": 833.0, "completions/min_terminated_length": 833.0, "epoch": 0.10622124424884977, "frac_reward_zero_std": 0.0, "grad_norm": 1.9113579049657163, "kl": 0.00177001953125, "learning_rate": 9.999012636873545e-07, "loss": -0.018, "num_tokens": 23054177.0, "reward": 2.9802322387695312e-08, "reward_std": 0.716476559638977, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08867874949732134, "rewards/wordcountpos_reward/raw_geo/std": 0.07211955195163484, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 531 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1230.4375, "completions/mean_terminated_length": 1212.4666748046875, "completions/min_length": 868.0, "completions/min_terminated_length": 868.0, "epoch": 0.10642128425685136, "frac_reward_zero_std": 0.0, "grad_norm": 2.3710401626716022, "kl": 0.00157928466796875, "learning_rate": 9.9989457182082e-07, "loss": -0.0389, "num_tokens": 23096224.0, "reward": 0.0, "reward_std": 0.4688728451728821, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.010246156670225156, "rewards/wordcountpos_reward/raw_geo/std": 0.12366147439349398, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10532137766186216, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 532 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1369.5625, "completions/mean_terminated_length": 1310.2728271484375, "completions/min_length": 980.0, "completions/min_terminated_length": 980.0, "epoch": 0.10662132426485298, "frac_reward_zero_std": 0.0, "grad_norm": 2.1150195028880274, "kl": 0.00162506103515625, "learning_rate": 9.998876605836494e-07, "loss": 0.0159, "num_tokens": 23143209.0, "reward": -2.9802322387695312e-08, "reward_std": 0.637698769569397, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03878263204132484, "rewards/wordcountpos_reward/raw_geo/std": 0.06292999265928811, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823629, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 533 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1206.0, "completions/max_terminated_length": 1206.0, "completions/mean_length": 1029.0, "completions/mean_terminated_length": 1029.0, "completions/min_length": 864.0, "completions/min_terminated_length": 864.0, "epoch": 0.10682136427285457, "frac_reward_zero_std": 0.0, "grad_norm": 3.456448530381217, "kl": 0.003177642822265625, "learning_rate": 9.998805299792124e-07, "loss": -0.0137, "num_tokens": 23179833.0, "reward": 0.0, "reward_std": 0.8021977543830872, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.22649387280945132, "rewards/wordcountpos_reward/raw_geo/std": 0.26097069279213403, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715322, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 534 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1252.0, "completions/max_terminated_length": 1252.0, "completions/mean_length": 1067.5, "completions/mean_terminated_length": 1067.5, "completions/min_length": 835.0, "completions/min_terminated_length": 835.0, "epoch": 0.10702140428085617, "frac_reward_zero_std": 0.0, "grad_norm": 3.2135730709328763, "kl": 0.0035858154296875, "learning_rate": 9.998731800109863e-07, "loss": -0.0313, "num_tokens": 23229617.0, "reward": 0.0, "reward_std": 0.8154876232147217, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07405866024457258, "rewards/wordcountpos_reward/raw_geo/std": 0.1353684457165506, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042255, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 535 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1216.0, "completions/max_terminated_length": 1216.0, "completions/mean_length": 1030.0, "completions/mean_terminated_length": 1030.0, "completions/min_length": 740.0, "completions/min_terminated_length": 740.0, "epoch": 0.10722144428885777, "frac_reward_zero_std": 0.0, "grad_norm": 3.617657023267098, "kl": 0.0034942626953125, "learning_rate": 9.998656106825547e-07, "loss": -0.0524, "num_tokens": 23272657.0, "reward": 7.450580596923828e-09, "reward_std": 1.04160475730896, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.03418250336862778, "rewards/wordcountpos_reward/raw_geo/std": 0.0721857084891372, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725113, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 536 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1382.0, "completions/mean_length": 1266.6875, "completions/mean_terminated_length": 1033.375, "completions/min_length": 889.0, "completions/min_terminated_length": 889.0, "epoch": 0.10742148429685937, "frac_reward_zero_std": 0.0, "grad_norm": 3.3265678703648494, "kl": 0.003875732421875, "learning_rate": 9.998578219976087e-07, "loss": -0.0381, "num_tokens": 23318188.0, "reward": 7.450580596923828e-09, "reward_std": 0.9139026999473572, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.07845589613207468, "rewards/wordcountpos_reward/raw_geo/std": 0.2843549319551082, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10461569884316813, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 537 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1424.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1095.1875, "completions/mean_terminated_length": 1095.1875, "completions/min_length": 675.0, "completions/min_terminated_length": 675.0, "epoch": 0.10762152430486097, "frac_reward_zero_std": 0.0, "grad_norm": 4.021521752726582, "kl": 0.004184722900390625, "learning_rate": 9.998498139599457e-07, "loss": 0.0017, "num_tokens": 23368439.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4519929885864258, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.018939090902597606, "rewards/wordcountpos_reward/raw_geo/std": 0.13436037245769455, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7625, "rewards/wordcountpos_reward/raw_rule/std": 0.141878925953186, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 538 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 1228.9375, "completions/mean_terminated_length": 1210.86669921875, "completions/min_length": 944.0, "completions/min_terminated_length": 944.0, "epoch": 0.10782156431286258, "frac_reward_zero_std": 0.0, "grad_norm": 2.9098989124376464, "kl": 0.002986907958984375, "learning_rate": 9.99841586573471e-07, "loss": -0.0232, "num_tokens": 23418878.0, "reward": 0.0, "reward_std": 0.836676836013794, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1268032575343032, "rewards/wordcountpos_reward/raw_geo/std": 0.13357303577055174, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.16727666149669979, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 539 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1460.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1138.0625, "completions/mean_terminated_length": 1138.0625, "completions/min_length": 970.0, "completions/min_terminated_length": 970.0, "epoch": 0.10802160432086418, "frac_reward_zero_std": 0.0, "grad_norm": 3.6126260162838095, "kl": 0.0038909912109375, "learning_rate": 9.998331398421957e-07, "loss": 0.0191, "num_tokens": 23452279.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9278035759925842, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0030557194900407494, "rewards/wordcountpos_reward/raw_geo/std": 0.2449606092396285, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11213417888437975, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 540 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1471.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1023.625, "completions/mean_terminated_length": 1023.625, "completions/min_length": 645.0, "completions/min_terminated_length": 645.0, "epoch": 0.10822164432886577, "frac_reward_zero_std": 0.0, "grad_norm": 3.672921574444718, "kl": 0.00301361083984375, "learning_rate": 9.99824473770239e-07, "loss": -0.0348, "num_tokens": 23483737.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8283048868179321, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.133992028642348, "rewards/wordcountpos_reward/raw_geo/std": 0.07518646880181934, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823633, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 541 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1137.3125, "completions/mean_terminated_length": 972.45458984375, "completions/min_length": 765.0, "completions/min_terminated_length": 765.0, "epoch": 0.10842168433686737, "frac_reward_zero_std": 0.0, "grad_norm": 3.4632358619334407, "kl": 0.004032135009765625, "learning_rate": 9.99815588361826e-07, "loss": -0.0083, "num_tokens": 23533718.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8786888718605042, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07120426404965362, "rewards/wordcountpos_reward/raw_geo/std": 0.09404858115499712, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 542 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1359.0, "completions/max_terminated_length": 1359.0, "completions/mean_length": 978.375, "completions/mean_terminated_length": 978.375, "completions/min_length": 724.0, "completions/min_terminated_length": 724.0, "epoch": 0.10862172434486897, "frac_reward_zero_std": 0.0, "grad_norm": 4.101729557257998, "kl": 0.003948211669921875, "learning_rate": 9.998064836212896e-07, "loss": -0.0058, "num_tokens": 23567228.0, "reward": 0.0, "reward_std": 0.9299734830856323, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.029188210369984507, "rewards/wordcountpos_reward/raw_geo/std": 0.06431088732227853, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09737289911202955, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 543 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1383.0, "completions/mean_length": 1160.9375, "completions/mean_terminated_length": 1112.5, "completions/min_length": 945.0, "completions/min_terminated_length": 945.0, "epoch": 0.10882176435287057, "frac_reward_zero_std": 0.0, "grad_norm": 3.2615494952976345, "kl": 0.003200531005859375, "learning_rate": 9.997971595530694e-07, "loss": -0.0203, "num_tokens": 23607427.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8771929740905762, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10163403460258819, "rewards/wordcountpos_reward/raw_geo/std": 0.04956581953051861, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11800816042090448, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 544 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1119.0, "completions/max_terminated_length": 1119.0, "completions/mean_length": 876.0625, "completions/mean_terminated_length": 876.0625, "completions/min_length": 699.0, "completions/min_terminated_length": 699.0, "epoch": 0.10902180436087218, "frac_reward_zero_std": 0.0, "grad_norm": 3.256012659081334, "kl": 0.00231170654296875, "learning_rate": 9.997876161617116e-07, "loss": -0.0396, "num_tokens": 23656380.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9862924814224243, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07055651000374884, "rewards/wordcountpos_reward/raw_geo/std": 0.07463422099545439, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.12382783747337808, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 545 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1452.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1052.0, "completions/mean_terminated_length": 1052.0, "completions/min_length": 736.0, "completions/min_terminated_length": 736.0, "epoch": 0.10922184436887378, "frac_reward_zero_std": 0.0, "grad_norm": 3.727847257725504, "kl": 0.00359344482421875, "learning_rate": 9.997778534518698e-07, "loss": 0.0104, "num_tokens": 23689996.0, "reward": 0.0, "reward_std": 0.9218430519104004, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10176819934330851, "rewards/wordcountpos_reward/raw_geo/std": 0.12292240999581669, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07290277645477444, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 546 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1288.4375, "completions/mean_terminated_length": 1217.916748046875, "completions/min_length": 1048.0, "completions/min_terminated_length": 1048.0, "epoch": 0.10942188437687538, "frac_reward_zero_std": 0.0, "grad_norm": 2.963442401735094, "kl": 0.003566741943359375, "learning_rate": 9.99767871428304e-07, "loss": -0.0221, "num_tokens": 23742539.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0415704250335693, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0026725128439603184, "rewards/wordcountpos_reward/raw_geo/std": 0.14487264209992345, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 547 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1187.0, "completions/max_terminated_length": 1187.0, "completions/mean_length": 1064.375, "completions/mean_terminated_length": 1064.375, "completions/min_length": 836.0, "completions/min_terminated_length": 836.0, "epoch": 0.10962192438487697, "frac_reward_zero_std": 0.0, "grad_norm": 2.339155250657867, "kl": 0.0017642974853515625, "learning_rate": 9.997576700958821e-07, "loss": -0.0058, "num_tokens": 23781257.0, "reward": -7.450580596923828e-09, "reward_std": 1.027381420135498, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.032476049463412024, "rewards/wordcountpos_reward/raw_geo/std": 0.08151936258716173, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818419, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 548 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1302.0, "completions/max_terminated_length": 1302.0, "completions/mean_length": 994.5625, "completions/mean_terminated_length": 994.5625, "completions/min_length": 505.0, "completions/min_terminated_length": 505.0, "epoch": 0.10982196439287857, "frac_reward_zero_std": 0.0, "grad_norm": 3.7747131511116776, "kl": 0.004329681396484375, "learning_rate": 9.99747249459578e-07, "loss": -0.0398, "num_tokens": 23815570.0, "reward": 0.0, "reward_std": 0.698041558265686, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08488631805000081, "rewards/wordcountpos_reward/raw_geo/std": 0.11848568211004275, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13270686158262923, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 549 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1291.0, "completions/max_terminated_length": 1291.0, "completions/mean_length": 1128.1875, "completions/mean_terminated_length": 1128.1875, "completions/min_length": 921.0, "completions/min_terminated_length": 921.0, "epoch": 0.11002200440088018, "frac_reward_zero_std": 0.0, "grad_norm": 3.4943022272665933, "kl": 0.003734588623046875, "learning_rate": 9.997366095244725e-07, "loss": 0.001, "num_tokens": 23859165.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0013880729675293, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0929478241120112, "rewards/wordcountpos_reward/raw_geo/std": 0.1799033920581418, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078612, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 550 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1390.0, "completions/max_terminated_length": 1390.0, "completions/mean_length": 1095.0, "completions/mean_terminated_length": 1095.0, "completions/min_length": 872.0, "completions/min_terminated_length": 872.0, "epoch": 0.11022204440888178, "frac_reward_zero_std": 0.0, "grad_norm": 3.8221874833604272, "kl": 0.00399017333984375, "learning_rate": 9.997257502957542e-07, "loss": -0.0108, "num_tokens": 23896117.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0622494220733643, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.011306161111533933, "rewards/wordcountpos_reward/raw_geo/std": 0.03490152234471482, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238707, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 551 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1190.3125, "completions/mean_terminated_length": 1146.071533203125, "completions/min_length": 806.0, "completions/min_terminated_length": 806.0, "epoch": 0.11042208441688338, "frac_reward_zero_std": 0.0, "grad_norm": 2.9491541845630147, "kl": 0.0034637451171875, "learning_rate": 9.997146717787177e-07, "loss": -0.0425, "num_tokens": 23938634.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9573972225189209, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0449865480633076, "rewards/wordcountpos_reward/raw_geo/std": 0.041069503586211, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 552 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1262.125, "completions/mean_terminated_length": 1182.8333740234375, "completions/min_length": 858.0, "completions/min_terminated_length": 858.0, "epoch": 0.11062212442488498, "frac_reward_zero_std": 0.0, "grad_norm": 3.2322382550741486, "kl": 0.003429412841796875, "learning_rate": 9.997033739787652e-07, "loss": -0.0188, "num_tokens": 23981732.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6290683746337891, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11262019759432954, "rewards/wordcountpos_reward/raw_geo/std": 0.04860043192875778, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.14801151106386087, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 553 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1350.0, "completions/max_terminated_length": 1350.0, "completions/mean_length": 1157.5625, "completions/mean_terminated_length": 1157.5625, "completions/min_length": 943.0, "completions/min_terminated_length": 943.0, "epoch": 0.11082216443288657, "frac_reward_zero_std": 0.0, "grad_norm": 2.8116699739298796, "kl": 0.00273895263671875, "learning_rate": 9.996918569014055e-07, "loss": -0.0332, "num_tokens": 24015741.0, "reward": 0.0, "reward_std": 0.7823160886764526, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.19674041412945933, "rewards/wordcountpos_reward/raw_geo/std": 0.1502511192862497, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 554 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 917.0, "completions/max_terminated_length": 917.0, "completions/mean_length": 788.5, "completions/mean_terminated_length": 788.5, "completions/min_length": 648.0, "completions/min_terminated_length": 648.0, "epoch": 0.11102220444088817, "frac_reward_zero_std": 0.0, "grad_norm": 3.285146713949514, "kl": 0.002384185791015625, "learning_rate": 9.996801205522545e-07, "loss": -0.024, "num_tokens": 24044933.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6573373079299927, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03580027288163957, "rewards/wordcountpos_reward/raw_geo/std": 0.10357984831541751, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07084150279686706, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 555 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1372.0, "completions/max_terminated_length": 1372.0, "completions/mean_length": 1051.4375, "completions/mean_terminated_length": 1051.4375, "completions/min_length": 868.0, "completions/min_terminated_length": 868.0, "epoch": 0.11122224444888978, "frac_reward_zero_std": 0.0, "grad_norm": 2.943957123082875, "kl": 0.003726959228515625, "learning_rate": 9.996681649370347e-07, "loss": 0.0239, "num_tokens": 24085340.0, "reward": -5.960464477539063e-08, "reward_std": 0.7492737770080566, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13077286039206784, "rewards/wordcountpos_reward/raw_geo/std": 0.09374694514278689, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 556 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1180.1875, "completions/mean_terminated_length": 1158.86669921875, "completions/min_length": 921.0, "completions/min_terminated_length": 921.0, "epoch": 0.11142228445689138, "frac_reward_zero_std": 0.0, "grad_norm": 2.683342362523262, "kl": 0.002689361572265625, "learning_rate": 9.996559900615756e-07, "loss": -0.0095, "num_tokens": 24134055.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8977107405662537, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08564673215540983, "rewards/wordcountpos_reward/raw_geo/std": 0.07659207324220883, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09953596037316068, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 557 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1309.0, "completions/max_terminated_length": 1309.0, "completions/mean_length": 943.8125, "completions/mean_terminated_length": 943.8125, "completions/min_length": 657.0, "completions/min_terminated_length": 657.0, "epoch": 0.11162232446489298, "frac_reward_zero_std": 0.0, "grad_norm": 4.189765804538905, "kl": 0.0048675537109375, "learning_rate": 9.996435959318142e-07, "loss": -0.0052, "num_tokens": 24185236.0, "reward": 0.0, "reward_std": 0.9948856830596924, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06460028780854046, "rewards/wordcountpos_reward/raw_geo/std": 0.09920312753771221, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7125, "rewards/wordcountpos_reward/raw_rule/std": 0.12102953419784837, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 558 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1267.0, "completions/max_terminated_length": 1267.0, "completions/mean_length": 1087.125, "completions/mean_terminated_length": 1087.125, "completions/min_length": 831.0, "completions/min_terminated_length": 831.0, "epoch": 0.11182236447289458, "frac_reward_zero_std": 0.0, "grad_norm": 3.198950422525801, "kl": 0.003292083740234375, "learning_rate": 9.996309825537934e-07, "loss": 0.002, "num_tokens": 24226614.0, "reward": 0.0, "reward_std": 0.5553363561630249, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.022890796905422128, "rewards/wordcountpos_reward/raw_geo/std": 0.2262109657007015, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.1270024788326182, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 559 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1215.0, "completions/max_terminated_length": 1215.0, "completions/mean_length": 999.3125, "completions/mean_terminated_length": 999.3125, "completions/min_length": 718.0, "completions/min_terminated_length": 718.0, "epoch": 0.11202240448089618, "frac_reward_zero_std": 0.0, "grad_norm": 3.258890908105495, "kl": 0.003223419189453125, "learning_rate": 9.996181499336637e-07, "loss": -0.0497, "num_tokens": 24264323.0, "reward": -2.60770320892334e-08, "reward_std": 1.063173532485962, "rewards/wordcountpos_reward/mean": -2.60770320892334e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11065609031325008, "rewards/wordcountpos_reward/raw_geo/std": 0.053565496049354094, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 560 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1387.0, "completions/mean_length": 1237.875, "completions/mean_terminated_length": 1220.4000244140625, "completions/min_length": 868.0, "completions/min_terminated_length": 868.0, "epoch": 0.11222244448889777, "frac_reward_zero_std": 0.0, "grad_norm": 3.061252396367618, "kl": 0.0030460357666015625, "learning_rate": 9.996050980776829e-07, "loss": -0.0191, "num_tokens": 24305537.0, "reward": 0.0, "reward_std": 0.8597975969314575, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09832104803295717, "rewards/wordcountpos_reward/raw_geo/std": 0.08394843955418584, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09098229375970787, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 561 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1164.0, "completions/max_terminated_length": 1164.0, "completions/mean_length": 904.0625, "completions/mean_terminated_length": 904.0625, "completions/min_length": 752.0, "completions/min_terminated_length": 752.0, "epoch": 0.11242248449689939, "frac_reward_zero_std": 0.0, "grad_norm": 3.376945163764293, "kl": 0.00327301025390625, "learning_rate": 9.995918269922143e-07, "loss": 0.0285, "num_tokens": 24342498.0, "reward": 0.0, "reward_std": 0.9232865571975708, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.028784287020987686, "rewards/wordcountpos_reward/raw_geo/std": 0.059373433572404787, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101764, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 562 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1304.0, "completions/max_terminated_length": 1304.0, "completions/mean_length": 1143.3125, "completions/mean_terminated_length": 1143.3125, "completions/min_length": 809.0, "completions/min_terminated_length": 809.0, "epoch": 0.11262252450490098, "frac_reward_zero_std": 0.0, "grad_norm": 1.8176050600575773, "kl": 0.0019559860229492188, "learning_rate": 9.995783366837291e-07, "loss": -0.0135, "num_tokens": 24376855.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9699097871780396, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07519102397061327, "rewards/wordcountpos_reward/raw_geo/std": 0.11153401002158146, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 563 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1205.0, "completions/max_terminated_length": 1205.0, "completions/mean_length": 1025.5, "completions/mean_terminated_length": 1025.5, "completions/min_length": 804.0, "completions/min_terminated_length": 804.0, "epoch": 0.11282256451290258, "frac_reward_zero_std": 0.0, "grad_norm": 3.8579631240466967, "kl": 0.00431060791015625, "learning_rate": 9.995646271588058e-07, "loss": -0.0266, "num_tokens": 24423711.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0112859010696411, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0684722708153597, "rewards/wordcountpos_reward/raw_geo/std": 0.07208256233553126, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.75, "rewards/wordcountpos_reward/raw_rule/std": 0.14298407059684815, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 564 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1309.0, "completions/mean_length": 1121.0625, "completions/mean_terminated_length": 1095.800048828125, "completions/min_length": 815.0, "completions/min_terminated_length": 815.0, "epoch": 0.11302260452090418, "frac_reward_zero_std": 0.0, "grad_norm": 3.2000097141700103, "kl": 0.00372314453125, "learning_rate": 9.995506984241287e-07, "loss": -0.0458, "num_tokens": 24464112.0, "reward": 0.0, "reward_std": 0.9509493708610535, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.023468870239363615, "rewards/wordcountpos_reward/raw_geo/std": 0.07889781303655741, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11122216672215289, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 565 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1419.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 1210.75, "completions/mean_terminated_length": 1210.75, "completions/min_length": 1020.0, "completions/min_terminated_length": 1020.0, "epoch": 0.11322264452890578, "frac_reward_zero_std": 0.0, "grad_norm": 3.530942414378748, "kl": 0.004276275634765625, "learning_rate": 9.995365504864897e-07, "loss": 0.0087, "num_tokens": 24513772.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0324478149414062, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09899354500667781, "rewards/wordcountpos_reward/raw_geo/std": 0.0843227690429665, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1029203215725281, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 566 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1437.0, "completions/mean_length": 1286.5625, "completions/mean_terminated_length": 1256.071533203125, "completions/min_length": 1010.0, "completions/min_terminated_length": 1010.0, "epoch": 0.11342268453690738, "frac_reward_zero_std": 0.0, "grad_norm": 2.989295942963754, "kl": 0.00418853759765625, "learning_rate": 9.995221833527873e-07, "loss": -0.0365, "num_tokens": 24563717.0, "reward": 0.0, "reward_std": 0.6273993849754333, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02518059984630949, "rewards/wordcountpos_reward/raw_geo/std": 0.06487796151302058, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11213417888437974, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 567 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1300.0, "completions/mean_length": 1210.5, "completions/mean_terminated_length": 1191.2000732421875, "completions/min_length": 946.0, "completions/min_terminated_length": 946.0, "epoch": 0.11362272454490899, "frac_reward_zero_std": 0.0, "grad_norm": 2.833800929068872, "kl": 0.0024013519287109375, "learning_rate": 9.995075970300272e-07, "loss": 0.0354, "num_tokens": 24605309.0, "reward": 0.0, "reward_std": 0.8578473329544067, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04966074312154036, "rewards/wordcountpos_reward/raw_geo/std": 0.06503495016918011, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891873, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 568 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1317.0, "completions/mean_length": 1147.9375, "completions/mean_terminated_length": 1124.4666748046875, "completions/min_length": 943.0, "completions/min_terminated_length": 943.0, "epoch": 0.11382276455291059, "frac_reward_zero_std": 0.0, "grad_norm": 2.7935809710912016, "kl": 0.00290679931640625, "learning_rate": 9.994927915253217e-07, "loss": 0.0203, "num_tokens": 24635852.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9936357736587524, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.00745102399672297, "rewards/wordcountpos_reward/raw_geo/std": 0.08224213880335354, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026001, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 569 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1439.0, "completions/mean_length": 1327.125, "completions/mean_terminated_length": 1223.4000244140625, "completions/min_length": 920.0, "completions/min_terminated_length": 920.0, "epoch": 0.11402280456091218, "frac_reward_zero_std": 0.0, "grad_norm": 2.654950700401816, "kl": 0.003543853759765625, "learning_rate": 9.994777668458896e-07, "loss": -0.0756, "num_tokens": 24692286.0, "reward": 0.0, "reward_std": 0.8973362445831299, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.11054539254611581, "rewards/wordcountpos_reward/raw_geo/std": 0.12082637303706929, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09418264367902598, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 570 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1315.0, "completions/max_terminated_length": 1315.0, "completions/mean_length": 1115.5, "completions/mean_terminated_length": 1115.5, "completions/min_length": 958.0, "completions/min_terminated_length": 958.0, "epoch": 0.11422284456891378, "frac_reward_zero_std": 0.0, "grad_norm": 2.9263528466020903, "kl": 0.003662109375, "learning_rate": 9.994625229990577e-07, "loss": -0.0472, "num_tokens": 24732222.0, "reward": 0.0, "reward_std": 0.8776353597640991, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05033152705920437, "rewards/wordcountpos_reward/raw_geo/std": 0.07089268646780239, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 571 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1213.0, "completions/max_terminated_length": 1213.0, "completions/mean_length": 993.4375, "completions/mean_terminated_length": 993.4375, "completions/min_length": 671.0, "completions/min_terminated_length": 671.0, "epoch": 0.11442288457691538, "frac_reward_zero_std": 0.0, "grad_norm": 3.159205315874274, "kl": 0.003131866455078125, "learning_rate": 9.994470599922585e-07, "loss": -0.023, "num_tokens": 24780205.0, "reward": 0.0, "reward_std": 0.9990221261978149, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04287313775383517, "rewards/wordcountpos_reward/raw_geo/std": 0.11187219434223016, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238704, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 572 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1450.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1160.5625, "completions/mean_terminated_length": 1160.5625, "completions/min_length": 969.0, "completions/min_terminated_length": 969.0, "epoch": 0.11462292458491698, "frac_reward_zero_std": 0.0, "grad_norm": 2.8619545402282514, "kl": 0.0032501220703125, "learning_rate": 9.99431377833032e-07, "loss": 0.0108, "num_tokens": 24828518.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8064454793930054, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04616483808873933, "rewards/wordcountpos_reward/raw_geo/std": 0.0556774454688619, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.12931443160847217, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 573 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1398.0, "completions/max_terminated_length": 1398.0, "completions/mean_length": 1161.375, "completions/mean_terminated_length": 1161.375, "completions/min_length": 846.0, "completions/min_terminated_length": 846.0, "epoch": 0.11482296459291859, "frac_reward_zero_std": 0.0, "grad_norm": 3.3056665248023487, "kl": 0.00386810302734375, "learning_rate": 9.994154765290247e-07, "loss": -0.0192, "num_tokens": 24869836.0, "reward": 0.0, "reward_std": 0.8694266080856323, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07475566030167846, "rewards/wordcountpos_reward/raw_geo/std": 0.08803931553654668, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503963, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 574 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1442.0, "completions/max_terminated_length": 1442.0, "completions/mean_length": 1188.9375, "completions/mean_terminated_length": 1188.9375, "completions/min_length": 1009.0, "completions/min_terminated_length": 1009.0, "epoch": 0.11502300460092019, "frac_reward_zero_std": 0.0, "grad_norm": 2.2711858783873935, "kl": 0.0024394989013671875, "learning_rate": 9.993993560879905e-07, "loss": -0.032, "num_tokens": 24908227.0, "reward": 0.0, "reward_std": 0.748092770576477, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.024501221022122377, "rewards/wordcountpos_reward/raw_geo/std": 0.09323710150970747, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 575 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1076.0, "completions/max_terminated_length": 1076.0, "completions/mean_length": 938.0625, "completions/mean_terminated_length": 938.0625, "completions/min_length": 758.0, "completions/min_terminated_length": 758.0, "epoch": 0.11522304460892178, "frac_reward_zero_std": 0.0, "grad_norm": 3.2028378879940096, "kl": 0.002849578857421875, "learning_rate": 9.993830165177895e-07, "loss": -0.0096, "num_tokens": 24947052.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9939234256744385, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05664545438746776, "rewards/wordcountpos_reward/raw_geo/std": 0.07679814285615115, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.16771890063326086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 576 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1430.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1175.25, "completions/mean_terminated_length": 1175.25, "completions/min_length": 963.0, "completions/min_terminated_length": 963.0, "epoch": 0.11542308461692338, "frac_reward_zero_std": 0.0, "grad_norm": 3.3913256548708586, "kl": 0.004215240478515625, "learning_rate": 9.99366457826389e-07, "loss": 0.0005, "num_tokens": 24990904.0, "reward": 0.0, "reward_std": 1.0443048477172852, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0884725822662143, "rewards/wordcountpos_reward/raw_geo/std": 0.10828950413444931, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 577 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1372.0, "completions/max_terminated_length": 1372.0, "completions/mean_length": 991.875, "completions/mean_terminated_length": 991.875, "completions/min_length": 726.0, "completions/min_terminated_length": 726.0, "epoch": 0.11562312462492498, "frac_reward_zero_std": 0.0, "grad_norm": 3.910274966700335, "kl": 0.00441741943359375, "learning_rate": 9.99349680021863e-07, "loss": 0.0065, "num_tokens": 25027550.0, "reward": 0.0, "reward_std": 0.837898313999176, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.017382526699437066, "rewards/wordcountpos_reward/raw_geo/std": 0.1234621302531474, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12995725793078622, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 578 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 1044.6875, "completions/mean_terminated_length": 979.6428833007812, "completions/min_length": 743.0, "completions/min_terminated_length": 743.0, "epoch": 0.11582316463292658, "frac_reward_zero_std": 0.0, "grad_norm": 3.353134539448895, "kl": 0.0036258697509765625, "learning_rate": 9.993326831123928e-07, "loss": -0.0683, "num_tokens": 25071753.0, "reward": 0.0, "reward_std": 0.44138485193252563, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03922920221087807, "rewards/wordcountpos_reward/raw_geo/std": 0.2234370592356771, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09878896324620105, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 579 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1376.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 1052.5, "completions/mean_terminated_length": 1052.5, "completions/min_length": 653.0, "completions/min_terminated_length": 653.0, "epoch": 0.11602320464092819, "frac_reward_zero_std": 0.0, "grad_norm": 3.600139110354816, "kl": 0.004913330078125, "learning_rate": 9.993154671062658e-07, "loss": -0.0378, "num_tokens": 25108041.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0393924713134766, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1792920419318415, "rewards/wordcountpos_reward/raw_geo/std": 0.1689911534755474, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 580 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1286.0, "completions/max_terminated_length": 1286.0, "completions/mean_length": 1076.9375, "completions/mean_terminated_length": 1076.9375, "completions/min_length": 910.0, "completions/min_terminated_length": 910.0, "epoch": 0.11622324464892979, "frac_reward_zero_std": 0.0, "grad_norm": 3.8589304905133766, "kl": 0.0048980712890625, "learning_rate": 9.992980320118768e-07, "loss": -0.0393, "num_tokens": 25159096.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8237398266792297, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07778217797664813, "rewards/wordcountpos_reward/raw_geo/std": 0.18268940884829016, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.6916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13526380260918403, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 581 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1352.0, "completions/max_terminated_length": 1352.0, "completions/mean_length": 1030.875, "completions/mean_terminated_length": 1030.875, "completions/min_length": 810.0, "completions/min_terminated_length": 810.0, "epoch": 0.11642328465693139, "frac_reward_zero_std": 0.0, "grad_norm": 2.8635549911104263, "kl": 0.0033092498779296875, "learning_rate": 9.992803778377272e-07, "loss": -0.0391, "num_tokens": 25194710.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9984513521194458, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07497094598922657, "rewards/wordcountpos_reward/raw_geo/std": 0.13422377795413132, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 582 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1193.0, "completions/mean_length": 1041.375, "completions/mean_terminated_length": 1010.800048828125, "completions/min_length": 836.0, "completions/min_terminated_length": 836.0, "epoch": 0.11662332466493298, "frac_reward_zero_std": 0.0, "grad_norm": 2.775301710796813, "kl": 0.0035247802734375, "learning_rate": 9.99262504592425e-07, "loss": 0.0319, "num_tokens": 25227452.0, "reward": 2.9802322387695312e-08, "reward_std": 0.68071448802948, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06916690320467472, "rewards/wordcountpos_reward/raw_geo/std": 0.1174951869531071, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298362, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 583 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1491.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 990.5625, "completions/mean_terminated_length": 990.5625, "completions/min_length": 654.0, "completions/min_terminated_length": 654.0, "epoch": 0.11682336467293458, "frac_reward_zero_std": 0.0, "grad_norm": 2.995364025673235, "kl": 0.00260162353515625, "learning_rate": 9.99244412284686e-07, "loss": -0.0264, "num_tokens": 25269061.0, "reward": 0.0, "reward_std": 1.0686452388763428, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.061970819933237935, "rewards/wordcountpos_reward/raw_geo/std": 0.06606573332844305, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194862, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 584 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 1233.0625, "completions/mean_terminated_length": 1215.2667236328125, "completions/min_length": 972.0, "completions/min_terminated_length": 972.0, "epoch": 0.1170234046809362, "frac_reward_zero_std": 0.0, "grad_norm": 3.3124435602370617, "kl": 0.004154205322265625, "learning_rate": 9.99226100923331e-07, "loss": -0.0023, "num_tokens": 25317702.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0391209125518799, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.13717781753963867, "rewards/wordcountpos_reward/raw_geo/std": 0.06388630839861507, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503963, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 585 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1322.0, "completions/max_terminated_length": 1322.0, "completions/mean_length": 1066.625, "completions/mean_terminated_length": 1066.625, "completions/min_length": 864.0, "completions/min_terminated_length": 864.0, "epoch": 0.11722344468893779, "frac_reward_zero_std": 0.0, "grad_norm": 3.5779085405472486, "kl": 0.00460052490234375, "learning_rate": 9.992075705172898e-07, "loss": 0.0003, "num_tokens": 25361648.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0536937713623047, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04481952401741872, "rewards/wordcountpos_reward/raw_geo/std": 0.09611909117815925, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593313, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 586 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1318.0, "completions/max_terminated_length": 1318.0, "completions/mean_length": 1135.125, "completions/mean_terminated_length": 1135.125, "completions/min_length": 946.0, "completions/min_terminated_length": 946.0, "epoch": 0.11742348469693939, "frac_reward_zero_std": 0.0, "grad_norm": 2.513543984785798, "kl": 0.00205230712890625, "learning_rate": 9.991888210755972e-07, "loss": 0.0057, "num_tokens": 25409746.0, "reward": 7.450580596923828e-09, "reward_std": 1.0209201574325562, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.10751887698938156, "rewards/wordcountpos_reward/raw_geo/std": 0.08335179521983012, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116194, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 587 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1482.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1030.625, "completions/mean_terminated_length": 1030.625, "completions/min_length": 782.0, "completions/min_terminated_length": 782.0, "epoch": 0.11762352470494099, "frac_reward_zero_std": 0.0, "grad_norm": 2.6230283885135584, "kl": 0.00194549560546875, "learning_rate": 9.991698526073957e-07, "loss": 0.0075, "num_tokens": 25441860.0, "reward": 1.1175870895385742e-08, "reward_std": 0.9877455234527588, "rewards/wordcountpos_reward/mean": 1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11025377591357817, "rewards/wordcountpos_reward/raw_geo/std": 0.15686384576746407, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.12164002752505565, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 588 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1394.0, "completions/max_terminated_length": 1394.0, "completions/mean_length": 1208.5625, "completions/mean_terminated_length": 1208.5625, "completions/min_length": 1017.0, "completions/min_terminated_length": 1017.0, "epoch": 0.11782356471294259, "frac_reward_zero_std": 0.0, "grad_norm": 2.848728275486942, "kl": 0.0029144287109375, "learning_rate": 9.991506651219344e-07, "loss": 0.0081, "num_tokens": 25491981.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7379999160766602, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11822403131869448, "rewards/wordcountpos_reward/raw_geo/std": 0.2325377137103034, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066471, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 589 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 986.625, "completions/mean_terminated_length": 868.1538696289062, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.11802360472094418, "frac_reward_zero_std": 0.0, "grad_norm": 2.8757895426961873, "kl": 0.003429412841796875, "learning_rate": 9.991312586285694e-07, "loss": -0.1393, "num_tokens": 25532535.0, "reward": 0.0, "reward_std": 0.796451985836029, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06713262369187364, "rewards/wordcountpos_reward/raw_geo/std": 0.05512106823014079, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.17384539747207065, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 590 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1209.0, "completions/max_terminated_length": 1209.0, "completions/mean_length": 951.8125, "completions/mean_terminated_length": 951.8125, "completions/min_length": 578.0, "completions/min_terminated_length": 578.0, "epoch": 0.1182236447289458, "frac_reward_zero_std": 0.0, "grad_norm": 2.554507410650862, "kl": 0.002689361572265625, "learning_rate": 9.99111633136763e-07, "loss": -0.0778, "num_tokens": 25563276.0, "reward": 0.0, "reward_std": 0.8934731483459473, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10302287957460166, "rewards/wordcountpos_reward/raw_geo/std": 0.13524604673773902, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.14900907255500823, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 591 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1487.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1160.8125, "completions/mean_terminated_length": 1160.8125, "completions/min_length": 822.0, "completions/min_terminated_length": 822.0, "epoch": 0.1184236847369474, "frac_reward_zero_std": 0.0, "grad_norm": 3.4125326419007647, "kl": 0.004535675048828125, "learning_rate": 9.99091788656085e-07, "loss": 0.0118, "num_tokens": 25613153.0, "reward": 0.0, "reward_std": 0.896718442440033, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.13907460245307837, "rewards/wordcountpos_reward/raw_geo/std": 0.06629415874490989, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12345339501504503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 592 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1254.0, "completions/max_terminated_length": 1254.0, "completions/mean_length": 1065.6875, "completions/mean_terminated_length": 1065.6875, "completions/min_length": 909.0, "completions/min_terminated_length": 909.0, "epoch": 0.11862372474494899, "frac_reward_zero_std": 0.0, "grad_norm": 3.77619782251697, "kl": 0.0047760009765625, "learning_rate": 9.990717251962117e-07, "loss": 0.0018, "num_tokens": 25660716.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9303244352340698, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.2007007850778933, "rewards/wordcountpos_reward/raw_geo/std": 0.12608709940807936, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10292032157252812, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 593 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1347.0, "completions/max_terminated_length": 1347.0, "completions/mean_length": 1038.875, "completions/mean_terminated_length": 1038.875, "completions/min_length": 791.0, "completions/min_terminated_length": 791.0, "epoch": 0.11882376475295059, "frac_reward_zero_std": 0.0, "grad_norm": 3.2705632132229274, "kl": 0.003604888916015625, "learning_rate": 9.990514427669258e-07, "loss": 0.0016, "num_tokens": 25699530.0, "reward": 0.0, "reward_std": 0.8749842643737793, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.014573999936885611, "rewards/wordcountpos_reward/raw_geo/std": 0.12330494898387871, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1408308678285174, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 594 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1289.0, "completions/max_terminated_length": 1289.0, "completions/mean_length": 1070.8125, "completions/mean_terminated_length": 1070.8125, "completions/min_length": 797.0, "completions/min_terminated_length": 797.0, "epoch": 0.11902380476095219, "frac_reward_zero_std": 0.0, "grad_norm": 3.613535943423098, "kl": 0.00507354736328125, "learning_rate": 9.990309413781174e-07, "loss": 0.0076, "num_tokens": 25740295.0, "reward": 5.960464477539063e-08, "reward_std": 0.8166874647140503, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07473252236098153, "rewards/wordcountpos_reward/raw_geo/std": 0.09498954456399569, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.15371932093796678, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 595 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1190.0, "completions/max_terminated_length": 1190.0, "completions/mean_length": 1032.375, "completions/mean_terminated_length": 1032.375, "completions/min_length": 787.0, "completions/min_terminated_length": 787.0, "epoch": 0.11922384476895379, "frac_reward_zero_std": 0.0, "grad_norm": 3.8786717402680106, "kl": 0.0048828125, "learning_rate": 9.99010221039783e-07, "loss": -0.0254, "num_tokens": 25786157.0, "reward": 0.0, "reward_std": 0.721498966217041, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10591279427986157, "rewards/wordcountpos_reward/raw_geo/std": 0.27952419364077286, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09496588081262933, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 596 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1336.0, "completions/max_terminated_length": 1336.0, "completions/mean_length": 1087.375, "completions/mean_terminated_length": 1087.375, "completions/min_length": 987.0, "completions/min_terminated_length": 987.0, "epoch": 0.1194238847769554, "frac_reward_zero_std": 0.0, "grad_norm": 3.649881414576622, "kl": 0.00440216064453125, "learning_rate": 9.989892817620258e-07, "loss": 0.0169, "num_tokens": 25828275.0, "reward": 0.0, "reward_std": 1.045090675354004, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1728914810093765, "rewards/wordcountpos_reward/raw_geo/std": 0.10331252127508549, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.10945995377982529, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 597 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1351.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 1155.0625, "completions/mean_terminated_length": 1155.0625, "completions/min_length": 967.0, "completions/min_terminated_length": 967.0, "epoch": 0.119623924784957, "frac_reward_zero_std": 0.0, "grad_norm": 3.1587583548670577, "kl": 0.003627777099609375, "learning_rate": 9.989681235550562e-07, "loss": -0.0077, "num_tokens": 25870700.0, "reward": 0.0, "reward_std": 0.5376613140106201, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10627275215566025, "rewards/wordcountpos_reward/raw_geo/std": 0.25543361580053725, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593313, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 598 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1046.0, "completions/max_terminated_length": 1046.0, "completions/mean_length": 940.25, "completions/mean_terminated_length": 940.25, "completions/min_length": 892.0, "completions/min_terminated_length": 892.0, "epoch": 0.11982396479295859, "frac_reward_zero_std": 0.0, "grad_norm": 1.6594137725287148, "kl": 0.0004929304122924805, "learning_rate": 9.989467464291908e-07, "loss": 0.0172, "num_tokens": 25902776.0, "reward": 0.0, "reward_std": 0.9253208637237549, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10106021719825463, "rewards/wordcountpos_reward/raw_geo/std": 0.12120340532914584, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0969917904124231, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 599 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1235.0, "completions/max_terminated_length": 1235.0, "completions/mean_length": 1019.5, "completions/mean_terminated_length": 1019.5, "completions/min_length": 778.0, "completions/min_terminated_length": 778.0, "epoch": 0.12002400480096019, "frac_reward_zero_std": 0.0, "grad_norm": 3.3025065823842503, "kl": 0.00357818603515625, "learning_rate": 9.989251503948531e-07, "loss": -0.0314, "num_tokens": 25941464.0, "reward": -4.470348358154297e-08, "reward_std": 1.0555447340011597, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0028509327567583483, "rewards/wordcountpos_reward/raw_geo/std": 0.11621934984659056, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09699179041242309, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 600 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1096.0, "completions/max_terminated_length": 1096.0, "completions/mean_length": 859.3125, "completions/mean_terminated_length": 859.3125, "completions/min_length": 619.0, "completions/min_terminated_length": 619.0, "epoch": 0.12022404480896179, "frac_reward_zero_std": 0.0, "grad_norm": 1.8393055690737452, "kl": 0.0011968612670898438, "learning_rate": 9.989033354625734e-07, "loss": -0.0094, "num_tokens": 25973277.0, "reward": 5.960464477539063e-08, "reward_std": 0.4579020142555237, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08404742083151685, "rewards/wordcountpos_reward/raw_geo/std": 0.24157590795669093, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965646, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 601 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1417.0, "completions/mean_length": 1268.5, "completions/mean_terminated_length": 1163.272705078125, "completions/min_length": 800.0, "completions/min_terminated_length": 800.0, "epoch": 0.12042408481696339, "frac_reward_zero_std": 0.0, "grad_norm": 3.336937062895583, "kl": 0.004791259765625, "learning_rate": 9.988813016429892e-07, "loss": 0.0778, "num_tokens": 26018461.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7661669254302979, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09766727093301909, "rewards/wordcountpos_reward/raw_geo/std": 0.1446151190928726, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11547005383792515, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 602 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1171.0, "completions/max_terminated_length": 1171.0, "completions/mean_length": 885.3125, "completions/mean_terminated_length": 885.3125, "completions/min_length": 751.0, "completions/min_terminated_length": 751.0, "epoch": 0.120624124824965, "frac_reward_zero_std": 0.0, "grad_norm": 4.169743733588036, "kl": 0.00424957275390625, "learning_rate": 9.98859048946844e-07, "loss": 0.0144, "num_tokens": 26068330.0, "reward": -7.450580596923828e-09, "reward_std": 1.0524903535842896, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.061496268225116744, "rewards/wordcountpos_reward/raw_geo/std": 0.24759325476198477, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8250000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078612, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 603 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1156.0, "completions/mean_length": 1240.0625, "completions/mean_terminated_length": 980.125, "completions/min_length": 831.0, "completions/min_terminated_length": 831.0, "epoch": 0.1208241648329666, "frac_reward_zero_std": 0.0, "grad_norm": 2.657923630667353, "kl": 0.0023164749145507812, "learning_rate": 9.98836577384988e-07, "loss": -0.0388, "num_tokens": 26114571.0, "reward": 0.0, "reward_std": 0.5082321166992188, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10912996498151603, "rewards/wordcountpos_reward/raw_geo/std": 0.10358461671892392, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1449776483411099, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 604 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1093.5625, "completions/mean_terminated_length": 1066.4666748046875, "completions/min_length": 696.0, "completions/min_terminated_length": 696.0, "epoch": 0.1210242048409682, "frac_reward_zero_std": 0.0, "grad_norm": 3.063282695520789, "kl": 0.003543853759765625, "learning_rate": 9.98813886968379e-07, "loss": 0.0004, "num_tokens": 26165916.0, "reward": 0.0, "reward_std": 0.888217568397522, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06128346360775267, "rewards/wordcountpos_reward/raw_geo/std": 0.10585517944269796, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13709958532503408, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 605 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1161.0625, "completions/mean_terminated_length": 1138.4666748046875, "completions/min_length": 793.0, "completions/min_terminated_length": 793.0, "epoch": 0.12122424484896979, "frac_reward_zero_std": 0.0, "grad_norm": 2.4556025997089503, "kl": 0.00281524658203125, "learning_rate": 9.987909777080804e-07, "loss": -0.0251, "num_tokens": 26200245.0, "reward": -7.450580596923828e-09, "reward_std": 1.045180320739746, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.05133146450931896, "rewards/wordcountpos_reward/raw_geo/std": 0.0773202615145305, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08850612031567838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 606 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1394.0, "completions/mean_length": 1066.875, "completions/mean_terminated_length": 1038.0, "completions/min_length": 804.0, "completions/min_terminated_length": 804.0, "epoch": 0.12142428485697139, "frac_reward_zero_std": 0.0, "grad_norm": 3.5582325376975255, "kl": 0.004047393798828125, "learning_rate": 9.987678496152636e-07, "loss": -0.0373, "num_tokens": 26252051.0, "reward": -2.9802322387695312e-08, "reward_std": 0.4290241599082947, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.028119446965989305, "rewards/wordcountpos_reward/raw_geo/std": 0.07862825389079174, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.15389991938004774, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 607 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1302.0, "completions/max_terminated_length": 1302.0, "completions/mean_length": 1070.5625, "completions/mean_terminated_length": 1070.5625, "completions/min_length": 909.0, "completions/min_terminated_length": 909.0, "epoch": 0.12162432486497299, "frac_reward_zero_std": 0.0, "grad_norm": 3.5837425986678246, "kl": 0.00506591796875, "learning_rate": 9.987445027012051e-07, "loss": -0.0138, "num_tokens": 26291276.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0388023853302002, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0330544137889492, "rewards/wordcountpos_reward/raw_geo/std": 0.054684386172413016, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.09699179041242309, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 608 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1459.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1254.0, "completions/mean_terminated_length": 1254.0, "completions/min_length": 926.0, "completions/min_terminated_length": 926.0, "epoch": 0.1218243648729746, "frac_reward_zero_std": 0.0, "grad_norm": 2.8333074798072353, "kl": 0.003570556640625, "learning_rate": 9.987209369772897e-07, "loss": 0.0301, "num_tokens": 26335036.0, "reward": 0.0, "reward_std": 0.7927502989768982, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.28678040310294495, "rewards/wordcountpos_reward/raw_geo/std": 0.1652713501896006, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116194, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 609 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1201.0, "completions/max_terminated_length": 1201.0, "completions/mean_length": 967.375, "completions/mean_terminated_length": 967.375, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.1220244048809762, "frac_reward_zero_std": 0.0, "grad_norm": 1.6270450196842778, "kl": 0.001155853271484375, "learning_rate": 9.986971524550076e-07, "loss": -0.1561, "num_tokens": 26375154.0, "reward": 2.9802322387695312e-08, "reward_std": 1.042828917503357, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10004409930495685, "rewards/wordcountpos_reward/raw_geo/std": 0.03765844204343756, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.20294133434366, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 610 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1225.0, "completions/max_terminated_length": 1225.0, "completions/mean_length": 946.1875, "completions/mean_terminated_length": 946.1875, "completions/min_length": 653.0, "completions/min_terminated_length": 653.0, "epoch": 0.1222244448889778, "frac_reward_zero_std": 0.0, "grad_norm": 3.3084233070482325, "kl": 0.004467010498046875, "learning_rate": 9.986731491459567e-07, "loss": -0.064, "num_tokens": 26418821.0, "reward": -5.960464477539063e-08, "reward_std": 0.6977008581161499, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12472395408433459, "rewards/wordcountpos_reward/raw_geo/std": 0.12125465444713009, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09878896324620105, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 611 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1250.0, "completions/max_terminated_length": 1250.0, "completions/mean_length": 946.625, "completions/mean_terminated_length": 946.625, "completions/min_length": 792.0, "completions/min_terminated_length": 792.0, "epoch": 0.1224244848969794, "frac_reward_zero_std": 0.0, "grad_norm": 3.9088394766494114, "kl": 0.004638671875, "learning_rate": 9.986489270618406e-07, "loss": -0.0041, "num_tokens": 26460183.0, "reward": 0.0, "reward_std": 0.8044872879981995, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.004338921517332701, "rewards/wordcountpos_reward/raw_geo/std": 0.22816820582093225, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.0925962962222252, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 612 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1213.375, "completions/mean_terminated_length": 1041.4000244140625, "completions/min_length": 827.0, "completions/min_terminated_length": 827.0, "epoch": 0.12262452490498099, "frac_reward_zero_std": 0.0, "grad_norm": 2.276849364455299, "kl": 0.003658294677734375, "learning_rate": 9.986244862144706e-07, "loss": -0.0062, "num_tokens": 26503469.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9733021259307861, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07596876596732975, "rewards/wordcountpos_reward/raw_geo/std": 0.08366904544231499, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 613 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1263.0, "completions/max_terminated_length": 1263.0, "completions/mean_length": 939.875, "completions/mean_terminated_length": 939.875, "completions/min_length": 663.0, "completions/min_terminated_length": 663.0, "epoch": 0.1228245649129826, "frac_reward_zero_std": 0.0, "grad_norm": 3.980765546849583, "kl": 0.00487518310546875, "learning_rate": 9.98599826615764e-07, "loss": 0.0105, "num_tokens": 26533035.0, "reward": 0.0, "reward_std": 0.6733291745185852, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.014356009050411659, "rewards/wordcountpos_reward/raw_geo/std": 0.017854352485406063, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08766518798921946, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 614 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1262.0, "completions/max_terminated_length": 1262.0, "completions/mean_length": 940.625, "completions/mean_terminated_length": 940.625, "completions/min_length": 788.0, "completions/min_terminated_length": 788.0, "epoch": 0.1230246049209842, "frac_reward_zero_std": 0.0, "grad_norm": 3.893811188225448, "kl": 0.0047454833984375, "learning_rate": 9.985749482777447e-07, "loss": 0.0027, "num_tokens": 26572165.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0360522270202637, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04120507683636414, "rewards/wordcountpos_reward/raw_geo/std": 0.1340279500130438, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11603000888978234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 615 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1403.0, "completions/max_terminated_length": 1403.0, "completions/mean_length": 1051.125, "completions/mean_terminated_length": 1051.125, "completions/min_length": 819.0, "completions/min_terminated_length": 819.0, "epoch": 0.1232246449289858, "frac_reward_zero_std": 0.0, "grad_norm": 3.0674836647561516, "kl": 0.0035247802734375, "learning_rate": 9.985498512125438e-07, "loss": -0.055, "num_tokens": 26604031.0, "reward": 0.0, "reward_std": 0.9960306286811829, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06768566342256044, "rewards/wordcountpos_reward/raw_geo/std": 0.08527267630277444, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12931443160847217, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 616 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1373.0, "completions/mean_length": 1111.1875, "completions/mean_terminated_length": 1085.2667236328125, "completions/min_length": 781.0, "completions/min_terminated_length": 781.0, "epoch": 0.1234246849369874, "frac_reward_zero_std": 0.0, "grad_norm": 2.799652363512096, "kl": 0.003025054931640625, "learning_rate": 9.985245354323985e-07, "loss": 0.0033, "num_tokens": 26643394.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0629085302352905, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03913411351888386, "rewards/wordcountpos_reward/raw_geo/std": 0.04767410521977387, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.08333333333333334, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 617 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1365.0, "completions/mean_length": 1163.6875, "completions/mean_terminated_length": 1086.076904296875, "completions/min_length": 701.0, "completions/min_terminated_length": 701.0, "epoch": 0.123624724944989, "frac_reward_zero_std": 0.0, "grad_norm": 3.364450538142126, "kl": 0.00482940673828125, "learning_rate": 9.984990009496531e-07, "loss": 0.0129, "num_tokens": 26687109.0, "reward": -7.450580596923828e-09, "reward_std": 1.063175082206726, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.11514919795469915, "rewards/wordcountpos_reward/raw_geo/std": 0.06422640611243444, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.102469507659596, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 618 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1176.875, "completions/mean_terminated_length": 1155.3333740234375, "completions/min_length": 910.0, "completions/min_terminated_length": 910.0, "epoch": 0.12382476495299059, "frac_reward_zero_std": 0.0, "grad_norm": 3.289606833352297, "kl": 0.00428009033203125, "learning_rate": 9.984732477767583e-07, "loss": 0.0533, "num_tokens": 26733875.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6770787239074707, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.17012176081642488, "rewards/wordcountpos_reward/raw_geo/std": 0.08161800095832152, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.12164002752505566, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 619 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1431.0, "completions/max_terminated_length": 1431.0, "completions/mean_length": 1006.125, "completions/mean_terminated_length": 1006.125, "completions/min_length": 727.0, "completions/min_terminated_length": 727.0, "epoch": 0.1240248049609922, "frac_reward_zero_std": 0.0, "grad_norm": 3.76935441621033, "kl": 0.005126953125, "learning_rate": 9.984472759262715e-07, "loss": 0.0109, "num_tokens": 26768773.0, "reward": -1.862645149230957e-08, "reward_std": 1.0479369163513184, "rewards/wordcountpos_reward/mean": -1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04200083852984359, "rewards/wordcountpos_reward/raw_geo/std": 0.07508393691671464, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7625, "rewards/wordcountpos_reward/raw_rule/std": 0.16141733350404336, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 620 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1205.9375, "completions/mean_terminated_length": 1163.9285888671875, "completions/min_length": 738.0, "completions/min_terminated_length": 738.0, "epoch": 0.1242248449689938, "frac_reward_zero_std": 0.0, "grad_norm": 3.0235433804391216, "kl": 0.004535675048828125, "learning_rate": 9.984210854108563e-07, "loss": -0.045, "num_tokens": 26818532.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8869249820709229, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04740029056353276, "rewards/wordcountpos_reward/raw_geo/std": 0.18896733059278176, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 621 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1321.0, "completions/max_terminated_length": 1321.0, "completions/mean_length": 987.8125, "completions/mean_terminated_length": 987.8125, "completions/min_length": 686.0, "completions/min_terminated_length": 686.0, "epoch": 0.1244248849769954, "frac_reward_zero_std": 0.0, "grad_norm": 3.6696230262281166, "kl": 0.0048065185546875, "learning_rate": 9.98394676243284e-07, "loss": 0.0263, "num_tokens": 26852241.0, "reward": -5.960464477539063e-08, "reward_std": 0.9135434031486511, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02709717430519235, "rewards/wordcountpos_reward/raw_geo/std": 0.03719682286016161, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10945995377982526, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 622 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1496.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1036.4375, "completions/mean_terminated_length": 1036.4375, "completions/min_length": 638.0, "completions/min_terminated_length": 638.0, "epoch": 0.124624924984997, "frac_reward_zero_std": 0.0, "grad_norm": 3.4522917928057533, "kl": 0.004734039306640625, "learning_rate": 9.983680484364312e-07, "loss": -0.0031, "num_tokens": 26901280.0, "reward": 0.0, "reward_std": 0.6508069038391113, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06533817396710875, "rewards/wordcountpos_reward/raw_geo/std": 0.36540788595245494, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7, "rewards/wordcountpos_reward/raw_rule/std": 0.16865480854231357, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 623 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1118.0, "completions/max_terminated_length": 1118.0, "completions/mean_length": 918.0625, "completions/mean_terminated_length": 918.0625, "completions/min_length": 682.0, "completions/min_terminated_length": 682.0, "epoch": 0.1248249649929986, "frac_reward_zero_std": 0.0, "grad_norm": 2.579914600881425, "kl": 0.003475189208984375, "learning_rate": 9.98341202003282e-07, "loss": -0.0463, "num_tokens": 26940993.0, "reward": -2.9802322387695312e-08, "reward_std": 0.44443702697753906, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0626511853748225, "rewards/wordcountpos_reward/raw_geo/std": 0.13308606533254844, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.13957607775504186, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 624 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1423.0, "completions/max_terminated_length": 1423.0, "completions/mean_length": 1046.75, "completions/mean_terminated_length": 1046.75, "completions/min_length": 810.0, "completions/min_terminated_length": 810.0, "epoch": 0.1250250050010002, "frac_reward_zero_std": 0.0, "grad_norm": 3.401655397624474, "kl": 0.0059356689453125, "learning_rate": 9.983141369569269e-07, "loss": 0.0306, "num_tokens": 26987189.0, "reward": 0.0, "reward_std": 0.5568276643753052, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.18044071231883343, "rewards/wordcountpos_reward/raw_geo/std": 0.08709184195541693, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1264911064067352, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 625 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1482.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1070.125, "completions/mean_terminated_length": 1070.125, "completions/min_length": 936.0, "completions/min_terminated_length": 936.0, "epoch": 0.1252250450090018, "frac_reward_zero_std": 0.0, "grad_norm": 2.698697801681276, "kl": 0.003925323486328125, "learning_rate": 9.982868533105628e-07, "loss": 0.0315, "num_tokens": 27033975.0, "reward": 0.0, "reward_std": 0.909441351890564, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05037267939674402, "rewards/wordcountpos_reward/raw_geo/std": 0.06662628101123586, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10749676997731401, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 626 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1435.0, "completions/mean_length": 1171.8125, "completions/mean_terminated_length": 1149.933349609375, "completions/min_length": 914.0, "completions/min_terminated_length": 914.0, "epoch": 0.1254250850170034, "frac_reward_zero_std": 0.0, "grad_norm": 3.158058147482287, "kl": 0.0053558349609375, "learning_rate": 9.982593510774934e-07, "loss": -0.0165, "num_tokens": 27086668.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5071286559104919, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05255425439892183, "rewards/wordcountpos_reward/raw_geo/std": 0.07159454359449532, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11792967144619461, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 627 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1147.0, "completions/max_terminated_length": 1147.0, "completions/mean_length": 1012.8125, "completions/mean_terminated_length": 1012.8125, "completions/min_length": 826.0, "completions/min_terminated_length": 826.0, "epoch": 0.125625125025005, "frac_reward_zero_std": 0.0, "grad_norm": 2.783341504357864, "kl": 0.003597259521484375, "learning_rate": 9.98231630271129e-07, "loss": -0.0026, "num_tokens": 27124089.0, "reward": 3.725290298461914e-09, "reward_std": 1.0599117279052734, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.02447395541263964, "rewards/wordcountpos_reward/raw_geo/std": 0.112989011009317, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195308, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 628 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1485.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1051.75, "completions/mean_terminated_length": 1051.75, "completions/min_length": 899.0, "completions/min_terminated_length": 899.0, "epoch": 0.12582516503300661, "frac_reward_zero_std": 0.0, "grad_norm": 3.1794291553351517, "kl": 0.00438690185546875, "learning_rate": 9.982036909049862e-07, "loss": -0.0209, "num_tokens": 27164317.0, "reward": 0.0, "reward_std": 0.8919508457183838, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0604702040709929, "rewards/wordcountpos_reward/raw_geo/std": 0.049235970855206486, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362767, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 629 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1311.0, "completions/max_terminated_length": 1311.0, "completions/mean_length": 1026.9375, "completions/mean_terminated_length": 1026.9375, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 0.1260252050410082, "frac_reward_zero_std": 0.0, "grad_norm": 3.4953474271926464, "kl": 0.0059356689453125, "learning_rate": 9.981755329926885e-07, "loss": -0.159, "num_tokens": 27205148.0, "reward": 0.0, "reward_std": 0.897917628288269, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10790208297456225, "rewards/wordcountpos_reward/raw_geo/std": 0.06736836454728154, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.16771890063326086, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 630 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1211.9375, "completions/mean_terminated_length": 1192.7333984375, "completions/min_length": 689.0, "completions/min_terminated_length": 689.0, "epoch": 0.1262252450490098, "frac_reward_zero_std": 0.0, "grad_norm": 3.185263780268412, "kl": 0.004650115966796875, "learning_rate": 9.981471565479657e-07, "loss": 0.0485, "num_tokens": 27257843.0, "reward": -4.470348358154297e-08, "reward_std": 0.9278661012649536, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0111015291209416, "rewards/wordcountpos_reward/raw_geo/std": 0.08132531991087623, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.10749676997731401, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 631 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 1236.3125, "completions/mean_terminated_length": 1198.6429443359375, "completions/min_length": 940.0, "completions/min_terminated_length": 940.0, "epoch": 0.1264252850570114, "frac_reward_zero_std": 0.0, "grad_norm": 2.836985042513744, "kl": 0.0047607421875, "learning_rate": 9.981185615846547e-07, "loss": -0.0114, "num_tokens": 27300824.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0083012580871582, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07239144298136019, "rewards/wordcountpos_reward/raw_geo/std": 0.14773738321392313, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715322, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 632 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1337.0, "completions/mean_length": 1191.0625, "completions/mean_terminated_length": 1050.6363525390625, "completions/min_length": 781.0, "completions/min_terminated_length": 781.0, "epoch": 0.126625325065013, "frac_reward_zero_std": 0.0, "grad_norm": 3.0724060569436813, "kl": 0.00531768798828125, "learning_rate": 9.980897481166977e-07, "loss": -0.0809, "num_tokens": 27350177.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7770811319351196, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0933141414397164, "rewards/wordcountpos_reward/raw_geo/std": 0.16391916675149165, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10318986456114838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 633 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1212.0, "completions/max_terminated_length": 1212.0, "completions/mean_length": 1102.4375, "completions/mean_terminated_length": 1102.4375, "completions/min_length": 974.0, "completions/min_terminated_length": 974.0, "epoch": 0.1268253650730146, "frac_reward_zero_std": 0.0, "grad_norm": 3.1225414018257385, "kl": 0.003955841064453125, "learning_rate": 9.980607161581453e-07, "loss": 0.0015, "num_tokens": 27387200.0, "reward": -7.450580596923828e-09, "reward_std": 1.0623854398727417, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.018069248445128507, "rewards/wordcountpos_reward/raw_geo/std": 0.0804868379396318, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823629, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 634 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1200.0, "completions/max_terminated_length": 1200.0, "completions/mean_length": 989.625, "completions/mean_terminated_length": 989.625, "completions/min_length": 935.0, "completions/min_terminated_length": 935.0, "epoch": 0.1270254050810162, "frac_reward_zero_std": 0.0, "grad_norm": 2.3134785508629094, "kl": 0.0017547607421875, "learning_rate": 9.980314657231528e-07, "loss": -0.0176, "num_tokens": 27417098.0, "reward": -2.9802322387695312e-08, "reward_std": 0.38615089654922485, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.015131948342750749, "rewards/wordcountpos_reward/raw_geo/std": 0.20097860617405225, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11344765475923412, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 635 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1430.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1057.25, "completions/mean_terminated_length": 1057.25, "completions/min_length": 907.0, "completions/min_terminated_length": 907.0, "epoch": 0.1272254450890178, "frac_reward_zero_std": 0.0, "grad_norm": 3.2924162942987945, "kl": 0.003551483154296875, "learning_rate": 9.980019968259832e-07, "loss": 0.0028, "num_tokens": 27453838.0, "reward": 0.0, "reward_std": 0.9801968336105347, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.16367684594306642, "rewards/wordcountpos_reward/raw_geo/std": 0.15655037214855191, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 636 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1416.0, "completions/max_terminated_length": 1416.0, "completions/mean_length": 1082.375, "completions/mean_terminated_length": 1082.375, "completions/min_length": 908.0, "completions/min_terminated_length": 908.0, "epoch": 0.1274254850970194, "frac_reward_zero_std": 0.0, "grad_norm": 3.302973903215818, "kl": 0.00485992431640625, "learning_rate": 9.979723094810057e-07, "loss": -0.0025, "num_tokens": 27489916.0, "reward": 0.0, "reward_std": 0.8288685083389282, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08162939565676491, "rewards/wordcountpos_reward/raw_geo/std": 0.2209381204447789, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857659, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 637 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1282.0, "completions/mean_length": 1168.875, "completions/mean_terminated_length": 1058.5, "completions/min_length": 881.0, "completions/min_terminated_length": 881.0, "epoch": 0.127625525105021, "frac_reward_zero_std": 0.0, "grad_norm": 3.1607709508175716, "kl": 0.004749298095703125, "learning_rate": 9.979424037026958e-07, "loss": 0.0638, "num_tokens": 27536842.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0416853427886963, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12464990888913079, "rewards/wordcountpos_reward/raw_geo/std": 0.14783292213404392, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505422, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 638 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1351.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 1040.9375, "completions/mean_terminated_length": 1040.9375, "completions/min_length": 710.0, "completions/min_terminated_length": 710.0, "epoch": 0.1278255651130226, "frac_reward_zero_std": 0.0, "grad_norm": 3.5474317398854116, "kl": 0.00756072998046875, "learning_rate": 9.979122795056359e-07, "loss": -0.0581, "num_tokens": 27585081.0, "reward": -7.450580596923828e-09, "reward_std": 1.0642361640930176, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.04133626728577251, "rewards/wordcountpos_reward/raw_geo/std": 0.22301695708502775, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12412657816683505, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 639 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1272.0, "completions/max_terminated_length": 1272.0, "completions/mean_length": 1042.25, "completions/mean_terminated_length": 1042.25, "completions/min_length": 883.0, "completions/min_terminated_length": 883.0, "epoch": 0.1280256051210242, "frac_reward_zero_std": 0.0, "grad_norm": 3.1010985284945747, "kl": 0.00457000732421875, "learning_rate": 9.978819369045144e-07, "loss": -0.0277, "num_tokens": 27634117.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9609041810035706, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08407491682913257, "rewards/wordcountpos_reward/raw_geo/std": 0.07843184608381397, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.10878112581387149, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 640 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1099.0, "completions/max_terminated_length": 1099.0, "completions/mean_length": 802.6875, "completions/mean_terminated_length": 802.6875, "completions/min_length": 638.0, "completions/min_terminated_length": 638.0, "epoch": 0.12822564512902582, "frac_reward_zero_std": 0.0, "grad_norm": 3.3952886007573824, "kl": 0.00311279296875, "learning_rate": 9.978513759141268e-07, "loss": 0.0302, "num_tokens": 27673512.0, "reward": 0.0, "reward_std": 0.9679796099662781, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.061750412664388535, "rewards/wordcountpos_reward/raw_geo/std": 0.048486349365749956, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.20471298788158854, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 641 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1276.6875, "completions/mean_terminated_length": 1202.25, "completions/min_length": 898.0, "completions/min_terminated_length": 898.0, "epoch": 0.1284256851370274, "frac_reward_zero_std": 0.0, "grad_norm": 3.3113531173578883, "kl": 0.0063934326171875, "learning_rate": 9.978205965493745e-07, "loss": 0.0183, "num_tokens": 27721371.0, "reward": 0.0, "reward_std": 0.8945666551589966, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10797679995523339, "rewards/wordcountpos_reward/raw_geo/std": 0.08869039820513923, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1192569587999888, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 642 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1432.0, "completions/mean_length": 1122.875, "completions/mean_terminated_length": 1097.7333984375, "completions/min_length": 881.0, "completions/min_terminated_length": 881.0, "epoch": 0.128625725145029, "frac_reward_zero_std": 0.0, "grad_norm": 3.246690819953855, "kl": 0.00644683837890625, "learning_rate": 9.97789598825266e-07, "loss": 0.0141, "num_tokens": 27770041.0, "reward": 0.0, "reward_std": 0.9412751197814941, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10239796086178213, "rewards/wordcountpos_reward/raw_geo/std": 0.1343962627298669, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.07290277645477447, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 643 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1363.0, "completions/mean_length": 1061.9375, "completions/mean_terminated_length": 1032.7333984375, "completions/min_length": 771.0, "completions/min_terminated_length": 771.0, "epoch": 0.1288257651530306, "frac_reward_zero_std": 0.0, "grad_norm": 3.453987658257602, "kl": 0.00827789306640625, "learning_rate": 9.977583827569156e-07, "loss": 0.0144, "num_tokens": 27822056.0, "reward": 2.9802322387695312e-08, "reward_std": 0.3767787218093872, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07131612583711619, "rewards/wordcountpos_reward/raw_geo/std": 0.07171287484603306, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.14375905768565217, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 644 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1305.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 1108.3125, "completions/mean_terminated_length": 1108.3125, "completions/min_length": 932.0, "completions/min_terminated_length": 932.0, "epoch": 0.1290258051610322, "frac_reward_zero_std": 0.0, "grad_norm": 3.327779265101733, "kl": 0.00616455078125, "learning_rate": 9.977269483595446e-07, "loss": -0.0291, "num_tokens": 27868237.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9169150590896606, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.021763183621388668, "rewards/wordcountpos_reward/raw_geo/std": 0.33525742826908483, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 645 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 1223.375, "completions/mean_terminated_length": 1204.933349609375, "completions/min_length": 894.0, "completions/min_terminated_length": 894.0, "epoch": 0.12922584516903382, "frac_reward_zero_std": 0.0, "grad_norm": 2.570055547889121, "kl": 0.0032806396484375, "learning_rate": 9.976952956484806e-07, "loss": -0.0291, "num_tokens": 27915787.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9806604385375977, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.054030268740384677, "rewards/wordcountpos_reward/raw_geo/std": 0.13011261092611223, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09583937179043478, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 646 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1431.0, "completions/max_terminated_length": 1431.0, "completions/mean_length": 1073.0, "completions/mean_terminated_length": 1073.0, "completions/min_length": 692.0, "completions/min_terminated_length": 692.0, "epoch": 0.1294258851770354, "frac_reward_zero_std": 0.0, "grad_norm": 2.941630411573727, "kl": 0.003917694091796875, "learning_rate": 9.976634246391574e-07, "loss": -0.0228, "num_tokens": 27957611.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8614913821220398, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.3734023307367399, "rewards/wordcountpos_reward/raw_geo/std": 0.1143456490050215, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.05900408021045224, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 647 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1312.0, "completions/mean_terminated_length": 1299.4666748046875, "completions/min_length": 1106.0, "completions/min_terminated_length": 1106.0, "epoch": 0.12962592518503702, "frac_reward_zero_std": 0.0, "grad_norm": 1.7540730861096523, "kl": 0.002410888671875, "learning_rate": 9.976313353471158e-07, "loss": 0.007, "num_tokens": 28003027.0, "reward": 0.0, "reward_std": 0.8530524969100952, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.17744547359621543, "rewards/wordcountpos_reward/raw_geo/std": 0.129647648986365, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0620632890834175, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 648 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1397.0, "completions/mean_length": 1143.625, "completions/mean_terminated_length": 1119.86669921875, "completions/min_length": 747.0, "completions/min_terminated_length": 747.0, "epoch": 0.1298259651930386, "frac_reward_zero_std": 0.0, "grad_norm": 3.400985168708016, "kl": 0.00530242919921875, "learning_rate": 9.975990277880021e-07, "loss": -0.0, "num_tokens": 28051293.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0485291481018066, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10346131727919117, "rewards/wordcountpos_reward/raw_geo/std": 0.17460243592844008, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11080513425729775, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 649 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1395.0, "completions/mean_length": 1212.0, "completions/mean_terminated_length": 1170.857177734375, "completions/min_length": 879.0, "completions/min_terminated_length": 879.0, "epoch": 0.1300260052010402, "frac_reward_zero_std": 0.0, "grad_norm": 3.3510074609565645, "kl": 0.00614166259765625, "learning_rate": 9.9756650197757e-07, "loss": 0.0712, "num_tokens": 28102165.0, "reward": -7.450580596923828e-09, "reward_std": 1.0400404930114746, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.06826006684558153, "rewards/wordcountpos_reward/raw_geo/std": 0.12794528340479538, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.10741060020797313, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 650 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1279.0, "completions/max_terminated_length": 1279.0, "completions/mean_length": 1095.5625, "completions/mean_terminated_length": 1095.5625, "completions/min_length": 836.0, "completions/min_terminated_length": 836.0, "epoch": 0.1302260452090418, "frac_reward_zero_std": 0.0, "grad_norm": 3.075463854845879, "kl": 0.004222869873046875, "learning_rate": 9.975337579316792e-07, "loss": -0.0176, "num_tokens": 28146190.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0490224361419678, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15050571812914165, "rewards/wordcountpos_reward/raw_geo/std": 0.06928872750146535, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.09016445879408155, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 651 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1285.0, "completions/max_terminated_length": 1285.0, "completions/mean_length": 1085.125, "completions/mean_terminated_length": 1085.125, "completions/min_length": 954.0, "completions/min_terminated_length": 954.0, "epoch": 0.1304260852170434, "frac_reward_zero_std": 0.0, "grad_norm": 2.1562108504239106, "kl": 0.00295257568359375, "learning_rate": 9.975007956662958e-07, "loss": -0.0086, "num_tokens": 28186128.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9152057766914368, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.035800482873339694, "rewards/wordcountpos_reward/raw_geo/std": 0.05670761202596771, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 652 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1092.0, "completions/max_terminated_length": 1092.0, "completions/mean_length": 893.375, "completions/mean_terminated_length": 893.375, "completions/min_length": 744.0, "completions/min_terminated_length": 744.0, "epoch": 0.13062612522504502, "frac_reward_zero_std": 0.0, "grad_norm": 3.2702118853201094, "kl": 0.004428863525390625, "learning_rate": 9.974676151974924e-07, "loss": -0.084, "num_tokens": 28232990.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9846186637878418, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0414241074887866, "rewards/wordcountpos_reward/raw_geo/std": 0.04514410942858959, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.13109227736669002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 653 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 1196.0625, "completions/mean_terminated_length": 1094.75, "completions/min_length": 886.0, "completions/min_terminated_length": 886.0, "epoch": 0.1308261652330466, "frac_reward_zero_std": 0.0, "grad_norm": 3.3719955995987636, "kl": 0.004749298095703125, "learning_rate": 9.97434216541448e-07, "loss": 0.0341, "num_tokens": 28277695.0, "reward": 0.0, "reward_std": 0.6445549726486206, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06249388830725872, "rewards/wordcountpos_reward/raw_geo/std": 0.2274145114862602, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14089659985908765, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 654 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1495.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1195.0, "completions/mean_terminated_length": 1195.0, "completions/min_length": 942.0, "completions/min_terminated_length": 942.0, "epoch": 0.13102620524104822, "frac_reward_zero_std": 0.0, "grad_norm": 2.5589524029496737, "kl": 0.003894805908203125, "learning_rate": 9.974005997144479e-07, "loss": -0.0219, "num_tokens": 28317327.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5491290092468262, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.21602396028552, "rewards/wordcountpos_reward/raw_geo/std": 0.12123997475876024, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 655 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1188.25, "completions/mean_terminated_length": 1084.3333740234375, "completions/min_length": 767.0, "completions/min_terminated_length": 767.0, "epoch": 0.1312262452490498, "frac_reward_zero_std": 0.0, "grad_norm": 3.4310007665360565, "kl": 0.0052032470703125, "learning_rate": 9.973667647328835e-07, "loss": 0.0166, "num_tokens": 28355931.0, "reward": 0.0, "reward_std": 1.0214509963989258, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.041002112446201305, "rewards/wordcountpos_reward/raw_geo/std": 0.10049333673600769, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.094182643679026, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 656 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1360.0, "completions/mean_length": 1018.6875, "completions/mean_terminated_length": 986.6000366210938, "completions/min_length": 667.0, "completions/min_terminated_length": 667.0, "epoch": 0.1314262852570514, "frac_reward_zero_std": 0.0, "grad_norm": 3.5411403747541748, "kl": 0.00519561767578125, "learning_rate": 9.973327116132535e-07, "loss": -0.0389, "num_tokens": 28401726.0, "reward": 0.0, "reward_std": 0.863051176071167, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04423193519333256, "rewards/wordcountpos_reward/raw_geo/std": 0.059488267363860876, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 657 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1371.0, "completions/mean_length": 1092.1875, "completions/mean_terminated_length": 1065.0, "completions/min_length": 893.0, "completions/min_terminated_length": 893.0, "epoch": 0.13162632526505302, "frac_reward_zero_std": 0.0, "grad_norm": 3.4776309441757878, "kl": 0.005279541015625, "learning_rate": 9.972984403721617e-07, "loss": 0.0415, "num_tokens": 28443193.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9779000282287598, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.22567542424594958, "rewards/wordcountpos_reward/raw_geo/std": 0.122762869851685, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08153617692869924, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 658 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1405.0, "completions/mean_length": 1074.3125, "completions/mean_terminated_length": 1045.933349609375, "completions/min_length": 753.0, "completions/min_terminated_length": 753.0, "epoch": 0.1318263652730546, "frac_reward_zero_std": 0.0, "grad_norm": 2.469400012411404, "kl": 0.005725860595703125, "learning_rate": 9.972639510263196e-07, "loss": -0.0658, "num_tokens": 28481150.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9975144863128662, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14032742480850013, "rewards/wordcountpos_reward/raw_geo/std": 0.07303950883303385, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 659 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1270.625, "completions/mean_terminated_length": 1217.6923828125, "completions/min_length": 1043.0, "completions/min_terminated_length": 1043.0, "epoch": 0.13202640528105622, "frac_reward_zero_std": 0.0, "grad_norm": 3.234344237069856, "kl": 0.006439208984375, "learning_rate": 9.972292435925436e-07, "loss": -0.0107, "num_tokens": 28525928.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7132295370101929, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08332458437535507, "rewards/wordcountpos_reward/raw_geo/std": 0.2423254158475807, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1031898645611484, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 660 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1217.0, "completions/max_terminated_length": 1217.0, "completions/mean_length": 1082.4375, "completions/mean_terminated_length": 1082.4375, "completions/min_length": 881.0, "completions/min_terminated_length": 881.0, "epoch": 0.1322264452890578, "frac_reward_zero_std": 0.0, "grad_norm": 3.1700540986054953, "kl": 0.00649261474609375, "learning_rate": 9.971943180877578e-07, "loss": -0.0196, "num_tokens": 28565135.0, "reward": 0.0, "reward_std": 0.9550644755363464, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.040341277385431855, "rewards/wordcountpos_reward/raw_geo/std": 0.0712034567734884, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087684, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 661 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1304.0, "completions/max_terminated_length": 1304.0, "completions/mean_length": 1090.25, "completions/mean_terminated_length": 1090.25, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.13242648529705942, "frac_reward_zero_std": 0.0, "grad_norm": 3.50163149675052, "kl": 0.00691986083984375, "learning_rate": 9.97159174528992e-07, "loss": -0.0083, "num_tokens": 28614627.0, "reward": 0.0, "reward_std": 0.7552675604820251, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04568235324103673, "rewards/wordcountpos_reward/raw_geo/std": 0.11796028086435413, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7125, "rewards/wordcountpos_reward/raw_rule/std": 0.1258305739211792, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 662 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1233.0, "completions/mean_length": 947.3125, "completions/mean_terminated_length": 910.4667358398438, "completions/min_length": 636.0, "completions/min_terminated_length": 636.0, "epoch": 0.132626525305061, "frac_reward_zero_std": 0.0, "grad_norm": 3.845323115483345, "kl": 0.0078277587890625, "learning_rate": 9.97123812933382e-07, "loss": -0.0435, "num_tokens": 28652640.0, "reward": 0.0, "reward_std": 1.0025032758712769, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.23569810293731933, "rewards/wordcountpos_reward/raw_geo/std": 0.3384914477541847, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.08850612031567838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 663 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1192.4375, "completions/mean_terminated_length": 1171.933349609375, "completions/min_length": 850.0, "completions/min_terminated_length": 850.0, "epoch": 0.1328265653130626, "frac_reward_zero_std": 0.0, "grad_norm": 3.304372477741638, "kl": 0.00621795654296875, "learning_rate": 9.970882333181705e-07, "loss": 0.0087, "num_tokens": 28692511.0, "reward": -1.4901161193847656e-08, "reward_std": 1.048022747039795, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.006602699671060386, "rewards/wordcountpos_reward/raw_geo/std": 0.03777902085105849, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11894598836509009, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 664 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1228.0, "completions/mean_length": 991.0625, "completions/mean_terminated_length": 957.1333618164062, "completions/min_length": 611.0, "completions/min_terminated_length": 611.0, "epoch": 0.13302660532106422, "frac_reward_zero_std": 0.0, "grad_norm": 2.8131400706173237, "kl": 0.00263214111328125, "learning_rate": 9.970524357007062e-07, "loss": -0.011, "num_tokens": 28725016.0, "reward": -7.450580596923828e-09, "reward_std": 1.0291789770126343, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.05325052424207354, "rewards/wordcountpos_reward/raw_geo/std": 0.10540294845922886, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.07588978362901858, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 665 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1371.1875, "completions/mean_terminated_length": 1328.25, "completions/min_length": 955.0, "completions/min_terminated_length": 955.0, "epoch": 0.1332266453290658, "frac_reward_zero_std": 0.0, "grad_norm": 2.9863509949236753, "kl": 0.00701904296875, "learning_rate": 9.970164200984443e-07, "loss": -0.0186, "num_tokens": 28775331.0, "reward": 0.0, "reward_std": 0.7168537378311157, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.240252370502287, "rewards/wordcountpos_reward/raw_geo/std": 0.1105960634530435, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09269623828717426, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 666 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1491.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1063.375, "completions/mean_terminated_length": 1063.375, "completions/min_length": 734.0, "completions/min_terminated_length": 734.0, "epoch": 0.13342668533706742, "frac_reward_zero_std": 0.0, "grad_norm": 3.142214576501579, "kl": 0.0045623779296875, "learning_rate": 9.96980186528946e-07, "loss": 0.0055, "num_tokens": 28814041.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0318855047225952, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10171903810984985, "rewards/wordcountpos_reward/raw_geo/std": 0.16995287458764927, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460886, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 667 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1163.75, "completions/mean_terminated_length": 1115.71435546875, "completions/min_length": 736.0, "completions/min_terminated_length": 736.0, "epoch": 0.133626725345069, "frac_reward_zero_std": 0.0, "grad_norm": 3.253372187753335, "kl": 0.00621795654296875, "learning_rate": 9.969437350098792e-07, "loss": -0.0263, "num_tokens": 28863349.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0098556280136108, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05990797004300492, "rewards/wordcountpos_reward/raw_geo/std": 0.10188959470906758, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066471, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 668 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1268.0, "completions/max_terminated_length": 1268.0, "completions/mean_length": 1006.6875, "completions/mean_terminated_length": 1006.6875, "completions/min_length": 788.0, "completions/min_terminated_length": 788.0, "epoch": 0.13382676535307061, "frac_reward_zero_std": 0.0, "grad_norm": 3.4764647965790667, "kl": 0.0055389404296875, "learning_rate": 9.969070655590176e-07, "loss": 0.0314, "num_tokens": 28906232.0, "reward": 0.0, "reward_std": 0.8855102062225342, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03819761022437591, "rewards/wordcountpos_reward/raw_geo/std": 0.15089505994649535, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.10183501544346311, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 669 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1247.9375, "completions/mean_terminated_length": 1163.916748046875, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.13402680536107223, "frac_reward_zero_std": 0.0, "grad_norm": 2.4846989439490264, "kl": 0.0050048828125, "learning_rate": 9.968701781942416e-07, "loss": -0.1845, "num_tokens": 28955095.0, "reward": 0.0, "reward_std": 0.9443983435630798, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1520027507303361, "rewards/wordcountpos_reward/raw_geo/std": 0.1367502075335975, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.19474579822405907, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 670 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1490.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 980.6875, "completions/mean_terminated_length": 980.6875, "completions/min_length": 609.0, "completions/min_terminated_length": 609.0, "epoch": 0.1342268453690738, "frac_reward_zero_std": 0.0, "grad_norm": 3.268405289422698, "kl": 0.004718780517578125, "learning_rate": 9.968330729335373e-07, "loss": -0.0262, "num_tokens": 28991298.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9413201808929443, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 3.4723568840691957e-06, "rewards/wordcountpos_reward/raw_geo/std": 0.16894296312032356, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.1355373393953503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 671 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1374.0, "completions/max_terminated_length": 1374.0, "completions/mean_length": 1055.4375, "completions/mean_terminated_length": 1055.4375, "completions/min_length": 839.0, "completions/min_terminated_length": 839.0, "epoch": 0.13442688537707542, "frac_reward_zero_std": 0.0, "grad_norm": 3.614158830986014, "kl": 0.0110931396484375, "learning_rate": 9.967957497949977e-07, "loss": -0.0303, "num_tokens": 29041065.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8191667795181274, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.23981766865371792, "rewards/wordcountpos_reward/raw_geo/std": 0.31159167452390013, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11147163731607214, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 672 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1297.0, "completions/mean_length": 1164.6875, "completions/mean_terminated_length": 1116.7857666015625, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.134626925385077, "frac_reward_zero_std": 0.0, "grad_norm": 3.476532545378162, "kl": 0.008636474609375, "learning_rate": 9.967582087968216e-07, "loss": -0.0427, "num_tokens": 29091876.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6427819728851318, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07378987834921148, "rewards/wordcountpos_reward/raw_geo/std": 0.1400941718133652, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07290277645477443, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 673 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1455.0, "completions/mean_length": 1280.375, "completions/mean_terminated_length": 1249.0, "completions/min_length": 1002.0, "completions/min_terminated_length": 1002.0, "epoch": 0.13482696539307862, "frac_reward_zero_std": 0.0, "grad_norm": 3.0970888606647975, "kl": 0.0067901611328125, "learning_rate": 9.967204499573144e-07, "loss": 0.015, "num_tokens": 29128946.0, "reward": 0.0, "reward_std": 0.7480720281600952, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.21311493454456562, "rewards/wordcountpos_reward/raw_geo/std": 0.10351996953299572, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08153617692869926, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 674 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1379.0, "completions/mean_length": 1207.3125, "completions/mean_terminated_length": 1187.800048828125, "completions/min_length": 915.0, "completions/min_terminated_length": 915.0, "epoch": 0.1350270054010802, "frac_reward_zero_std": 0.0, "grad_norm": 2.6745800353908464, "kl": 0.004638671875, "learning_rate": 9.96682473294887e-07, "loss": -0.0239, "num_tokens": 29172743.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9135780334472656, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12160674771147578, "rewards/wordcountpos_reward/raw_geo/std": 0.17578519741606502, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 675 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1378.0, "completions/max_terminated_length": 1378.0, "completions/mean_length": 1144.375, "completions/mean_terminated_length": 1144.375, "completions/min_length": 997.0, "completions/min_terminated_length": 997.0, "epoch": 0.13522704540908181, "frac_reward_zero_std": 0.0, "grad_norm": 1.9476825347610727, "kl": 0.00518798828125, "learning_rate": 9.966442788280575e-07, "loss": 0.0118, "num_tokens": 29217605.0, "reward": 7.450580596923828e-09, "reward_std": 1.002336025238037, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.16464423071540582, "rewards/wordcountpos_reward/raw_geo/std": 0.1532432998548804, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 676 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 1188.5, "completions/mean_terminated_length": 1167.7333984375, "completions/min_length": 864.0, "completions/min_terminated_length": 864.0, "epoch": 0.13542708541708343, "frac_reward_zero_std": 0.0, "grad_norm": 3.3755287836118857, "kl": 0.0082550048828125, "learning_rate": 9.966058665754494e-07, "loss": -0.0668, "num_tokens": 29270741.0, "reward": 0.0, "reward_std": 0.6608616709709167, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.23340531596752148, "rewards/wordcountpos_reward/raw_geo/std": 0.14174178380742677, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 677 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1180.0, "completions/max_terminated_length": 1180.0, "completions/mean_length": 952.375, "completions/mean_terminated_length": 952.375, "completions/min_length": 860.0, "completions/min_terminated_length": 860.0, "epoch": 0.135627125425085, "frac_reward_zero_std": 0.0, "grad_norm": 2.7624594871829484, "kl": 0.004543304443359375, "learning_rate": 9.965672365557928e-07, "loss": 0.0228, "num_tokens": 29310939.0, "reward": 7.450580596923828e-09, "reward_std": 1.0148861408233643, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.0006055944986289026, "rewards/wordcountpos_reward/raw_geo/std": 0.03041083385154536, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 678 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1156.0, "completions/max_terminated_length": 1156.0, "completions/mean_length": 862.3125, "completions/mean_terminated_length": 862.3125, "completions/min_length": 681.0, "completions/min_terminated_length": 681.0, "epoch": 0.13582716543308662, "frac_reward_zero_std": 0.0, "grad_norm": 4.40865964743789, "kl": 0.0063629150390625, "learning_rate": 9.96528388787924e-07, "loss": 0.0238, "num_tokens": 29348792.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8102241158485413, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02602516324429506, "rewards/wordcountpos_reward/raw_geo/std": 0.1502081507064256, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.11979921473804349, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 679 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1465.0, "completions/mean_length": 1163.5, "completions/mean_terminated_length": 1141.0667724609375, "completions/min_length": 941.0, "completions/min_terminated_length": 941.0, "epoch": 0.1360272054410882, "frac_reward_zero_std": 0.0, "grad_norm": 2.4148042308996507, "kl": 0.0050048828125, "learning_rate": 9.964893232907847e-07, "loss": -0.0181, "num_tokens": 29390840.0, "reward": 0.0, "reward_std": 0.7928985357284546, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02477742068729108, "rewards/wordcountpos_reward/raw_geo/std": 0.07537619440262582, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.16549588783075214, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 680 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1397.0, "completions/max_terminated_length": 1397.0, "completions/mean_length": 942.5, "completions/mean_terminated_length": 942.5, "completions/min_length": 643.0, "completions/min_terminated_length": 643.0, "epoch": 0.13622724544908982, "frac_reward_zero_std": 0.0, "grad_norm": 3.8849979245723825, "kl": 0.00829315185546875, "learning_rate": 9.964500400834242e-07, "loss": -0.041, "num_tokens": 29436464.0, "reward": 1.4901161193847656e-08, "reward_std": 1.043959379196167, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.22229496293426804, "rewards/wordcountpos_reward/raw_geo/std": 0.10262288181478094, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.6833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1299572579307862, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 681 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1086.0, "completions/max_terminated_length": 1086.0, "completions/mean_length": 978.0625, "completions/mean_terminated_length": 978.0625, "completions/min_length": 774.0, "completions/min_terminated_length": 774.0, "epoch": 0.13642728545709143, "frac_reward_zero_std": 0.0, "grad_norm": 2.308428565876076, "kl": 0.00312042236328125, "learning_rate": 9.964105391849968e-07, "loss": -0.0188, "num_tokens": 29469161.0, "reward": 0.0, "reward_std": 0.7602218389511108, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.18210586500254414, "rewards/wordcountpos_reward/raw_geo/std": 0.18249962833621194, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1128748897706693, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 682 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1194.0, "completions/max_terminated_length": 1194.0, "completions/mean_length": 1043.5625, "completions/mean_terminated_length": 1043.5625, "completions/min_length": 864.0, "completions/min_terminated_length": 864.0, "epoch": 0.136627325465093, "frac_reward_zero_std": 0.0, "grad_norm": 3.347932976671946, "kl": 0.00540924072265625, "learning_rate": 9.963708206147635e-07, "loss": 0.0248, "num_tokens": 29519402.0, "reward": 0.0, "reward_std": 0.5571043491363525, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02263269429901924, "rewards/wordcountpos_reward/raw_geo/std": 0.030775433610970383, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087681, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 683 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1250.0, "completions/max_terminated_length": 1250.0, "completions/mean_length": 1045.3125, "completions/mean_terminated_length": 1045.3125, "completions/min_length": 842.0, "completions/min_terminated_length": 842.0, "epoch": 0.13682736547309463, "frac_reward_zero_std": 0.0, "grad_norm": 3.3655345045803515, "kl": 0.00521087646484375, "learning_rate": 9.96330884392091e-07, "loss": 0.0309, "num_tokens": 29549455.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8928267955780029, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.3070642784715, "rewards/wordcountpos_reward/raw_geo/std": 0.11910458814440902, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 684 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1008.8125, "completions/mean_terminated_length": 895.4615478515625, "completions/min_length": 466.0, "completions/min_terminated_length": 466.0, "epoch": 0.1370274054810962, "frac_reward_zero_std": 0.0, "grad_norm": 3.7113494184727935, "kl": 0.00677490234375, "learning_rate": 9.962907305364528e-07, "loss": -0.0274, "num_tokens": 29587364.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7454226016998291, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.26854109123445347, "rewards/wordcountpos_reward/raw_geo/std": 0.28636950916948584, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.6833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.15104573749303493, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 685 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1374.0, "completions/max_terminated_length": 1374.0, "completions/mean_length": 1108.0625, "completions/mean_terminated_length": 1108.0625, "completions/min_length": 914.0, "completions/min_terminated_length": 914.0, "epoch": 0.13722744548909782, "frac_reward_zero_std": 0.0, "grad_norm": 3.216933367268093, "kl": 0.00492095947265625, "learning_rate": 9.962503590674276e-07, "loss": -0.0155, "num_tokens": 29631389.0, "reward": 0.0, "reward_std": 0.8615149855613708, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.055286124124821374, "rewards/wordcountpos_reward/raw_geo/std": 0.10167580749230112, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.09574271077563382, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 686 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1318.0, "completions/max_terminated_length": 1318.0, "completions/mean_length": 940.3125, "completions/mean_terminated_length": 940.3125, "completions/min_length": 729.0, "completions/min_terminated_length": 729.0, "epoch": 0.13742748549709943, "frac_reward_zero_std": 0.0, "grad_norm": 3.243173977804894, "kl": 0.00577545166015625, "learning_rate": 9.962097700047008e-07, "loss": 0.0252, "num_tokens": 29680354.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7638654112815857, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.054423855935719107, "rewards/wordcountpos_reward/raw_geo/std": 0.17388371951199952, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09803627446568497, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 687 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1229.25, "completions/mean_terminated_length": 1211.2000732421875, "completions/min_length": 1035.0, "completions/min_terminated_length": 1035.0, "epoch": 0.13762752550510102, "frac_reward_zero_std": 0.0, "grad_norm": 2.2922082506539088, "kl": 0.003414154052734375, "learning_rate": 9.96168963368064e-07, "loss": -0.0195, "num_tokens": 29717958.0, "reward": -3.725290298461914e-09, "reward_std": 1.060276985168457, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.16127617426569996, "rewards/wordcountpos_reward/raw_geo/std": 0.09716465677783157, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.08681611046941137, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 688 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1232.0, "completions/mean_length": 1065.75, "completions/mean_terminated_length": 1036.800048828125, "completions/min_length": 787.0, "completions/min_terminated_length": 787.0, "epoch": 0.13782756551310263, "frac_reward_zero_std": 0.0, "grad_norm": 3.1986067944426546, "kl": 0.00455474853515625, "learning_rate": 9.96127939177415e-07, "loss": -0.0063, "num_tokens": 29751562.0, "reward": 0.0, "reward_std": 0.9840933084487915, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05837005965237116, "rewards/wordcountpos_reward/raw_geo/std": 0.2589641194830504, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07685966046898339, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 689 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1225.0, "completions/max_terminated_length": 1225.0, "completions/mean_length": 1164.0, "completions/mean_terminated_length": 1164.0, "completions/min_length": 976.0, "completions/min_terminated_length": 976.0, "epoch": 0.1380276055211042, "frac_reward_zero_std": 0.0, "grad_norm": 2.518302959899328, "kl": 0.005924224853515625, "learning_rate": 9.960866974527567e-07, "loss": 0.0198, "num_tokens": 29792418.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7731319069862366, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.053263698647258016, "rewards/wordcountpos_reward/raw_geo/std": 0.0793153271995976, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.12881223774390615, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 690 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1384.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 1184.3125, "completions/mean_terminated_length": 1184.3125, "completions/min_length": 901.0, "completions/min_terminated_length": 901.0, "epoch": 0.13822764552910582, "frac_reward_zero_std": 0.0, "grad_norm": 2.40174455281427, "kl": 0.0048370361328125, "learning_rate": 9.960452382141992e-07, "loss": 0.0074, "num_tokens": 29840911.0, "reward": -5.960464477539063e-08, "reward_std": 0.9344772696495056, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05228336251761571, "rewards/wordcountpos_reward/raw_geo/std": 0.06650767942038709, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409445, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 691 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1221.0, "completions/mean_terminated_length": 1202.4000244140625, "completions/min_length": 725.0, "completions/min_terminated_length": 725.0, "epoch": 0.1384276855371074, "frac_reward_zero_std": 0.0, "grad_norm": 2.5302045187486986, "kl": 0.00519561767578125, "learning_rate": 9.960035614819581e-07, "loss": -0.0785, "num_tokens": 29883799.0, "reward": -5.960464477539063e-08, "reward_std": 0.7695052623748779, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0537022261445076, "rewards/wordcountpos_reward/raw_geo/std": 0.06477909293607671, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.17126976771553507, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 692 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1251.0, "completions/mean_length": 1014.25, "completions/mean_terminated_length": 981.86669921875, "completions/min_length": 767.0, "completions/min_terminated_length": 767.0, "epoch": 0.13862772554510902, "frac_reward_zero_std": 0.0, "grad_norm": 3.0059344255770997, "kl": 0.0048980712890625, "learning_rate": 9.959616672763551e-07, "loss": 0.0266, "num_tokens": 29925883.0, "reward": 0.0, "reward_std": 0.8059113025665283, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.16419694865627432, "rewards/wordcountpos_reward/raw_geo/std": 0.07599964126845102, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000302, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 693 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1489.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1003.625, "completions/mean_terminated_length": 1003.625, "completions/min_length": 645.0, "completions/min_terminated_length": 645.0, "epoch": 0.13882776555311063, "frac_reward_zero_std": 0.0, "grad_norm": 3.5259171549541213, "kl": 0.00650787353515625, "learning_rate": 9.959195556178182e-07, "loss": -0.0015, "num_tokens": 29965309.0, "reward": -7.450580596923828e-09, "reward_std": 1.0141409635543823, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.13576232252922812, "rewards/wordcountpos_reward/raw_geo/std": 0.14926357078338248, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.11213417888437975, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 694 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1386.0, "completions/max_terminated_length": 1386.0, "completions/mean_length": 1151.5, "completions/mean_terminated_length": 1151.5, "completions/min_length": 896.0, "completions/min_terminated_length": 896.0, "epoch": 0.13902780556111222, "frac_reward_zero_std": 0.0, "grad_norm": 3.466923960735747, "kl": 0.00620269775390625, "learning_rate": 9.95877226526881e-07, "loss": -0.0206, "num_tokens": 30009213.0, "reward": 0.0, "reward_std": 0.9081611633300781, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0882971282508942, "rewards/wordcountpos_reward/raw_geo/std": 0.13655016171677775, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033237, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 695 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1432.0, "completions/mean_length": 1072.25, "completions/mean_terminated_length": 1043.7333984375, "completions/min_length": 694.0, "completions/min_terminated_length": 694.0, "epoch": 0.13922784556911383, "frac_reward_zero_std": 0.0, "grad_norm": 2.877632305498997, "kl": 0.00446319580078125, "learning_rate": 9.958346800241833e-07, "loss": -0.0789, "num_tokens": 30051697.0, "reward": -7.450580596923828e-09, "reward_std": 0.972619354724884, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.021725031389030623, "rewards/wordcountpos_reward/raw_geo/std": 0.03661588391024977, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715322, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 696 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1371.0, "completions/mean_length": 1167.125, "completions/mean_terminated_length": 1119.571533203125, "completions/min_length": 929.0, "completions/min_terminated_length": 929.0, "epoch": 0.1394278855771154, "frac_reward_zero_std": 0.0, "grad_norm": 3.054419664841711, "kl": 0.0088653564453125, "learning_rate": 9.957919161304714e-07, "loss": -0.0369, "num_tokens": 30103739.0, "reward": 0.0, "reward_std": 1.0333055257797241, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0057409345208391735, "rewards/wordcountpos_reward/raw_geo/std": 0.10382577793180792, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901162, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 697 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1401.0, "completions/mean_length": 1240.375, "completions/mean_terminated_length": 1223.0667724609375, "completions/min_length": 959.0, "completions/min_terminated_length": 959.0, "epoch": 0.13962792558511702, "frac_reward_zero_std": 0.0, "grad_norm": 3.292924062157794, "kl": 0.0104522705078125, "learning_rate": 9.957489348665968e-07, "loss": -0.0531, "num_tokens": 30155473.0, "reward": 0.0, "reward_std": 0.6561108827590942, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02371765219262611, "rewards/wordcountpos_reward/raw_geo/std": 0.11354867448102532, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11603000888978232, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 698 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1477.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1150.0, "completions/mean_terminated_length": 1150.0, "completions/min_length": 909.0, "completions/min_terminated_length": 909.0, "epoch": 0.13982796559311864, "frac_reward_zero_std": 0.0, "grad_norm": 2.6640346128681567, "kl": 0.004730224609375, "learning_rate": 9.957057362535175e-07, "loss": -0.0007, "num_tokens": 30201673.0, "reward": 0.0, "reward_std": 0.6640896797180176, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03628437041867817, "rewards/wordcountpos_reward/raw_geo/std": 0.09626891586953862, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818418, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 699 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1437.0, "completions/mean_length": 1130.9375, "completions/mean_terminated_length": 1106.3333740234375, "completions/min_length": 924.0, "completions/min_terminated_length": 924.0, "epoch": 0.14002800560112022, "frac_reward_zero_std": 0.0, "grad_norm": 2.73126955211748, "kl": 0.0034275054931640625, "learning_rate": 9.956623203122972e-07, "loss": 0.0458, "num_tokens": 30252296.0, "reward": 0.0, "reward_std": 0.800506591796875, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12573467228988663, "rewards/wordcountpos_reward/raw_geo/std": 0.10527396540553197, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.06978803887752094, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 700 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1463.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1103.75, "completions/mean_terminated_length": 1103.75, "completions/min_length": 750.0, "completions/min_terminated_length": 750.0, "epoch": 0.14022804560912183, "frac_reward_zero_std": 0.0, "grad_norm": 2.559370707789578, "kl": 0.00408172607421875, "learning_rate": 9.956186870641057e-07, "loss": -0.039, "num_tokens": 30290108.0, "reward": 0.0, "reward_std": 0.5168172121047974, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.01391879818602983, "rewards/wordcountpos_reward/raw_geo/std": 0.07164518020495673, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11917929226045818, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 701 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1459.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1303.4375, "completions/mean_terminated_length": 1303.4375, "completions/min_length": 1070.0, "completions/min_terminated_length": 1070.0, "epoch": 0.14042808561712342, "frac_reward_zero_std": 0.0, "grad_norm": 2.7192980531774356, "kl": 0.005126953125, "learning_rate": 9.955748365302192e-07, "loss": 0.0117, "num_tokens": 30336259.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8233336210250854, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10136648307876828, "rewards/wordcountpos_reward/raw_geo/std": 0.15914360706719755, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1029203215725281, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 702 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1147.0, "completions/mean_length": 1040.5625, "completions/mean_terminated_length": 1009.9334106445312, "completions/min_length": 830.0, "completions/min_terminated_length": 830.0, "epoch": 0.14062812562512503, "frac_reward_zero_std": 0.0, "grad_norm": 3.9485772058384923, "kl": 0.007110595703125, "learning_rate": 9.955307687320188e-07, "loss": -0.0008, "num_tokens": 30379068.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8600001335144043, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11216864698780595, "rewards/wordcountpos_reward/raw_geo/std": 0.022922390218119638, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14707015206910487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 703 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1436.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 997.5, "completions/mean_terminated_length": 997.5, "completions/min_length": 720.0, "completions/min_terminated_length": 720.0, "epoch": 0.1408281656331266, "frac_reward_zero_std": 0.0, "grad_norm": 3.493676699900381, "kl": 0.0056915283203125, "learning_rate": 9.954864836909928e-07, "loss": -0.0479, "num_tokens": 30418876.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9076901078224182, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0071481929813572, "rewards/wordcountpos_reward/raw_geo/std": 0.22921296867951962, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8250000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237134, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 704 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1413.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1142.625, "completions/mean_terminated_length": 1142.625, "completions/min_length": 1008.0, "completions/min_terminated_length": 1008.0, "epoch": 0.14102820564112822, "frac_reward_zero_std": 0.0, "grad_norm": 2.9639948982159057, "kl": 0.0070953369140625, "learning_rate": 9.954419814287342e-07, "loss": -0.0238, "num_tokens": 30467718.0, "reward": 0.0, "reward_std": 0.767283022403717, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.17308119246840772, "rewards/wordcountpos_reward/raw_geo/std": 0.11223524603087588, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09737289911202954, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 705 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1481.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1158.5, "completions/mean_terminated_length": 1158.5, "completions/min_length": 834.0, "completions/min_terminated_length": 834.0, "epoch": 0.14122824564912984, "frac_reward_zero_std": 0.0, "grad_norm": 3.8843145288951173, "kl": 0.00765228271484375, "learning_rate": 9.953972619669427e-07, "loss": -0.0442, "num_tokens": 30516190.0, "reward": 0.0, "reward_std": 0.9820117354393005, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.02992030303461632, "rewards/wordcountpos_reward/raw_geo/std": 0.1281319358892255, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237134, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 706 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1354.0, "completions/max_terminated_length": 1354.0, "completions/mean_length": 1130.625, "completions/mean_terminated_length": 1130.625, "completions/min_length": 989.0, "completions/min_terminated_length": 989.0, "epoch": 0.14142828565713142, "frac_reward_zero_std": 0.0, "grad_norm": 3.238582098036172, "kl": 0.00623321533203125, "learning_rate": 9.953523253274238e-07, "loss": -0.0129, "num_tokens": 30559456.0, "reward": 5.960464477539063e-08, "reward_std": 0.7889923453330994, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.00939426314301261, "rewards/wordcountpos_reward/raw_geo/std": 0.09751999661939352, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066473, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 707 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 1350.4375, "completions/mean_terminated_length": 1200.875, "completions/min_length": 1089.0, "completions/min_terminated_length": 1089.0, "epoch": 0.14162832566513303, "frac_reward_zero_std": 0.0, "grad_norm": 1.978293402370285, "kl": 0.0026874542236328125, "learning_rate": 9.953071715320888e-07, "loss": -0.0039, "num_tokens": 30610111.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0519108772277832, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1741235838635124, "rewards/wordcountpos_reward/raw_geo/std": 0.04403836839294878, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 708 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1219.75, "completions/mean_terminated_length": 1179.71435546875, "completions/min_length": 983.0, "completions/min_terminated_length": 983.0, "epoch": 0.14182836567313462, "frac_reward_zero_std": 0.0, "grad_norm": 3.333057252728575, "kl": 0.00664520263671875, "learning_rate": 9.952618006029548e-07, "loss": 0.0278, "num_tokens": 30650387.0, "reward": 0.0, "reward_std": 0.5008547306060791, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.14025724785888286, "rewards/wordcountpos_reward/raw_geo/std": 0.08485098297679669, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.1403039029577766, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 709 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1222.0, "completions/max_terminated_length": 1222.0, "completions/mean_length": 1102.1875, "completions/mean_terminated_length": 1102.1875, "completions/min_length": 900.0, "completions/min_terminated_length": 900.0, "epoch": 0.14202840568113623, "frac_reward_zero_std": 0.0, "grad_norm": 2.6774147607652132, "kl": 0.0044403076171875, "learning_rate": 9.95216212562145e-07, "loss": 0.0082, "num_tokens": 30698294.0, "reward": 5.960464477539063e-08, "reward_std": 0.6835624575614929, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06311745003414003, "rewards/wordcountpos_reward/raw_geo/std": 0.13948938680928943, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13333333333333336, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 710 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1092.0625, "completions/mean_terminated_length": 906.6364135742188, "completions/min_length": 650.0, "completions/min_terminated_length": 650.0, "epoch": 0.14222844568913784, "frac_reward_zero_std": 0.0, "grad_norm": 3.0546089944500445, "kl": 0.00469207763671875, "learning_rate": 9.951704074318883e-07, "loss": -0.0249, "num_tokens": 30744439.0, "reward": -2.9802322387695312e-08, "reward_std": 0.628603458404541, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10739659977576964, "rewards/wordcountpos_reward/raw_geo/std": 0.07779821935513596, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.12881223774390613, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 711 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1390.0, "completions/mean_length": 1170.0, "completions/mean_terminated_length": 1148.0, "completions/min_length": 938.0, "completions/min_terminated_length": 938.0, "epoch": 0.14242848569713942, "frac_reward_zero_std": 0.0, "grad_norm": 3.862725587204926, "kl": 0.0083770751953125, "learning_rate": 9.951243852345196e-07, "loss": 0.0181, "num_tokens": 30793999.0, "reward": 7.450580596923828e-09, "reward_std": 1.0435220003128052, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.35213315369770076, "rewards/wordcountpos_reward/raw_geo/std": 0.05763721943742654, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.1067187372905475, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 712 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1393.3125, "completions/mean_terminated_length": 1329.300048828125, "completions/min_length": 1219.0, "completions/min_terminated_length": 1219.0, "epoch": 0.14262852570514103, "frac_reward_zero_std": 0.0, "grad_norm": 3.2670437030095085, "kl": 0.00617218017578125, "learning_rate": 9.95078145992479e-07, "loss": -0.0166, "num_tokens": 30842468.0, "reward": 3.725290298461914e-09, "reward_std": 1.0632672309875488, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.07208442551012503, "rewards/wordcountpos_reward/raw_geo/std": 0.2627307507639042, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10945995377982527, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 713 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1373.0, "completions/max_terminated_length": 1373.0, "completions/mean_length": 1206.5, "completions/mean_terminated_length": 1206.5, "completions/min_length": 1052.0, "completions/min_terminated_length": 1052.0, "epoch": 0.14282856571314262, "frac_reward_zero_std": 0.0, "grad_norm": 2.9241609126731363, "kl": 0.005245208740234375, "learning_rate": 9.950316897283137e-07, "loss": 0.0034, "num_tokens": 30888204.0, "reward": 0.0, "reward_std": 0.7585967183113098, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.053196519062599604, "rewards/wordcountpos_reward/raw_geo/std": 0.12888531258144625, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.13109227736669002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 714 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1317.0, "completions/max_terminated_length": 1317.0, "completions/mean_length": 1091.3125, "completions/mean_terminated_length": 1091.3125, "completions/min_length": 838.0, "completions/min_terminated_length": 838.0, "epoch": 0.14302860572114423, "frac_reward_zero_std": 0.0, "grad_norm": 2.771106836625132, "kl": 0.0041351318359375, "learning_rate": 9.949850164646756e-07, "loss": 0.0285, "num_tokens": 30934737.0, "reward": 0.0, "reward_std": 0.8382663726806641, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14603299511443718, "rewards/wordcountpos_reward/raw_geo/std": 0.11244707488262339, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12881223774390613, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 715 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1414.0, "completions/mean_length": 1047.8125, "completions/mean_terminated_length": 1017.666748046875, "completions/min_length": 713.0, "completions/min_terminated_length": 713.0, "epoch": 0.14322864572914584, "frac_reward_zero_std": 0.0, "grad_norm": 3.426852639639691, "kl": 0.0054168701171875, "learning_rate": 9.949381262243225e-07, "loss": 0.0132, "num_tokens": 30976606.0, "reward": 5.960464477539063e-08, "reward_std": 0.5217741131782532, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.16936015769452664, "rewards/wordcountpos_reward/raw_geo/std": 0.185387502350504, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402213, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 716 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1139.0, "completions/mean_length": 983.25, "completions/mean_terminated_length": 948.800048828125, "completions/min_length": 623.0, "completions/min_terminated_length": 623.0, "epoch": 0.14342868573714743, "frac_reward_zero_std": 0.0, "grad_norm": 3.885401042416404, "kl": 0.00608062744140625, "learning_rate": 9.94891019030119e-07, "loss": 0.0353, "num_tokens": 31013138.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9144117832183838, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03453104921911609, "rewards/wordcountpos_reward/raw_geo/std": 0.17395946248572433, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 717 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1344.0, "completions/mean_length": 1201.4375, "completions/mean_terminated_length": 1181.533447265625, "completions/min_length": 809.0, "completions/min_terminated_length": 809.0, "epoch": 0.14362872574514904, "frac_reward_zero_std": 0.0, "grad_norm": 2.5575164662328134, "kl": 0.00366973876953125, "learning_rate": 9.948436949050343e-07, "loss": -0.0143, "num_tokens": 31059481.0, "reward": 1.862645149230957e-08, "reward_std": 1.0672872066497803, "rewards/wordcountpos_reward/mean": 1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01463547907012421, "rewards/wordcountpos_reward/raw_geo/std": 0.07692061541498454, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 718 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1395.0, "completions/max_terminated_length": 1395.0, "completions/mean_length": 1062.125, "completions/mean_terminated_length": 1062.125, "completions/min_length": 811.0, "completions/min_terminated_length": 811.0, "epoch": 0.14382876575315062, "frac_reward_zero_std": 0.0, "grad_norm": 2.654257872990229, "kl": 0.0034637451171875, "learning_rate": 9.94796153872144e-07, "loss": 0.0295, "num_tokens": 31109275.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9103240370750427, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05953108948483753, "rewards/wordcountpos_reward/raw_geo/std": 0.06727902500973598, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 719 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1466.0, "completions/mean_length": 1058.5625, "completions/mean_terminated_length": 1029.1334228515625, "completions/min_length": 757.0, "completions/min_terminated_length": 757.0, "epoch": 0.14402880576115223, "frac_reward_zero_std": 0.0, "grad_norm": 3.5922169500272374, "kl": 0.007232666015625, "learning_rate": 9.947483959546293e-07, "loss": 0.0068, "num_tokens": 31150284.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9292308688163757, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09370068959943902, "rewards/wordcountpos_reward/raw_geo/std": 0.13843699606577375, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09953596037316068, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 720 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1217.0, "completions/max_terminated_length": 1217.0, "completions/mean_length": 1044.4375, "completions/mean_terminated_length": 1044.4375, "completions/min_length": 908.0, "completions/min_terminated_length": 908.0, "epoch": 0.14422884576915382, "frac_reward_zero_std": 0.0, "grad_norm": 2.9323599712703636, "kl": 0.00423431396484375, "learning_rate": 9.94700421175777e-07, "loss": -0.0123, "num_tokens": 31195763.0, "reward": 0.0, "reward_std": 0.49132829904556274, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.17546977703420125, "rewards/wordcountpos_reward/raw_geo/std": 0.41867445773614, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13924399049470285, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 721 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1312.0, "completions/mean_terminated_length": 1268.615478515625, "completions/min_length": 843.0, "completions/min_terminated_length": 843.0, "epoch": 0.14442888577715543, "frac_reward_zero_std": 0.0, "grad_norm": 3.317288321785738, "kl": 0.0067138671875, "learning_rate": 9.946522295589801e-07, "loss": 0.0305, "num_tokens": 31244731.0, "reward": 0.0, "reward_std": 0.5410593748092651, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2706143842692229, "rewards/wordcountpos_reward/raw_geo/std": 0.2736734719163004, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725114, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 722 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1184.0, "completions/max_terminated_length": 1184.0, "completions/mean_length": 880.4375, "completions/mean_terminated_length": 880.4375, "completions/min_length": 610.0, "completions/min_terminated_length": 610.0, "epoch": 0.14462892578515704, "frac_reward_zero_std": 0.0, "grad_norm": 3.5854368586298704, "kl": 0.0055999755859375, "learning_rate": 9.94603821127737e-07, "loss": -0.0311, "num_tokens": 31279458.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9416681528091431, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1467374778123789, "rewards/wordcountpos_reward/raw_geo/std": 0.05100519292653718, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11213417888437974, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 723 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1244.0, "completions/max_terminated_length": 1244.0, "completions/mean_length": 921.3125, "completions/mean_terminated_length": 921.3125, "completions/min_length": 658.0, "completions/min_terminated_length": 658.0, "epoch": 0.14482896579315863, "frac_reward_zero_std": 0.0, "grad_norm": 3.503033907202783, "kl": 0.005615234375, "learning_rate": 9.945551959056518e-07, "loss": -0.0035, "num_tokens": 31329031.0, "reward": 0.0, "reward_std": 0.7892371416091919, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10875541293612151, "rewards/wordcountpos_reward/raw_geo/std": 0.08962381679720997, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.0910840068085298, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 724 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 1255.0625, "completions/mean_terminated_length": 1108.0999755859375, "completions/min_length": 841.0, "completions/min_terminated_length": 841.0, "epoch": 0.14502900580116024, "frac_reward_zero_std": 0.0, "grad_norm": 3.1389930286765964, "kl": 0.00562286376953125, "learning_rate": 9.945063539164344e-07, "loss": 0.0488, "num_tokens": 31385048.0, "reward": -1.4901161193847656e-08, "reward_std": 1.040879487991333, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07732004099190525, "rewards/wordcountpos_reward/raw_geo/std": 0.060054882866430806, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7291666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10461569884316813, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 725 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1416.0, "completions/mean_length": 1234.5625, "completions/mean_terminated_length": 1216.86669921875, "completions/min_length": 1019.0, "completions/min_terminated_length": 1019.0, "epoch": 0.14522904580916182, "frac_reward_zero_std": 0.0, "grad_norm": 3.1695657531737464, "kl": 0.006683349609375, "learning_rate": 9.944572951839003e-07, "loss": -0.0612, "num_tokens": 31433281.0, "reward": 0.0, "reward_std": 0.7444674968719482, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0407388972403369, "rewards/wordcountpos_reward/raw_geo/std": 0.05057335402229615, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238706, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 726 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1434.0, "completions/mean_length": 1189.0, "completions/mean_terminated_length": 1117.2308349609375, "completions/min_length": 663.0, "completions/min_terminated_length": 663.0, "epoch": 0.14542908581716343, "frac_reward_zero_std": 0.0, "grad_norm": 2.79028113516693, "kl": 0.00530242919921875, "learning_rate": 9.94408019731971e-07, "loss": -0.0603, "num_tokens": 31487097.0, "reward": 0.0, "reward_std": 0.8400046825408936, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1559528611625961, "rewards/wordcountpos_reward/raw_geo/std": 0.15576773285069773, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298362, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 727 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1259.25, "completions/mean_terminated_length": 1243.2000732421875, "completions/min_length": 1003.0, "completions/min_terminated_length": 1003.0, "epoch": 0.14562912582516505, "frac_reward_zero_std": 0.0, "grad_norm": 3.5740344672616278, "kl": 0.0075836181640625, "learning_rate": 9.94358527584673e-07, "loss": -0.0366, "num_tokens": 31530533.0, "reward": 0.0, "reward_std": 0.7851624488830566, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.025958064734766265, "rewards/wordcountpos_reward/raw_geo/std": 0.2049492670871542, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10809803506625451, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 728 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1359.0, "completions/mean_length": 1224.0625, "completions/mean_terminated_length": 1009.4444580078125, "completions/min_length": 840.0, "completions/min_terminated_length": 840.0, "epoch": 0.14582916583316663, "frac_reward_zero_std": 0.0, "grad_norm": 2.9136454977175377, "kl": 0.0062255859375, "learning_rate": 9.943088187661394e-07, "loss": 0.0031, "num_tokens": 31579678.0, "reward": 0.0, "reward_std": 0.7163681387901306, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2224011053626009, "rewards/wordcountpos_reward/raw_geo/std": 0.6999590374254487, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 729 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1350.0, "completions/max_terminated_length": 1350.0, "completions/mean_length": 1042.8125, "completions/mean_terminated_length": 1042.8125, "completions/min_length": 757.0, "completions/min_terminated_length": 757.0, "epoch": 0.14602920584116824, "frac_reward_zero_std": 0.0, "grad_norm": 3.3735998435710277, "kl": 0.00632476806640625, "learning_rate": 9.94258893300608e-07, "loss": -0.0878, "num_tokens": 31620995.0, "reward": -1.862645149230957e-09, "reward_std": 0.9324001669883728, "rewards/wordcountpos_reward/mean": -1.862645149230957e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.008924512394700927, "rewards/wordcountpos_reward/raw_geo/std": 0.022503535407724942, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11409872268574492, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 730 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1397.0, "completions/mean_length": 1184.875, "completions/mean_terminated_length": 1163.86669921875, "completions/min_length": 886.0, "completions/min_terminated_length": 886.0, "epoch": 0.14622924584916983, "frac_reward_zero_std": 0.0, "grad_norm": 3.3061452594883525, "kl": 0.0060272216796875, "learning_rate": 9.942087512124232e-07, "loss": -0.0084, "num_tokens": 31658377.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5267903804779053, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.025016088143917627, "rewards/wordcountpos_reward/raw_geo/std": 0.10311097625767426, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087679, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 731 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1354.0, "completions/max_terminated_length": 1354.0, "completions/mean_length": 1080.3125, "completions/mean_terminated_length": 1080.3125, "completions/min_length": 810.0, "completions/min_terminated_length": 810.0, "epoch": 0.14642928585717144, "frac_reward_zero_std": 0.0, "grad_norm": 3.4756112170773035, "kl": 0.0081024169921875, "learning_rate": 9.94158392526034e-07, "loss": 0.0011, "num_tokens": 31691022.0, "reward": 1.4901161193847656e-08, "reward_std": 0.93564772605896, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.013113661731094118, "rewards/wordcountpos_reward/raw_geo/std": 0.029943185327959396, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0596284793999944, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 732 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1244.0, "completions/max_terminated_length": 1244.0, "completions/mean_length": 922.3125, "completions/mean_terminated_length": 922.3125, "completions/min_length": 622.0, "completions/min_terminated_length": 622.0, "epoch": 0.14662932586517302, "frac_reward_zero_std": 0.0, "grad_norm": 2.9405757475850285, "kl": 0.00597381591796875, "learning_rate": 9.941078172659955e-07, "loss": 0.0108, "num_tokens": 31728115.0, "reward": 0.0, "reward_std": 0.7895586490631104, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05735508340614822, "rewards/wordcountpos_reward/raw_geo/std": 0.21383923615787387, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12141145226353543, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 733 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1219.0, "completions/max_terminated_length": 1219.0, "completions/mean_length": 1044.125, "completions/mean_terminated_length": 1044.125, "completions/min_length": 833.0, "completions/min_terminated_length": 833.0, "epoch": 0.14682936587317463, "frac_reward_zero_std": 0.0, "grad_norm": 3.221447547916469, "kl": 0.005321502685546875, "learning_rate": 9.94057025456969e-07, "loss": -0.004, "num_tokens": 31764693.0, "reward": 0.0, "reward_std": 1.0384291410446167, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.027346554022988852, "rewards/wordcountpos_reward/raw_geo/std": 0.05012748370006775, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 734 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1293.625, "completions/mean_terminated_length": 1169.800048828125, "completions/min_length": 904.0, "completions/min_terminated_length": 904.0, "epoch": 0.14702940588117624, "frac_reward_zero_std": 0.0, "grad_norm": 2.565880363332157, "kl": 0.004779815673828125, "learning_rate": 9.940060171237204e-07, "loss": 0.0044, "num_tokens": 31810935.0, "reward": 0.0, "reward_std": 0.9921772480010986, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08258514967635046, "rewards/wordcountpos_reward/raw_geo/std": 0.14775844679852268, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.23090001042619038, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 735 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1391.0, "completions/max_terminated_length": 1391.0, "completions/mean_length": 915.3125, "completions/mean_terminated_length": 915.3125, "completions/min_length": 782.0, "completions/min_terminated_length": 782.0, "epoch": 0.14722944588917783, "frac_reward_zero_std": 0.0, "grad_norm": 3.034484580566022, "kl": 0.00516510009765625, "learning_rate": 9.939547922911215e-07, "loss": -0.042, "num_tokens": 31853932.0, "reward": -1.862645149230957e-08, "reward_std": 1.0283491611480713, "rewards/wordcountpos_reward/mean": -1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.010920550721835287, "rewards/wordcountpos_reward/raw_geo/std": 0.11000018238737036, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891873, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 736 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1132.0, "completions/max_terminated_length": 1132.0, "completions/mean_length": 981.4375, "completions/mean_terminated_length": 981.4375, "completions/min_length": 853.0, "completions/min_terminated_length": 853.0, "epoch": 0.14742948589717944, "frac_reward_zero_std": 0.0, "grad_norm": 3.1664390033328695, "kl": 0.005584716796875, "learning_rate": 9.9390335098415e-07, "loss": 0.0094, "num_tokens": 31895347.0, "reward": -7.450580596923828e-09, "reward_std": 1.0350008010864258, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.16003932504552604, "rewards/wordcountpos_reward/raw_geo/std": 0.06289044526740273, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.11729986896522632, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 737 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1257.0, "completions/max_terminated_length": 1257.0, "completions/mean_length": 1020.75, "completions/mean_terminated_length": 1020.75, "completions/min_length": 894.0, "completions/min_terminated_length": 894.0, "epoch": 0.14762952590518102, "frac_reward_zero_std": 0.0, "grad_norm": 2.4456819284001785, "kl": 0.0038013458251953125, "learning_rate": 9.938516932278888e-07, "loss": -0.0242, "num_tokens": 31930247.0, "reward": 0.0, "reward_std": 0.6868234872817993, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12892066146394085, "rewards/wordcountpos_reward/raw_geo/std": 0.17193502673549263, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.046943622609505783, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 738 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1126.0, "completions/mean_terminated_length": 1072.571533203125, "completions/min_length": 907.0, "completions/min_terminated_length": 907.0, "epoch": 0.14782956591318264, "frac_reward_zero_std": 0.0, "grad_norm": 1.9059478960136789, "kl": 0.0029144287109375, "learning_rate": 9.937998190475266e-07, "loss": -0.0446, "num_tokens": 31966103.0, "reward": -1.1175870895385742e-08, "reward_std": 1.0434917211532593, "rewards/wordcountpos_reward/mean": -1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03284074013793732, "rewards/wordcountpos_reward/raw_geo/std": 0.0948683697455548, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.0758897836290186, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 739 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1079.9375, "completions/mean_terminated_length": 983.0000610351562, "completions/min_length": 792.0, "completions/min_terminated_length": 792.0, "epoch": 0.14802960592118425, "frac_reward_zero_std": 0.0, "grad_norm": 3.245806013307816, "kl": 0.00577545166015625, "learning_rate": 9.937477284683574e-07, "loss": 0.0028, "num_tokens": 32013542.0, "reward": -2.9802322387695312e-08, "reward_std": 0.4754641652107239, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1492716070061221, "rewards/wordcountpos_reward/raw_geo/std": 0.11078340341472707, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10036968702787748, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 740 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1480.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1133.625, "completions/mean_terminated_length": 1133.625, "completions/min_length": 923.0, "completions/min_terminated_length": 923.0, "epoch": 0.14822964592918583, "frac_reward_zero_std": 0.0, "grad_norm": 2.7144974746630073, "kl": 0.0039825439453125, "learning_rate": 9.936954215157807e-07, "loss": -0.0454, "num_tokens": 32043712.0, "reward": 0.0, "reward_std": 0.8612264394760132, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.20620909880804195, "rewards/wordcountpos_reward/raw_geo/std": 0.04436061698883284, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409445, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 741 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1372.0, "completions/max_terminated_length": 1372.0, "completions/mean_length": 1018.1875, "completions/mean_terminated_length": 1018.1875, "completions/min_length": 771.0, "completions/min_terminated_length": 771.0, "epoch": 0.14842968593718744, "frac_reward_zero_std": 0.0, "grad_norm": 3.6877184711393194, "kl": 0.0062713623046875, "learning_rate": 9.936428982153017e-07, "loss": 0.0018, "num_tokens": 32071235.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8817063570022583, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02431519355908192, "rewards/wordcountpos_reward/raw_geo/std": 0.04731358555872976, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.13526380260918403, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 742 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1350.0, "completions/max_terminated_length": 1350.0, "completions/mean_length": 1156.8125, "completions/mean_terminated_length": 1156.8125, "completions/min_length": 855.0, "completions/min_terminated_length": 855.0, "epoch": 0.14862972594518903, "frac_reward_zero_std": 0.0, "grad_norm": 2.727083189462933, "kl": 0.0050811767578125, "learning_rate": 9.935901585925309e-07, "loss": -0.002, "num_tokens": 32115200.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9644925594329834, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.022266794489776795, "rewards/wordcountpos_reward/raw_geo/std": 0.2399209765150163, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.16141733350404336, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 743 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1272.375, "completions/mean_terminated_length": 1095.3333740234375, "completions/min_length": 765.0, "completions/min_terminated_length": 765.0, "epoch": 0.14882976595319064, "frac_reward_zero_std": 0.0, "grad_norm": 3.2697999810790837, "kl": 0.006439208984375, "learning_rate": 9.935372026731847e-07, "loss": -0.0733, "num_tokens": 32170958.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9885945320129395, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09825797115494561, "rewards/wordcountpos_reward/raw_geo/std": 0.09147345310798999, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09953596037316066, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 744 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1322.0, "completions/max_terminated_length": 1322.0, "completions/mean_length": 1125.625, "completions/mean_terminated_length": 1125.625, "completions/min_length": 760.0, "completions/min_terminated_length": 760.0, "epoch": 0.14902980596119225, "frac_reward_zero_std": 0.0, "grad_norm": 2.702942452417753, "kl": 0.00390625, "learning_rate": 9.934840304830843e-07, "loss": 0.0248, "num_tokens": 32217168.0, "reward": 5.960464477539063e-08, "reward_std": 0.4093765616416931, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05871235552625615, "rewards/wordcountpos_reward/raw_geo/std": 0.13916236591237247, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.10886621079036349, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 745 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 1203.1875, "completions/mean_terminated_length": 1183.4000244140625, "completions/min_length": 922.0, "completions/min_terminated_length": 922.0, "epoch": 0.14922984596919384, "frac_reward_zero_std": 0.0, "grad_norm": 3.080933903956693, "kl": 0.007293701171875, "learning_rate": 9.934306420481567e-07, "loss": 0.0034, "num_tokens": 32261027.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9661141037940979, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.009531510017604446, "rewards/wordcountpos_reward/raw_geo/std": 0.140438020058125, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.17191729277636836, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 746 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 1203.625, "completions/mean_terminated_length": 1161.2857666015625, "completions/min_length": 968.0, "completions/min_terminated_length": 968.0, "epoch": 0.14942988597719545, "frac_reward_zero_std": 0.0, "grad_norm": 3.220097252933624, "kl": 0.0067596435546875, "learning_rate": 9.933770373944344e-07, "loss": 0.0159, "num_tokens": 32314493.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9547731876373291, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.18587379606769372, "rewards/wordcountpos_reward/raw_geo/std": 0.3465018471227371, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087683, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 747 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1173.75, "completions/mean_terminated_length": 1127.1429443359375, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.14962992598519703, "frac_reward_zero_std": 0.0, "grad_norm": 3.391438531266516, "kl": 0.0070648193359375, "learning_rate": 9.933232165480555e-07, "loss": -0.007, "num_tokens": 32358361.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9440287351608276, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.17168122591343074, "rewards/wordcountpos_reward/raw_geo/std": 0.18252298291517974, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12816366850994054, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 748 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1479.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1162.25, "completions/mean_terminated_length": 1162.25, "completions/min_length": 997.0, "completions/min_terminated_length": 997.0, "epoch": 0.14982996599319864, "frac_reward_zero_std": 0.0, "grad_norm": 2.4449131856697055, "kl": 0.00525665283203125, "learning_rate": 9.932691795352632e-07, "loss": -0.0383, "num_tokens": 32401717.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8779486417770386, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.16634102989475238, "rewards/wordcountpos_reward/raw_geo/std": 0.11342233354185205, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.07187952884282608, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 749 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1427.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 993.375, "completions/mean_terminated_length": 993.375, "completions/min_length": 661.0, "completions/min_terminated_length": 661.0, "epoch": 0.15003000600120023, "frac_reward_zero_std": 0.0, "grad_norm": 2.466370035448869, "kl": 0.0038604736328125, "learning_rate": 9.93214926382406e-07, "loss": -0.0407, "num_tokens": 32454363.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8034205436706543, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.009196259360216556, "rewards/wordcountpos_reward/raw_geo/std": 0.08845259004288432, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.11279282877125754, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 750 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1357.0, "completions/max_terminated_length": 1357.0, "completions/mean_length": 1149.0625, "completions/mean_terminated_length": 1149.0625, "completions/min_length": 965.0, "completions/min_terminated_length": 965.0, "epoch": 0.15023004600920184, "frac_reward_zero_std": 0.0, "grad_norm": 2.8897311396365217, "kl": 0.00701141357421875, "learning_rate": 9.931604571159382e-07, "loss": 0.0268, "num_tokens": 32501916.0, "reward": 7.450580596923828e-09, "reward_std": 0.9287494421005249, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.021950882587778955, "rewards/wordcountpos_reward/raw_geo/std": 0.10769324891808538, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07290277645477446, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 751 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1098.0, "completions/mean_length": 973.5625, "completions/mean_terminated_length": 898.357177734375, "completions/min_length": 654.0, "completions/min_terminated_length": 654.0, "epoch": 0.15043008601720345, "frac_reward_zero_std": 0.0, "grad_norm": 3.046997666110846, "kl": 0.00571441650390625, "learning_rate": 9.931057717624192e-07, "loss": -0.0049, "num_tokens": 32547909.0, "reward": 0.0, "reward_std": 0.32828089594841003, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.13448202488360017, "rewards/wordcountpos_reward/raw_geo/std": 0.07732084908212608, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10809803506625451, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 752 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1372.0, "completions/max_terminated_length": 1372.0, "completions/mean_length": 1095.3125, "completions/mean_terminated_length": 1095.3125, "completions/min_length": 845.0, "completions/min_terminated_length": 845.0, "epoch": 0.15063012602520504, "frac_reward_zero_std": 0.0, "grad_norm": 3.067917425095031, "kl": 0.0070037841796875, "learning_rate": 9.930508703485136e-07, "loss": 0.0391, "num_tokens": 32590770.0, "reward": 0.0, "reward_std": 0.8541622161865234, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.011933431246930814, "rewards/wordcountpos_reward/raw_geo/std": 0.08689841186406, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11213417888437975, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 753 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1358.0, "completions/mean_length": 1088.3125, "completions/mean_terminated_length": 1060.86669921875, "completions/min_length": 790.0, "completions/min_terminated_length": 790.0, "epoch": 0.15083016603320665, "frac_reward_zero_std": 0.0, "grad_norm": 3.068528995159541, "kl": 0.006439208984375, "learning_rate": 9.929957529009918e-07, "loss": 0.0306, "num_tokens": 32628855.0, "reward": 2.9802322387695312e-08, "reward_std": 0.38358789682388306, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.043046412045031665, "rewards/wordcountpos_reward/raw_geo/std": 0.25446104557176796, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0825518916489187, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 754 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1150.0625, "completions/mean_terminated_length": 1126.7333984375, "completions/min_length": 835.0, "completions/min_terminated_length": 835.0, "epoch": 0.15103020604120823, "frac_reward_zero_std": 0.0, "grad_norm": 3.2115053114292307, "kl": 0.0078125, "learning_rate": 9.929404194467294e-07, "loss": -0.0406, "num_tokens": 32669312.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7136757969856262, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1089064821287911, "rewards/wordcountpos_reward/raw_geo/std": 0.29028223127328384, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 755 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1474.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1201.0, "completions/mean_terminated_length": 1201.0, "completions/min_length": 939.0, "completions/min_terminated_length": 939.0, "epoch": 0.15123024604920984, "frac_reward_zero_std": 0.0, "grad_norm": 3.0151202960823644, "kl": 0.0086212158203125, "learning_rate": 9.92884870012707e-07, "loss": -0.0313, "num_tokens": 32715840.0, "reward": 0.0, "reward_std": 0.7436599731445312, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.5061100551384478, "rewards/wordcountpos_reward/raw_geo/std": 0.41022376086854195, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 756 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1256.0, "completions/max_terminated_length": 1256.0, "completions/mean_length": 961.1875, "completions/mean_terminated_length": 961.1875, "completions/min_length": 649.0, "completions/min_terminated_length": 649.0, "epoch": 0.15143028605721146, "frac_reward_zero_std": 0.0, "grad_norm": 3.2990875132831365, "kl": 0.005046844482421875, "learning_rate": 9.92829104626011e-07, "loss": 0.005, "num_tokens": 32748427.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9326552152633667, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.046124962930485486, "rewards/wordcountpos_reward/raw_geo/std": 0.05156686921410211, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087681, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 757 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1076.0, "completions/max_terminated_length": 1076.0, "completions/mean_length": 882.75, "completions/mean_terminated_length": 882.75, "completions/min_length": 604.0, "completions/min_terminated_length": 604.0, "epoch": 0.15163032606521304, "frac_reward_zero_std": 0.0, "grad_norm": 2.9441380322509407, "kl": 0.00485992431640625, "learning_rate": 9.927731233138326e-07, "loss": -0.026, "num_tokens": 32785207.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0674519538879395, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -3.161302955743527e-05, "rewards/wordcountpos_reward/raw_geo/std": 0.062072532673726956, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08153617692869924, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 758 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1212.1875, "completions/mean_terminated_length": 1193.0001220703125, "completions/min_length": 943.0, "completions/min_terminated_length": 943.0, "epoch": 0.15183036607321465, "frac_reward_zero_std": 0.0, "grad_norm": 2.992312458461508, "kl": 0.007843017578125, "learning_rate": 9.927169261034687e-07, "loss": 0.0049, "num_tokens": 32831642.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0592217445373535, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06948679730555916, "rewards/wordcountpos_reward/raw_geo/std": 0.05500610085768121, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 759 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1141.5625, "completions/mean_terminated_length": 1117.666748046875, "completions/min_length": 740.0, "completions/min_terminated_length": 740.0, "epoch": 0.15203040608121624, "frac_reward_zero_std": 0.0, "grad_norm": 3.7420053574943477, "kl": 0.00701141357421875, "learning_rate": 9.926605130223215e-07, "loss": 0.0009, "num_tokens": 32886659.0, "reward": -5.960464477539063e-08, "reward_std": 0.7499135732650757, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.21018875023133704, "rewards/wordcountpos_reward/raw_geo/std": 0.16591610409029345, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.6625, "rewards/wordcountpos_reward/raw_rule/std": 0.12524050936172842, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 760 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1093.0, "completions/mean_length": 898.9375, "completions/mean_terminated_length": 858.86669921875, "completions/min_length": 609.0, "completions/min_terminated_length": 609.0, "epoch": 0.15223044608921785, "frac_reward_zero_std": 0.0, "grad_norm": 3.9902208007971676, "kl": 0.0074005126953125, "learning_rate": 9.926038840978979e-07, "loss": -0.0374, "num_tokens": 32935474.0, "reward": 0.0, "reward_std": 0.7089831829071045, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02938673985528205, "rewards/wordcountpos_reward/raw_geo/std": 0.17560534291750896, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12345339501504503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 761 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1433.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1203.5625, "completions/mean_terminated_length": 1203.5625, "completions/min_length": 1013.0, "completions/min_terminated_length": 1013.0, "epoch": 0.15243048609721943, "frac_reward_zero_std": 0.0, "grad_norm": 3.156381242634378, "kl": 0.0059356689453125, "learning_rate": 9.925470393578105e-07, "loss": -0.0071, "num_tokens": 32987083.0, "reward": 0.0, "reward_std": 0.5327888131141663, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12694652656832794, "rewards/wordcountpos_reward/raw_geo/std": 0.07866495400936042, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.10318986456114841, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 762 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1229.0, "completions/mean_terminated_length": 1138.666748046875, "completions/min_length": 828.0, "completions/min_terminated_length": 828.0, "epoch": 0.15263052610522104, "frac_reward_zero_std": 0.0, "grad_norm": 3.343199628347586, "kl": 0.008636474609375, "learning_rate": 9.924899788297773e-07, "loss": -0.0287, "num_tokens": 33036267.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7331575155258179, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13364927828035372, "rewards/wordcountpos_reward/raw_geo/std": 0.11826819552272544, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09583937179043481, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 763 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1238.0, "completions/max_terminated_length": 1238.0, "completions/mean_length": 972.375, "completions/mean_terminated_length": 972.375, "completions/min_length": 741.0, "completions/min_terminated_length": 741.0, "epoch": 0.15283056611322265, "frac_reward_zero_std": 0.0, "grad_norm": 3.9499213157672792, "kl": 0.0081939697265625, "learning_rate": 9.924327025416213e-07, "loss": -0.0443, "num_tokens": 33073721.0, "reward": 2.2351741790771484e-08, "reward_std": 1.05270254611969, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.001051356776890749, "rewards/wordcountpos_reward/raw_geo/std": 0.13097985987539415, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1134476547592341, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 764 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1282.0, "completions/max_terminated_length": 1282.0, "completions/mean_length": 1110.875, "completions/mean_terminated_length": 1110.875, "completions/min_length": 913.0, "completions/min_terminated_length": 913.0, "epoch": 0.15303060612122424, "frac_reward_zero_std": 0.0, "grad_norm": 2.1374146190028673, "kl": 0.00360870361328125, "learning_rate": 9.9237521052127e-07, "loss": -0.0295, "num_tokens": 33115543.0, "reward": 5.960464477539063e-08, "reward_std": 0.7502629160881042, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03101903696506245, "rewards/wordcountpos_reward/raw_geo/std": 0.1335707904399487, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 765 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1184.0, "completions/max_terminated_length": 1184.0, "completions/mean_length": 1011.375, "completions/mean_terminated_length": 1011.375, "completions/min_length": 792.0, "completions/min_terminated_length": 792.0, "epoch": 0.15323064612922585, "frac_reward_zero_std": 0.0, "grad_norm": 3.605569915819742, "kl": 0.00687408447265625, "learning_rate": 9.923175027967577e-07, "loss": -0.0252, "num_tokens": 33147101.0, "reward": 0.0, "reward_std": 0.6953427791595459, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03576088829341519, "rewards/wordcountpos_reward/raw_geo/std": 0.10368129152337263, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.103905227473387, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 766 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1264.0, "completions/mean_length": 1100.9375, "completions/mean_terminated_length": 1074.3333740234375, "completions/min_length": 957.0, "completions/min_terminated_length": 957.0, "epoch": 0.15343068613722743, "frac_reward_zero_std": 0.0, "grad_norm": 3.467804417765987, "kl": 0.011077880859375, "learning_rate": 9.922595793962223e-07, "loss": -0.0183, "num_tokens": 33193932.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9323349595069885, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0400680178423756, "rewards/wordcountpos_reward/raw_geo/std": 0.16127185716275536, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101764, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 767 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1246.0, "completions/mean_length": 1036.0625, "completions/mean_terminated_length": 1005.1333618164062, "completions/min_length": 798.0, "completions/min_terminated_length": 798.0, "epoch": 0.15363072614522905, "frac_reward_zero_std": 0.0, "grad_norm": 3.334475969936804, "kl": 0.0064849853515625, "learning_rate": 9.92201440347908e-07, "loss": -0.009, "num_tokens": 33233789.0, "reward": 0.0, "reward_std": 0.9955896139144897, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.047388124107884795, "rewards/wordcountpos_reward/raw_geo/std": 0.05931634592262268, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258098, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 768 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1420.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1181.0, "completions/mean_terminated_length": 1181.0, "completions/min_length": 945.0, "completions/min_terminated_length": 945.0, "epoch": 0.15383076615323066, "frac_reward_zero_std": 0.0, "grad_norm": 3.0008269550301305, "kl": 0.005889892578125, "learning_rate": 9.921430856801631e-07, "loss": -0.0288, "num_tokens": 33278989.0, "reward": 0.0, "reward_std": 0.9743967056274414, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.024752236001593122, "rewards/wordcountpos_reward/raw_geo/std": 0.0472160239814566, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10602235962635781, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 769 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1325.0, "completions/mean_length": 1167.9375, "completions/mean_terminated_length": 1120.5, "completions/min_length": 892.0, "completions/min_terminated_length": 892.0, "epoch": 0.15403080616123224, "frac_reward_zero_std": 0.0, "grad_norm": 3.34023770231757, "kl": 0.00862884521484375, "learning_rate": 9.92084515421442e-07, "loss": 0.0008, "num_tokens": 33329788.0, "reward": 1.1175870895385742e-08, "reward_std": 1.0200579166412354, "rewards/wordcountpos_reward/mean": 1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.022090468620292347, "rewards/wordcountpos_reward/raw_geo/std": 0.04407479170293743, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08062257748298551, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 770 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1116.0, "completions/max_terminated_length": 1116.0, "completions/mean_length": 887.1875, "completions/mean_terminated_length": 887.1875, "completions/min_length": 771.0, "completions/min_terminated_length": 771.0, "epoch": 0.15423084616923385, "frac_reward_zero_std": 0.0, "grad_norm": 4.084139581071052, "kl": 0.010040283203125, "learning_rate": 9.920257296003035e-07, "loss": -0.0259, "num_tokens": 33357175.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8843921422958374, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09800201277649528, "rewards/wordcountpos_reward/raw_geo/std": 0.08875685994311198, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11666666666666668, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 771 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1132.75, "completions/mean_terminated_length": 1108.2667236328125, "completions/min_length": 812.0, "completions/min_terminated_length": 812.0, "epoch": 0.15443088617723544, "frac_reward_zero_std": 0.0, "grad_norm": 3.2775307182010196, "kl": 0.0063018798828125, "learning_rate": 9.919667282454123e-07, "loss": 0.0169, "num_tokens": 33401019.0, "reward": -2.9802322387695312e-08, "reward_std": 0.48580998182296753, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.010818859185801187, "rewards/wordcountpos_reward/raw_geo/std": 0.13812552676118908, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11279282877125756, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 772 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1444.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 1250.25, "completions/mean_terminated_length": 1250.25, "completions/min_length": 766.0, "completions/min_terminated_length": 766.0, "epoch": 0.15463092618523705, "frac_reward_zero_std": 0.0, "grad_norm": 3.3491310414750557, "kl": 0.0070953369140625, "learning_rate": 9.919075113855374e-07, "loss": 0.0289, "num_tokens": 33439639.0, "reward": 0.0, "reward_std": 0.6744941473007202, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0828370915743899, "rewards/wordcountpos_reward/raw_geo/std": 0.060435216074655906, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 773 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1254.0, "completions/max_terminated_length": 1254.0, "completions/mean_length": 1100.9375, "completions/mean_terminated_length": 1100.9375, "completions/min_length": 932.0, "completions/min_terminated_length": 932.0, "epoch": 0.15483096619323863, "frac_reward_zero_std": 0.0, "grad_norm": 2.7543085857557426, "kl": 0.005008697509765625, "learning_rate": 9.918480790495533e-07, "loss": -0.0211, "num_tokens": 33480782.0, "reward": 2.9802322387695312e-08, "reward_std": 0.703142523765564, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.18140903927184088, "rewards/wordcountpos_reward/raw_geo/std": 0.20375648648593733, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11417984514369006, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 774 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1103.4375, "completions/mean_terminated_length": 1077.0, "completions/min_length": 819.0, "completions/min_terminated_length": 819.0, "epoch": 0.15503100620124025, "frac_reward_zero_std": 0.0, "grad_norm": 3.354352017332648, "kl": 0.0090484619140625, "learning_rate": 9.917884312664395e-07, "loss": -0.0725, "num_tokens": 33523581.0, "reward": 0.0, "reward_std": 0.8834471702575684, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.14810737680414726, "rewards/wordcountpos_reward/raw_geo/std": 0.21037850141902315, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13221755360572018, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 775 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1384.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 1078.125, "completions/mean_terminated_length": 1078.125, "completions/min_length": 830.0, "completions/min_terminated_length": 830.0, "epoch": 0.15523104620924186, "frac_reward_zero_std": 0.0, "grad_norm": 3.3468395667011417, "kl": 0.0092620849609375, "learning_rate": 9.917285680652805e-07, "loss": -0.044, "num_tokens": 33563463.0, "reward": 0.0, "reward_std": 0.5799727439880371, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.005290466044875371, "rewards/wordcountpos_reward/raw_geo/std": 0.29740636269808074, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.0769800358919501, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 776 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1341.9375, "completions/mean_terminated_length": 1305.4615478515625, "completions/min_length": 839.0, "completions/min_terminated_length": 839.0, "epoch": 0.15543108621724344, "frac_reward_zero_std": 0.0, "grad_norm": 2.9151895767774505, "kl": 0.00675201416015625, "learning_rate": 9.916684894752659e-07, "loss": -0.0437, "num_tokens": 33609182.0, "reward": 0.0, "reward_std": 0.5609688758850098, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.02191452602287353, "rewards/wordcountpos_reward/raw_geo/std": 0.1947108641697368, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 777 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1462.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1156.875, "completions/mean_terminated_length": 1156.875, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.15563112622524505, "frac_reward_zero_std": 0.0, "grad_norm": 3.1438240134222766, "kl": 0.0071868896484375, "learning_rate": 9.916081955256902e-07, "loss": 0.0289, "num_tokens": 33651060.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8530892133712769, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.024303858044810858, "rewards/wordcountpos_reward/raw_geo/std": 0.1037123075816192, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 778 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1349.0, "completions/max_terminated_length": 1349.0, "completions/mean_length": 1063.9375, "completions/mean_terminated_length": 1063.9375, "completions/min_length": 913.0, "completions/min_terminated_length": 913.0, "epoch": 0.15583116623324664, "frac_reward_zero_std": 0.0, "grad_norm": 2.9340274692827992, "kl": 0.00630950927734375, "learning_rate": 9.915476862459529e-07, "loss": -0.0184, "num_tokens": 33682843.0, "reward": 0.0, "reward_std": 0.7483388781547546, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03263129962382793, "rewards/wordcountpos_reward/raw_geo/std": 0.09788571460764907, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.0894427190999916, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 779 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1325.0, "completions/mean_length": 1115.5, "completions/mean_terminated_length": 1089.86669921875, "completions/min_length": 845.0, "completions/min_terminated_length": 845.0, "epoch": 0.15603120624124825, "frac_reward_zero_std": 0.0, "grad_norm": 3.4798945538773367, "kl": 0.010955810546875, "learning_rate": 9.91486961665559e-07, "loss": -0.02, "num_tokens": 33731571.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9627860188484192, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0038596593208819474, "rewards/wordcountpos_reward/raw_geo/std": 0.09357520253925831, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.12931443160847217, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 780 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1393.0, "completions/mean_length": 1160.0625, "completions/mean_terminated_length": 1137.4000244140625, "completions/min_length": 909.0, "completions/min_terminated_length": 909.0, "epoch": 0.15623124624924986, "frac_reward_zero_std": 0.0, "grad_norm": 3.6290723312151876, "kl": 0.00949859619140625, "learning_rate": 9.914260218141179e-07, "loss": -0.0007, "num_tokens": 33778788.0, "reward": 0.0, "reward_std": 0.8243637084960938, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.16686743051378558, "rewards/wordcountpos_reward/raw_geo/std": 0.23919704839104772, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11894598836509011, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 781 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1263.0, "completions/max_terminated_length": 1263.0, "completions/mean_length": 945.25, "completions/mean_terminated_length": 945.25, "completions/min_length": 768.0, "completions/min_terminated_length": 768.0, "epoch": 0.15643128625725145, "frac_reward_zero_std": 0.0, "grad_norm": 3.3978790163975354, "kl": 0.00733184814453125, "learning_rate": 9.913648667213438e-07, "loss": 0.0087, "num_tokens": 33816752.0, "reward": 0.0, "reward_std": 0.7842049598693848, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07163627658839805, "rewards/wordcountpos_reward/raw_geo/std": 0.11252061558288198, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.1102186379345533, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 782 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1328.0, "completions/mean_terminated_length": 1303.4285888671875, "completions/min_length": 1136.0, "completions/min_terminated_length": 1136.0, "epoch": 0.15663132626525306, "frac_reward_zero_std": 0.0, "grad_norm": 2.3640950608589484, "kl": 0.005664825439453125, "learning_rate": 9.913034964170567e-07, "loss": -0.006, "num_tokens": 33856304.0, "reward": 0.0, "reward_std": 0.6626273393630981, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1877841178051203, "rewards/wordcountpos_reward/raw_geo/std": 0.10278288994291368, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.03442651863295481, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 783 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1330.0, "completions/max_terminated_length": 1330.0, "completions/mean_length": 1054.5, "completions/mean_terminated_length": 1054.5, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.15683136627325464, "frac_reward_zero_std": 0.0, "grad_norm": 3.1832911400260273, "kl": 0.00711822509765625, "learning_rate": 9.912419109311807e-07, "loss": 0.0048, "num_tokens": 33907536.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5279848575592041, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12141121207981156, "rewards/wordcountpos_reward/raw_geo/std": 0.2897728240012861, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10000000000000002, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 784 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1253.875, "completions/mean_terminated_length": 1197.0770263671875, "completions/min_length": 1024.0, "completions/min_terminated_length": 1024.0, "epoch": 0.15703140628125625, "frac_reward_zero_std": 0.0, "grad_norm": 3.612087941973419, "kl": 0.0083465576171875, "learning_rate": 9.911801102937455e-07, "loss": 0.0043, "num_tokens": 33956414.0, "reward": 0.0, "reward_std": 0.8467312455177307, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06869423680342421, "rewards/wordcountpos_reward/raw_geo/std": 0.07720460893057841, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7708333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.14801151106386087, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 785 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1487.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1156.25, "completions/mean_terminated_length": 1156.25, "completions/min_length": 901.0, "completions/min_terminated_length": 901.0, "epoch": 0.15723144628925786, "frac_reward_zero_std": 0.0, "grad_norm": 3.177757514213256, "kl": 0.007659912109375, "learning_rate": 9.91118094534885e-07, "loss": 0.0099, "num_tokens": 34003698.0, "reward": 0.0, "reward_std": 0.4837278127670288, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10621223160088289, "rewards/wordcountpos_reward/raw_geo/std": 0.1536083964388962, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.2237723711142063, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 786 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1416.0, "completions/mean_length": 1269.875, "completions/mean_terminated_length": 1216.769287109375, "completions/min_length": 964.0, "completions/min_terminated_length": 964.0, "epoch": 0.15743148629725945, "frac_reward_zero_std": 0.0, "grad_norm": 3.094827749582465, "kl": 0.00800323486328125, "learning_rate": 9.910558636848384e-07, "loss": 0.013, "num_tokens": 34050360.0, "reward": 0.0, "reward_std": 0.8311507701873779, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12648163447065644, "rewards/wordcountpos_reward/raw_geo/std": 0.06127426615184167, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11385500851066221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 787 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1286.0, "completions/max_terminated_length": 1286.0, "completions/mean_length": 940.625, "completions/mean_terminated_length": 940.625, "completions/min_length": 643.0, "completions/min_terminated_length": 643.0, "epoch": 0.15763152630526106, "frac_reward_zero_std": 0.0, "grad_norm": 2.758995880215236, "kl": 0.007659912109375, "learning_rate": 9.909934177739502e-07, "loss": -0.0651, "num_tokens": 34096010.0, "reward": 0.0, "reward_std": 0.7082724571228027, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.14976231314995758, "rewards/wordcountpos_reward/raw_geo/std": 0.29622757573526715, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7708333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.13977495139343474, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 788 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1217.0, "completions/max_terminated_length": 1217.0, "completions/mean_length": 1049.75, "completions/mean_terminated_length": 1049.75, "completions/min_length": 845.0, "completions/min_terminated_length": 845.0, "epoch": 0.15783156631326264, "frac_reward_zero_std": 0.0, "grad_norm": 2.2593399404785743, "kl": 0.0043792724609375, "learning_rate": 9.909307568326686e-07, "loss": -0.0171, "num_tokens": 34144862.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5086317658424377, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.008440912037138155, "rewards/wordcountpos_reward/raw_geo/std": 0.13763680823236565, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258098, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 789 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1192.875, "completions/mean_terminated_length": 1122.0, "completions/min_length": 807.0, "completions/min_terminated_length": 807.0, "epoch": 0.15803160632126426, "frac_reward_zero_std": 0.0, "grad_norm": 3.4937826097082887, "kl": 0.00675201416015625, "learning_rate": 9.90867880891548e-07, "loss": -0.0118, "num_tokens": 34189388.0, "reward": 7.450580596923828e-09, "reward_std": 1.0656585693359375, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.17107447489817776, "rewards/wordcountpos_reward/raw_geo/std": 0.06637125902080088, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238707, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 790 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1329.0, "completions/max_terminated_length": 1329.0, "completions/mean_length": 951.9375, "completions/mean_terminated_length": 951.9375, "completions/min_length": 658.0, "completions/min_terminated_length": 658.0, "epoch": 0.15823164632926584, "frac_reward_zero_std": 0.0, "grad_norm": 4.0828284993154025, "kl": 0.0092010498046875, "learning_rate": 9.908047899812468e-07, "loss": -0.0381, "num_tokens": 34229043.0, "reward": 0.0, "reward_std": 0.7647432088851929, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07455052843366836, "rewards/wordcountpos_reward/raw_geo/std": 0.05064521121716925, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.13662601021279464, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 791 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1234.0, "completions/mean_terminated_length": 1172.615478515625, "completions/min_length": 942.0, "completions/min_terminated_length": 942.0, "epoch": 0.15843168633726745, "frac_reward_zero_std": 0.0, "grad_norm": 3.272521924112114, "kl": 0.0084381103515625, "learning_rate": 9.907414841325283e-07, "loss": 0.0114, "num_tokens": 34281283.0, "reward": 0.0, "reward_std": 0.701446533203125, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04508345847290812, "rewards/wordcountpos_reward/raw_geo/std": 0.07121332733857914, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.13763881881375054, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 792 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1368.0, "completions/mean_length": 1161.375, "completions/mean_terminated_length": 1138.800048828125, "completions/min_length": 868.0, "completions/min_terminated_length": 868.0, "epoch": 0.15863172634526906, "frac_reward_zero_std": 0.0, "grad_norm": 2.681399929360685, "kl": 0.0063323974609375, "learning_rate": 9.906779633762606e-07, "loss": -0.0078, "num_tokens": 34324145.0, "reward": -2.9802322387695312e-08, "reward_std": 0.4002396762371063, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03854446077547653, "rewards/wordcountpos_reward/raw_geo/std": 0.08550339471886417, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13662601021279466, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 793 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1162.75, "completions/mean_terminated_length": 1140.2667236328125, "completions/min_length": 824.0, "completions/min_terminated_length": 824.0, "epoch": 0.15883176635327065, "frac_reward_zero_std": 0.0, "grad_norm": 3.15440829909591, "kl": 0.009765625, "learning_rate": 9.906142277434172e-07, "loss": 0.0176, "num_tokens": 34371597.0, "reward": 0.0, "reward_std": 1.009348750114441, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.16334101228147407, "rewards/wordcountpos_reward/raw_geo/std": 0.05487435788636801, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08766518798921945, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 794 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1226.0, "completions/mean_length": 1163.25, "completions/mean_terminated_length": 1085.5384521484375, "completions/min_length": 1023.0, "completions/min_terminated_length": 1023.0, "epoch": 0.15903180636127226, "frac_reward_zero_std": 0.0, "grad_norm": 2.9098794809363726, "kl": 0.0064849853515625, "learning_rate": 9.905502772650754e-07, "loss": -0.0252, "num_tokens": 34415513.0, "reward": 0.0, "reward_std": 0.605635941028595, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10950574364291185, "rewards/wordcountpos_reward/raw_geo/std": 0.14417382336820583, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.13984117975602023, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 795 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1103.0, "completions/max_terminated_length": 1103.0, "completions/mean_length": 908.25, "completions/mean_terminated_length": 908.25, "completions/min_length": 646.0, "completions/min_terminated_length": 646.0, "epoch": 0.15923184636927384, "frac_reward_zero_std": 0.0, "grad_norm": 2.873626986944352, "kl": 0.003871917724609375, "learning_rate": 9.904861119724178e-07, "loss": -0.0088, "num_tokens": 34454709.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6537288427352905, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.039580564833577754, "rewards/wordcountpos_reward/raw_geo/std": 0.035559756890949014, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.102469507659596, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 796 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1413.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1197.8125, "completions/mean_terminated_length": 1197.8125, "completions/min_length": 1082.0, "completions/min_terminated_length": 1082.0, "epoch": 0.15943188637727546, "frac_reward_zero_std": 0.0, "grad_norm": 2.7709073663014485, "kl": 0.005016326904296875, "learning_rate": 9.904217318967318e-07, "loss": 0.0153, "num_tokens": 34494546.0, "reward": 2.60770320892334e-08, "reward_std": 0.9712255001068115, "rewards/wordcountpos_reward/mean": 2.60770320892334e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06141137734164746, "rewards/wordcountpos_reward/raw_geo/std": 0.09061907886218735, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.103905227473387, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 797 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1193.0, "completions/max_terminated_length": 1193.0, "completions/mean_length": 998.0, "completions/mean_terminated_length": 998.0, "completions/min_length": 848.0, "completions/min_terminated_length": 848.0, "epoch": 0.15963192638527707, "frac_reward_zero_std": 0.0, "grad_norm": 3.911791898690215, "kl": 0.010162353515625, "learning_rate": 9.903571370694094e-07, "loss": 0.0104, "num_tokens": 34543082.0, "reward": -1.4901161193847656e-08, "reward_std": 1.00223708152771, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06160501681784743, "rewards/wordcountpos_reward/raw_geo/std": 0.06019470116425234, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12524050936172845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 798 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1247.0, "completions/max_terminated_length": 1247.0, "completions/mean_length": 1103.6875, "completions/mean_terminated_length": 1103.6875, "completions/min_length": 897.0, "completions/min_terminated_length": 897.0, "epoch": 0.15983196639327865, "frac_reward_zero_std": 0.0, "grad_norm": 3.3227233177959383, "kl": 0.00799560546875, "learning_rate": 9.902923275219475e-07, "loss": 0.0047, "num_tokens": 34574045.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0319489240646362, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10032981658898234, "rewards/wordcountpos_reward/raw_geo/std": 0.1059968891792867, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.0807373427759331, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 799 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1345.0, "completions/max_terminated_length": 1345.0, "completions/mean_length": 1061.5, "completions/mean_terminated_length": 1061.5, "completions/min_length": 761.0, "completions/min_terminated_length": 761.0, "epoch": 0.16003200640128026, "frac_reward_zero_std": 0.0, "grad_norm": 3.672975893330212, "kl": 0.007659912109375, "learning_rate": 9.902273032859472e-07, "loss": -0.0089, "num_tokens": 34606405.0, "reward": 0.0, "reward_std": 0.8354414105415344, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04063577222995593, "rewards/wordcountpos_reward/raw_geo/std": 0.0975359924965412, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13158576980363348, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 800 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1232.0, "completions/max_terminated_length": 1232.0, "completions/mean_length": 968.0, "completions/mean_terminated_length": 968.0, "completions/min_length": 773.0, "completions/min_terminated_length": 773.0, "epoch": 0.16023204640928185, "frac_reward_zero_std": 0.0, "grad_norm": 3.850952848482492, "kl": 0.00897216796875, "learning_rate": 9.90162064393115e-07, "loss": -0.0336, "num_tokens": 34637141.0, "reward": 0.0, "reward_std": 0.9965044856071472, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04524656828288196, "rewards/wordcountpos_reward/raw_geo/std": 0.056714281291219464, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12345339501504504, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 801 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1298.0, "completions/mean_length": 1199.4375, "completions/mean_terminated_length": 1062.8182373046875, "completions/min_length": 887.0, "completions/min_terminated_length": 887.0, "epoch": 0.16043208641728346, "frac_reward_zero_std": 0.0, "grad_norm": 3.410119429660572, "kl": 0.00687408447265625, "learning_rate": 9.900966108752614e-07, "loss": 0.0273, "num_tokens": 34687476.0, "reward": 7.450580596923828e-09, "reward_std": 1.0613950490951538, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.11234680641347201, "rewards/wordcountpos_reward/raw_geo/std": 0.05078166043023254, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11666666666666667, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 802 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1455.0, "completions/max_terminated_length": 1455.0, "completions/mean_length": 956.375, "completions/mean_terminated_length": 956.375, "completions/min_length": 679.0, "completions/min_terminated_length": 679.0, "epoch": 0.16063212642528504, "frac_reward_zero_std": 0.0, "grad_norm": 3.7621150712402063, "kl": 0.0075225830078125, "learning_rate": 9.900309427643018e-07, "loss": 0.044, "num_tokens": 34727898.0, "reward": 0.0, "reward_std": 0.6777828931808472, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.39093038955561893, "rewards/wordcountpos_reward/raw_geo/std": 0.24487287597197832, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09878896324620104, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 803 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1227.0, "completions/max_terminated_length": 1227.0, "completions/mean_length": 1013.0, "completions/mean_terminated_length": 1013.0, "completions/min_length": 861.0, "completions/min_terminated_length": 861.0, "epoch": 0.16083216643328666, "frac_reward_zero_std": 0.0, "grad_norm": 3.8139140704765246, "kl": 0.010498046875, "learning_rate": 9.899650600922566e-07, "loss": 0.0306, "num_tokens": 34775034.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9415539503097534, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.17483805481864553, "rewards/wordcountpos_reward/raw_geo/std": 0.12094116116523004, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12852438880818895, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 804 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1389.0, "completions/max_terminated_length": 1389.0, "completions/mean_length": 981.8125, "completions/mean_terminated_length": 981.8125, "completions/min_length": 646.0, "completions/min_terminated_length": 646.0, "epoch": 0.16103220644128827, "frac_reward_zero_std": 0.0, "grad_norm": 3.816486220351593, "kl": 0.0101776123046875, "learning_rate": 9.8989896289125e-07, "loss": 0.0063, "num_tokens": 34813615.0, "reward": 0.0, "reward_std": 0.8158285617828369, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07149384005877667, "rewards/wordcountpos_reward/raw_geo/std": 0.12996047715949613, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.1270024788326182, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 805 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1397.0, "completions/mean_length": 1072.125, "completions/mean_terminated_length": 1043.60009765625, "completions/min_length": 733.0, "completions/min_terminated_length": 733.0, "epoch": 0.16123224644928985, "frac_reward_zero_std": 0.0, "grad_norm": 2.7699095221827665, "kl": 0.00536346435546875, "learning_rate": 9.898326511935117e-07, "loss": -0.0025, "num_tokens": 34859065.0, "reward": 0.0, "reward_std": 0.8644427061080933, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.014355155240497748, "rewards/wordcountpos_reward/raw_geo/std": 0.11236367552304588, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7625000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.13763881881375054, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 806 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1322.0, "completions/max_terminated_length": 1322.0, "completions/mean_length": 996.3125, "completions/mean_terminated_length": 996.3125, "completions/min_length": 741.0, "completions/min_terminated_length": 741.0, "epoch": 0.16143228645729146, "frac_reward_zero_std": 0.0, "grad_norm": 3.452551918966318, "kl": 0.00699615478515625, "learning_rate": 9.897661250313755e-07, "loss": 0.0055, "num_tokens": 34892854.0, "reward": -7.450580596923828e-09, "reward_std": 1.0471452474594116, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.03303136901863547, "rewards/wordcountpos_reward/raw_geo/std": 0.032051244998446984, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1140987226857449, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 807 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1194.1875, "completions/mean_terminated_length": 1055.181884765625, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.16163232646529305, "frac_reward_zero_std": 0.0, "grad_norm": 2.608166205892848, "kl": 0.0068511962890625, "learning_rate": 9.896993844372794e-07, "loss": -0.2146, "num_tokens": 34933089.0, "reward": 0.0, "reward_std": 0.9058641195297241, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2515449946723898, "rewards/wordcountpos_reward/raw_geo/std": 0.2506604925653395, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.18373692949230228, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 808 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1413.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1039.0, "completions/mean_terminated_length": 1039.0, "completions/min_length": 859.0, "completions/min_terminated_length": 859.0, "epoch": 0.16183236647329466, "frac_reward_zero_std": 0.0, "grad_norm": 2.6621287210917313, "kl": 0.00592041015625, "learning_rate": 9.896324294437672e-07, "loss": 0.024, "num_tokens": 34975401.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5400734543800354, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06484564826238085, "rewards/wordcountpos_reward/raw_geo/std": 0.21570354738245273, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.07888106377466153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 809 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1405.0, "completions/mean_length": 1092.3125, "completions/mean_terminated_length": 1065.1334228515625, "completions/min_length": 751.0, "completions/min_terminated_length": 751.0, "epoch": 0.16203240648129627, "frac_reward_zero_std": 0.0, "grad_norm": 3.1963898660836096, "kl": 0.00835418701171875, "learning_rate": 9.895652600834859e-07, "loss": -0.0879, "num_tokens": 35009190.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9988131523132324, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09594663405594336, "rewards/wordcountpos_reward/raw_geo/std": 0.05843662382213452, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12141145226353543, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 810 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1225.0625, "completions/mean_terminated_length": 1100.0909423828125, "completions/min_length": 745.0, "completions/min_terminated_length": 745.0, "epoch": 0.16223244648929785, "frac_reward_zero_std": 0.0, "grad_norm": 2.9087438998984143, "kl": 0.00605010986328125, "learning_rate": 9.894978763891879e-07, "loss": -0.0093, "num_tokens": 35060615.0, "reward": 0.0, "reward_std": 0.821182131767273, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.015604559454176714, "rewards/wordcountpos_reward/raw_geo/std": 0.057690123591530036, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948504, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 811 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1410.0, "completions/max_terminated_length": 1410.0, "completions/mean_length": 1079.3125, "completions/mean_terminated_length": 1079.3125, "completions/min_length": 714.0, "completions/min_terminated_length": 714.0, "epoch": 0.16243248649729947, "frac_reward_zero_std": 0.0, "grad_norm": 3.5646145112160332, "kl": 0.0086212158203125, "learning_rate": 9.894302783937296e-07, "loss": -0.0528, "num_tokens": 35101980.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0420316457748413, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1819244669661149, "rewards/wordcountpos_reward/raw_geo/std": 0.2371044929453032, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12995725793078622, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 812 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1068.0, "completions/max_terminated_length": 1068.0, "completions/mean_length": 889.75, "completions/mean_terminated_length": 889.75, "completions/min_length": 770.0, "completions/min_terminated_length": 770.0, "epoch": 0.16263252650530105, "frac_reward_zero_std": 0.0, "grad_norm": 4.21720415067769, "kl": 0.00738525390625, "learning_rate": 9.89362466130072e-07, "loss": -0.0093, "num_tokens": 35141960.0, "reward": 7.450580596923828e-09, "reward_std": 1.0645757913589478, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.07809130347153606, "rewards/wordcountpos_reward/raw_geo/std": 0.06638365163858437, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12405196043952266, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 813 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1044.5, "completions/mean_terminated_length": 979.4285888671875, "completions/min_length": 666.0, "completions/min_terminated_length": 666.0, "epoch": 0.16283256651330266, "frac_reward_zero_std": 0.0, "grad_norm": 2.949445270254486, "kl": 0.006389617919921875, "learning_rate": 9.892944396312812e-07, "loss": -0.0408, "num_tokens": 35180528.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9647680521011353, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07753679244930345, "rewards/wordcountpos_reward/raw_geo/std": 0.13283745496342944, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590962, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 814 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1357.0, "completions/max_terminated_length": 1357.0, "completions/mean_length": 1095.5625, "completions/mean_terminated_length": 1095.5625, "completions/min_length": 887.0, "completions/min_terminated_length": 887.0, "epoch": 0.16303260652130427, "frac_reward_zero_std": 0.0, "grad_norm": 1.0429789678872363, "kl": 0.0020971298217773438, "learning_rate": 9.892261989305264e-07, "loss": -0.001, "num_tokens": 35228905.0, "reward": 0.0, "reward_std": 0.5095144510269165, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14381371010453364, "rewards/wordcountpos_reward/raw_geo/std": 0.09447399646724984, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.15147423690002354, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 815 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1477.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1012.25, "completions/mean_terminated_length": 1012.25, "completions/min_length": 775.0, "completions/min_terminated_length": 775.0, "epoch": 0.16323264652930586, "frac_reward_zero_std": 0.0, "grad_norm": 3.5555673393138703, "kl": 0.0089111328125, "learning_rate": 9.891577440610827e-07, "loss": -0.0515, "num_tokens": 35280949.0, "reward": 0.0, "reward_std": 0.7069839835166931, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14147677355016686, "rewards/wordcountpos_reward/raw_geo/std": 0.14644595010787564, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.13601470508735444, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 816 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1174.9375, "completions/mean_terminated_length": 1128.5, "completions/min_length": 736.0, "completions/min_terminated_length": 736.0, "epoch": 0.16343268653730747, "frac_reward_zero_std": 0.0, "grad_norm": 3.5562781994207597, "kl": 0.00789642333984375, "learning_rate": 9.89089075056329e-07, "loss": -0.0559, "num_tokens": 35325924.0, "reward": 0.0, "reward_std": 0.6456366777420044, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.014544088282210446, "rewards/wordcountpos_reward/raw_geo/std": 0.05298646855861031, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237134, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 817 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1411.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 1016.4375, "completions/mean_terminated_length": 1016.4375, "completions/min_length": 789.0, "completions/min_terminated_length": 789.0, "epoch": 0.16363272654530905, "frac_reward_zero_std": 0.0, "grad_norm": 3.3597090792094257, "kl": 0.00727081298828125, "learning_rate": 9.890201919497482e-07, "loss": 0.0004, "num_tokens": 35367315.0, "reward": 0.0, "reward_std": 0.8697878122329712, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04491427464205154, "rewards/wordcountpos_reward/raw_geo/std": 0.09087934605437703, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.1397749513934347, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 818 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1378.0, "completions/mean_length": 1111.875, "completions/mean_terminated_length": 1086.0, "completions/min_length": 806.0, "completions/min_terminated_length": 806.0, "epoch": 0.16383276655331067, "frac_reward_zero_std": 0.0, "grad_norm": 2.992608699653316, "kl": 0.006195068359375, "learning_rate": 9.889510947749282e-07, "loss": -0.0087, "num_tokens": 35408993.0, "reward": -1.4901161193847656e-08, "reward_std": 1.021742820739746, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1560479742922557, "rewards/wordcountpos_reward/raw_geo/std": 0.09691535501412255, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07187952884282607, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 819 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1196.0, "completions/max_terminated_length": 1196.0, "completions/mean_length": 977.1875, "completions/mean_terminated_length": 977.1875, "completions/min_length": 637.0, "completions/min_terminated_length": 637.0, "epoch": 0.16403280656131225, "frac_reward_zero_std": 0.0, "grad_norm": 3.0216239486390024, "kl": 0.00713348388671875, "learning_rate": 9.888817835655614e-07, "loss": -0.0938, "num_tokens": 35453844.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6564182639122009, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15543244519209015, "rewards/wordcountpos_reward/raw_geo/std": 0.12878481750210258, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.14497764834110988, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 820 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1430.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1061.5, "completions/mean_terminated_length": 1061.5, "completions/min_length": 792.0, "completions/min_terminated_length": 792.0, "epoch": 0.16423284656931386, "frac_reward_zero_std": 0.0, "grad_norm": 2.875493371725326, "kl": 0.00478363037109375, "learning_rate": 9.888122583554438e-07, "loss": 0.0313, "num_tokens": 35492476.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0196912288665771, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.015551109633836213, "rewards/wordcountpos_reward/raw_geo/std": 0.10202990547716798, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09574271077563382, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 821 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1133.4375, "completions/mean_terminated_length": 1109.0, "completions/min_length": 890.0, "completions/min_terminated_length": 890.0, "epoch": 0.16443288657731547, "frac_reward_zero_std": 0.0, "grad_norm": 3.647227448835252, "kl": 0.00818634033203125, "learning_rate": 9.887425191784765e-07, "loss": 0.0171, "num_tokens": 35533059.0, "reward": 4.470348358154297e-08, "reward_std": 0.8486392498016357, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01761084672418888, "rewards/wordcountpos_reward/raw_geo/std": 0.07303189988638512, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823631, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 822 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1176.0, "completions/max_terminated_length": 1176.0, "completions/mean_length": 932.75, "completions/mean_terminated_length": 932.75, "completions/min_length": 776.0, "completions/min_terminated_length": 776.0, "epoch": 0.16463292658531706, "frac_reward_zero_std": 0.0, "grad_norm": 2.9147832188519938, "kl": 0.00701904296875, "learning_rate": 9.886725660686647e-07, "loss": 0.0091, "num_tokens": 35567119.0, "reward": 0.0, "reward_std": 0.5176118612289429, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09158306630554128, "rewards/wordcountpos_reward/raw_geo/std": 0.13005862147907943, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.102469507659596, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 823 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1442.0, "completions/max_terminated_length": 1442.0, "completions/mean_length": 1066.375, "completions/mean_terminated_length": 1066.375, "completions/min_length": 839.0, "completions/min_terminated_length": 839.0, "epoch": 0.16483296659331867, "frac_reward_zero_std": 0.0, "grad_norm": 3.2968097928419806, "kl": 0.009613037109375, "learning_rate": 9.886023990601176e-07, "loss": -0.0033, "num_tokens": 35606357.0, "reward": 0.0, "reward_std": 0.6017299890518188, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2324163070215693, "rewards/wordcountpos_reward/raw_geo/std": 0.058508599641958016, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402213, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 824 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1152.875, "completions/mean_terminated_length": 1129.7333984375, "completions/min_length": 849.0, "completions/min_terminated_length": 849.0, "epoch": 0.16503300660132025, "frac_reward_zero_std": 0.0, "grad_norm": 3.124815431908731, "kl": 0.01092529296875, "learning_rate": 9.88532018187049e-07, "loss": -0.0356, "num_tokens": 35652411.0, "reward": 7.450580596923828e-09, "reward_std": 1.0256050825119019, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.35022350323825163, "rewards/wordcountpos_reward/raw_geo/std": 0.07970165454230675, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.09418264367902598, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 825 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1495.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1238.0625, "completions/mean_terminated_length": 1238.0625, "completions/min_length": 729.0, "completions/min_terminated_length": 729.0, "epoch": 0.16523304660932187, "frac_reward_zero_std": 0.0, "grad_norm": 3.2579567456729834, "kl": 0.015838623046875, "learning_rate": 9.884614234837772e-07, "loss": -0.0185, "num_tokens": 35711548.0, "reward": 0.0, "reward_std": 1.0055289268493652, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.14136925022695296, "rewards/wordcountpos_reward/raw_geo/std": 0.13019918711936887, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07588978362901862, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 826 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1215.8125, "completions/mean_terminated_length": 1196.86669921875, "completions/min_length": 980.0, "completions/min_terminated_length": 980.0, "epoch": 0.16543308661732348, "frac_reward_zero_std": 0.0, "grad_norm": 3.242224449434963, "kl": 0.0101318359375, "learning_rate": 9.88390614984724e-07, "loss": 0.0076, "num_tokens": 35761705.0, "reward": 0.0, "reward_std": 0.7001368403434753, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05096290442069456, "rewards/wordcountpos_reward/raw_geo/std": 0.1965121280241637, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 827 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1488.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1122.875, "completions/mean_terminated_length": 1122.875, "completions/min_length": 870.0, "completions/min_terminated_length": 870.0, "epoch": 0.16563312662532506, "frac_reward_zero_std": 0.0, "grad_norm": 3.4414322312128554, "kl": 0.00727081298828125, "learning_rate": 9.883195927244165e-07, "loss": -0.0223, "num_tokens": 35802279.0, "reward": -5.960464477539063e-08, "reward_std": 1.0034435987472534, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1128687333551407, "rewards/wordcountpos_reward/raw_geo/std": 0.12751682723874852, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11013459778666117, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 828 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1115.0, "completions/max_terminated_length": 1115.0, "completions/mean_length": 970.125, "completions/mean_terminated_length": 970.125, "completions/min_length": 778.0, "completions/min_terminated_length": 778.0, "epoch": 0.16583316663332667, "frac_reward_zero_std": 0.0, "grad_norm": 3.681751323837442, "kl": 0.0074920654296875, "learning_rate": 9.882483567374851e-07, "loss": 0.0099, "num_tokens": 35842025.0, "reward": -1.862645149230957e-08, "reward_std": 1.0243802070617676, "rewards/wordcountpos_reward/mean": -1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05534024647966182, "rewards/wordcountpos_reward/raw_geo/std": 0.10015460743881506, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476839, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 829 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1245.0, "completions/max_terminated_length": 1245.0, "completions/mean_length": 956.0, "completions/mean_terminated_length": 956.0, "completions/min_length": 599.0, "completions/min_terminated_length": 599.0, "epoch": 0.16603320664132826, "frac_reward_zero_std": 0.0, "grad_norm": 2.721287053945097, "kl": 0.010284423828125, "learning_rate": 9.881769070586648e-07, "loss": 0.0068, "num_tokens": 35884985.0, "reward": 1.862645149230957e-09, "reward_std": 1.0149438381195068, "rewards/wordcountpos_reward/mean": 1.862645149230957e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.023655234470058274, "rewards/wordcountpos_reward/raw_geo/std": 0.09388713262494991, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 830 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 1159.3125, "completions/mean_terminated_length": 1080.6923828125, "completions/min_length": 823.0, "completions/min_terminated_length": 823.0, "epoch": 0.16623324664932987, "frac_reward_zero_std": 0.0, "grad_norm": 3.0206824384871216, "kl": 0.01027679443359375, "learning_rate": 9.881052437227952e-07, "loss": -0.0319, "num_tokens": 35929430.0, "reward": 0.0, "reward_std": 0.7520310878753662, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06532731103826626, "rewards/wordcountpos_reward/raw_geo/std": 0.06818029730548754, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12765694770084507, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 831 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1313.0, "completions/max_terminated_length": 1313.0, "completions/mean_length": 1090.375, "completions/mean_terminated_length": 1090.375, "completions/min_length": 859.0, "completions/min_terminated_length": 859.0, "epoch": 0.16643328665733145, "frac_reward_zero_std": 0.0, "grad_norm": 3.4711476983162988, "kl": 0.0103912353515625, "learning_rate": 9.88033366764819e-07, "loss": -0.0206, "num_tokens": 35972844.0, "reward": 7.450580596923828e-09, "reward_std": 1.0510834455490112, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.03350021858584557, "rewards/wordcountpos_reward/raw_geo/std": 0.16770095998812162, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 832 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1325.0, "completions/max_terminated_length": 1325.0, "completions/mean_length": 1021.625, "completions/mean_terminated_length": 1021.625, "completions/min_length": 828.0, "completions/min_terminated_length": 828.0, "epoch": 0.16663332666533306, "frac_reward_zero_std": 0.0, "grad_norm": 3.3259332391092387, "kl": 0.00848388671875, "learning_rate": 9.879612762197843e-07, "loss": 0.0148, "num_tokens": 36009006.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8108147978782654, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.026350005905452747, "rewards/wordcountpos_reward/raw_geo/std": 0.11636202878194005, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 833 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1231.875, "completions/mean_terminated_length": 1214.0001220703125, "completions/min_length": 1016.0, "completions/min_terminated_length": 1016.0, "epoch": 0.16683336667333468, "frac_reward_zero_std": 0.0, "grad_norm": 3.3645760741066466, "kl": 0.0110931396484375, "learning_rate": 9.878889721228426e-07, "loss": -0.0166, "num_tokens": 36044804.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7991166114807129, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.16169766228623994, "rewards/wordcountpos_reward/raw_geo/std": 0.1746051860732521, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575906, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 834 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1292.6875, "completions/mean_terminated_length": 1131.4444580078125, "completions/min_length": 780.0, "completions/min_terminated_length": 780.0, "epoch": 0.16703340668133626, "frac_reward_zero_std": 0.0, "grad_norm": 3.153574265973643, "kl": 0.0108642578125, "learning_rate": 9.878164545092496e-07, "loss": -0.0421, "num_tokens": 36099175.0, "reward": 5.960464477539063e-08, "reward_std": 0.2914598882198334, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07879924866288396, "rewards/wordcountpos_reward/raw_geo/std": 0.11702559558664888, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.13221755360572016, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 835 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1213.625, "completions/mean_terminated_length": 1118.166748046875, "completions/min_length": 838.0, "completions/min_terminated_length": 838.0, "epoch": 0.16723344668933787, "frac_reward_zero_std": 0.0, "grad_norm": 3.1085429149419626, "kl": 0.00798797607421875, "learning_rate": 9.877437234143653e-07, "loss": -0.0734, "num_tokens": 36141065.0, "reward": 0.0, "reward_std": 0.9657445549964905, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.024942923094714117, "rewards/wordcountpos_reward/raw_geo/std": 0.2931455338558853, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409446, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 836 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 1196.0, "completions/mean_terminated_length": 1175.7333984375, "completions/min_length": 1007.0, "completions/min_terminated_length": 1007.0, "epoch": 0.16743348669733946, "frac_reward_zero_std": 0.0, "grad_norm": 3.2298411346750173, "kl": 0.01171875, "learning_rate": 9.876707788736539e-07, "loss": -0.0261, "num_tokens": 36186697.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5676624178886414, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10441925263890464, "rewards/wordcountpos_reward/raw_geo/std": 0.16120070604233722, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12224747213928167, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 837 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 1212.75, "completions/mean_terminated_length": 1193.60009765625, "completions/min_length": 1016.0, "completions/min_terminated_length": 1016.0, "epoch": 0.16763352670534107, "frac_reward_zero_std": 0.0, "grad_norm": 2.9016260329532506, "kl": 0.0107269287109375, "learning_rate": 9.87597620922683e-07, "loss": -0.0279, "num_tokens": 36233013.0, "reward": 0.0, "reward_std": 0.8022392392158508, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.025178464281762902, "rewards/wordcountpos_reward/raw_geo/std": 0.12770867486555654, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12345339501504503, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 838 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1093.1875, "completions/mean_terminated_length": 1093.1875, "completions/min_length": 773.0, "completions/min_terminated_length": 773.0, "epoch": 0.16783356671334268, "frac_reward_zero_std": 0.0, "grad_norm": 2.675755047730772, "kl": 0.00608062744140625, "learning_rate": 9.875242495971252e-07, "loss": -0.0327, "num_tokens": 36275192.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4860745370388031, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.19239438501046435, "rewards/wordcountpos_reward/raw_geo/std": 0.22574746432186216, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590963, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 839 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1455.0, "completions/max_terminated_length": 1455.0, "completions/mean_length": 1120.6875, "completions/mean_terminated_length": 1120.6875, "completions/min_length": 795.0, "completions/min_terminated_length": 795.0, "epoch": 0.16803360672134426, "frac_reward_zero_std": 0.0, "grad_norm": 3.2960290289495062, "kl": 0.0106201171875, "learning_rate": 9.874506649327567e-07, "loss": -0.0314, "num_tokens": 36318643.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6247957348823547, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07496458962923289, "rewards/wordcountpos_reward/raw_geo/std": 0.07462757331304543, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.1, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 840 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1384.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 1066.75, "completions/mean_terminated_length": 1066.75, "completions/min_length": 801.0, "completions/min_terminated_length": 801.0, "epoch": 0.16823364672934588, "frac_reward_zero_std": 0.0, "grad_norm": 3.570090926509135, "kl": 0.0112457275390625, "learning_rate": 9.873768669654575e-07, "loss": -0.0123, "num_tokens": 36356287.0, "reward": -7.450580596923828e-09, "reward_std": 1.0390896797180176, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.01660654291588695, "rewards/wordcountpos_reward/raw_geo/std": 0.1253223571789788, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1586400537905439, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 841 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1410.0, "completions/max_terminated_length": 1410.0, "completions/mean_length": 1179.5625, "completions/mean_terminated_length": 1179.5625, "completions/min_length": 951.0, "completions/min_terminated_length": 951.0, "epoch": 0.16843368673734746, "frac_reward_zero_std": 0.0, "grad_norm": 2.8365201872296817, "kl": 0.007781982421875, "learning_rate": 9.873028557312117e-07, "loss": 0.029, "num_tokens": 36402400.0, "reward": 0.0, "reward_std": 0.4980078935623169, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.22886109529388468, "rewards/wordcountpos_reward/raw_geo/std": 0.11052645354135353, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.1517795672580372, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 842 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1266.0, "completions/mean_length": 1103.875, "completions/mean_terminated_length": 1077.4666748046875, "completions/min_length": 806.0, "completions/min_terminated_length": 806.0, "epoch": 0.16863372674534907, "frac_reward_zero_std": 0.0, "grad_norm": 3.646939344659468, "kl": 0.0114593505859375, "learning_rate": 9.872286312661077e-07, "loss": -0.0305, "num_tokens": 36451870.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8521960973739624, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05802641712921642, "rewards/wordcountpos_reward/raw_geo/std": 0.11510962727771128, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.17299111516469837, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 843 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1358.0, "completions/max_terminated_length": 1358.0, "completions/mean_length": 1181.0625, "completions/mean_terminated_length": 1181.0625, "completions/min_length": 957.0, "completions/min_terminated_length": 957.0, "epoch": 0.16883376675335068, "frac_reward_zero_std": 0.0, "grad_norm": 2.580314473414636, "kl": 0.0074310302734375, "learning_rate": 9.87154193606338e-07, "loss": -0.0082, "num_tokens": 36489727.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9301730394363403, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06548249343952822, "rewards/wordcountpos_reward/raw_geo/std": 0.2759959113227013, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0807373427759331, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 844 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 1142.9375, "completions/mean_terminated_length": 980.6364135742188, "completions/min_length": 763.0, "completions/min_terminated_length": 763.0, "epoch": 0.16903380676135227, "frac_reward_zero_std": 0.0, "grad_norm": 3.3568736971519644, "kl": 0.010650634765625, "learning_rate": 9.87079542788198e-07, "loss": -0.0385, "num_tokens": 36543862.0, "reward": 0.0, "reward_std": 0.8167658448219299, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03506218231564674, "rewards/wordcountpos_reward/raw_geo/std": 0.03345129712581062, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.11344765475923409, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 845 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1218.375, "completions/mean_terminated_length": 1178.1429443359375, "completions/min_length": 961.0, "completions/min_terminated_length": 961.0, "epoch": 0.16923384676935388, "frac_reward_zero_std": 0.0, "grad_norm": 3.239560731686645, "kl": 0.0119171142578125, "learning_rate": 9.870046788480884e-07, "loss": 0.0102, "num_tokens": 36595604.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9169098138809204, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08550238941997734, "rewards/wordcountpos_reward/raw_geo/std": 0.13310127448859277, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07698003589195011, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 846 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1342.25, "completions/mean_terminated_length": 1270.5455322265625, "completions/min_length": 1122.0, "completions/min_terminated_length": 1122.0, "epoch": 0.16943388677735546, "frac_reward_zero_std": 0.0, "grad_norm": 2.346578205904879, "kl": 0.005908966064453125, "learning_rate": 9.86929601822513e-07, "loss": -0.0033, "num_tokens": 36639392.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9574987888336182, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.059590097014098446, "rewards/wordcountpos_reward/raw_geo/std": 0.08313618948298293, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818418, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 847 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1363.0, "completions/max_terminated_length": 1363.0, "completions/mean_length": 1082.3125, "completions/mean_terminated_length": 1082.3125, "completions/min_length": 821.0, "completions/min_terminated_length": 821.0, "epoch": 0.16963392678535708, "frac_reward_zero_std": 0.0, "grad_norm": 2.2516611101547217, "kl": 0.00492095947265625, "learning_rate": 9.868543117480798e-07, "loss": -0.0135, "num_tokens": 36676021.0, "reward": 0.0, "reward_std": 0.9351532459259033, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.022385136957765714, "rewards/wordcountpos_reward/raw_geo/std": 0.06909719484897522, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626406, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 848 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1240.0, "completions/mean_terminated_length": 1202.857177734375, "completions/min_length": 887.0, "completions/min_terminated_length": 887.0, "epoch": 0.16983396679335866, "frac_reward_zero_std": 0.0, "grad_norm": 3.4621050117357357, "kl": 0.01080322265625, "learning_rate": 9.867788086615001e-07, "loss": -0.0077, "num_tokens": 36726237.0, "reward": 0.0, "reward_std": 0.6256131529808044, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1307869089018581, "rewards/wordcountpos_reward/raw_geo/std": 0.24786893527117212, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.13984117975602023, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 849 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1293.0, "completions/max_terminated_length": 1293.0, "completions/mean_length": 969.875, "completions/mean_terminated_length": 969.875, "completions/min_length": 638.0, "completions/min_terminated_length": 638.0, "epoch": 0.17003400680136027, "frac_reward_zero_std": 0.0, "grad_norm": 3.471817323463469, "kl": 0.0121307373046875, "learning_rate": 9.867030925995905e-07, "loss": -0.0152, "num_tokens": 36774363.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8764817714691162, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06636729819860808, "rewards/wordcountpos_reward/raw_geo/std": 0.09451279792465354, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12102953419784838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 850 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1238.0, "completions/max_terminated_length": 1238.0, "completions/mean_length": 1035.0, "completions/mean_terminated_length": 1035.0, "completions/min_length": 744.0, "completions/min_terminated_length": 744.0, "epoch": 0.17023404680936188, "frac_reward_zero_std": 0.0, "grad_norm": 3.5072299805195826, "kl": 0.0092315673828125, "learning_rate": 9.866271635992694e-07, "loss": -0.0022, "num_tokens": 36820011.0, "reward": 0.0, "reward_std": 0.9494242072105408, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.035242739353180536, "rewards/wordcountpos_reward/raw_geo/std": 0.05995409400013748, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.1264911064067352, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 851 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1339.0, "completions/mean_length": 1171.4375, "completions/mean_terminated_length": 1061.916748046875, "completions/min_length": 787.0, "completions/min_terminated_length": 787.0, "epoch": 0.17043408681736347, "frac_reward_zero_std": 0.0, "grad_norm": 3.22865222888431, "kl": 0.00585174560546875, "learning_rate": 9.86551021697561e-07, "loss": -0.0285, "num_tokens": 36869810.0, "reward": 0.0, "reward_std": 0.9019524455070496, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.029357918366290644, "rewards/wordcountpos_reward/raw_geo/std": 0.07790593362666698, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11538983843829065, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 852 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1465.0, "completions/mean_length": 1233.6875, "completions/mean_terminated_length": 1172.2308349609375, "completions/min_length": 903.0, "completions/min_terminated_length": 903.0, "epoch": 0.17063412682536508, "frac_reward_zero_std": 0.0, "grad_norm": 2.8256145218580735, "kl": 0.00804901123046875, "learning_rate": 9.864746669315918e-07, "loss": -0.0538, "num_tokens": 36917765.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6553990840911865, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1237890383112415, "rewards/wordcountpos_reward/raw_geo/std": 0.07707539071075298, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.07588978362901858, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 853 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1423.0, "completions/mean_length": 1152.5, "completions/mean_terminated_length": 1102.857177734375, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 0.17083416683336666, "frac_reward_zero_std": 0.0, "grad_norm": 3.3346319391627945, "kl": 0.009765625, "learning_rate": 9.863980993385931e-07, "loss": -0.1352, "num_tokens": 36959477.0, "reward": 0.0, "reward_std": 0.8577319979667664, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09534393491413318, "rewards/wordcountpos_reward/raw_geo/std": 0.17586537122696005, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.17924739783224086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 854 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 1266.875, "completions/mean_terminated_length": 1213.0770263671875, "completions/min_length": 1057.0, "completions/min_terminated_length": 1057.0, "epoch": 0.17103420684136827, "frac_reward_zero_std": 0.0, "grad_norm": 2.7530032365567605, "kl": 0.00656890869140625, "learning_rate": 9.863213189558996e-07, "loss": 0.0237, "num_tokens": 37002299.0, "reward": 0.0, "reward_std": 0.6090317964553833, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12083198358743402, "rewards/wordcountpos_reward/raw_geo/std": 0.12608577630200732, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 855 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1284.0, "completions/max_terminated_length": 1284.0, "completions/mean_length": 1081.5625, "completions/mean_terminated_length": 1081.5625, "completions/min_length": 962.0, "completions/min_terminated_length": 962.0, "epoch": 0.1712342468493699, "frac_reward_zero_std": 0.0, "grad_norm": 2.97957555214881, "kl": 0.00861358642578125, "learning_rate": 9.862443258209496e-07, "loss": 0.0027, "num_tokens": 37046996.0, "reward": 0.0, "reward_std": 0.7198086380958557, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0016181737007230192, "rewards/wordcountpos_reward/raw_geo/std": 0.16140078299663677, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10470416879457553, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 856 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1296.0, "completions/max_terminated_length": 1296.0, "completions/mean_length": 1014.6875, "completions/mean_terminated_length": 1014.6875, "completions/min_length": 787.0, "completions/min_terminated_length": 787.0, "epoch": 0.17143428685737147, "frac_reward_zero_std": 0.0, "grad_norm": 3.5161266672864495, "kl": 0.0102996826171875, "learning_rate": 9.861671199712855e-07, "loss": -0.0269, "num_tokens": 37090447.0, "reward": 0.0, "reward_std": 0.809076189994812, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.01242068897396777, "rewards/wordcountpos_reward/raw_geo/std": 0.0716704015239749, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0758897836290186, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 857 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1417.0, "completions/mean_length": 1280.5625, "completions/mean_terminated_length": 1207.416748046875, "completions/min_length": 856.0, "completions/min_terminated_length": 856.0, "epoch": 0.17163432686537308, "frac_reward_zero_std": 0.0, "grad_norm": 2.7494771077513507, "kl": 0.007396697998046875, "learning_rate": 9.86089701444553e-07, "loss": -0.0157, "num_tokens": 37133632.0, "reward": 0.0, "reward_std": 0.5005857348442078, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.044719628188765674, "rewards/wordcountpos_reward/raw_geo/std": 0.06758191509380684, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.725, "rewards/wordcountpos_reward/raw_rule/std": 0.2345997379304526, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 858 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1494.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1036.625, "completions/mean_terminated_length": 1036.625, "completions/min_length": 827.0, "completions/min_terminated_length": 827.0, "epoch": 0.17183436687337467, "frac_reward_zero_std": 0.0, "grad_norm": 3.501125289802964, "kl": 0.00848388671875, "learning_rate": 9.86012070278502e-07, "loss": -0.0239, "num_tokens": 37178698.0, "reward": 2.9802322387695312e-08, "reward_std": 0.36654722690582275, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.21775856565220852, "rewards/wordcountpos_reward/raw_geo/std": 0.23885335881978279, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.18993176162525865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 859 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1483.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1163.375, "completions/mean_terminated_length": 1163.375, "completions/min_length": 921.0, "completions/min_terminated_length": 921.0, "epoch": 0.17203440688137628, "frac_reward_zero_std": 0.0, "grad_norm": 3.1902520958967617, "kl": 0.00782012939453125, "learning_rate": 9.859342265109856e-07, "loss": 0.0046, "num_tokens": 37227416.0, "reward": 0.0, "reward_std": 0.9346041679382324, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.054860423844448405, "rewards/wordcountpos_reward/raw_geo/std": 0.29577180819468957, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12171612389003693, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 860 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1369.0, "completions/mean_length": 1275.0625, "completions/mean_terminated_length": 1140.0999755859375, "completions/min_length": 826.0, "completions/min_terminated_length": 826.0, "epoch": 0.17223444688937786, "frac_reward_zero_std": 0.0, "grad_norm": 3.6784830776347937, "kl": 0.0117340087890625, "learning_rate": 9.858561701799606e-07, "loss": -0.0267, "num_tokens": 37282897.0, "reward": -2.2351741790771484e-08, "reward_std": 1.056344747543335, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0330270012555121, "rewards/wordcountpos_reward/raw_geo/std": 0.10900429159144488, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11409872268574492, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 861 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1352.0, "completions/mean_length": 1164.75, "completions/mean_terminated_length": 1116.857177734375, "completions/min_length": 915.0, "completions/min_terminated_length": 915.0, "epoch": 0.17243448689737947, "frac_reward_zero_std": 0.0, "grad_norm": 3.3338210402426216, "kl": 0.00772857666015625, "learning_rate": 9.85777901323488e-07, "loss": 0.0154, "num_tokens": 37326093.0, "reward": 0.0, "reward_std": 0.8620393872261047, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11497980763880261, "rewards/wordcountpos_reward/raw_geo/std": 0.2557735068149251, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14446581038560777, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 862 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 997.3125, "completions/mean_terminated_length": 963.800048828125, "completions/min_length": 643.0, "completions/min_terminated_length": 643.0, "epoch": 0.17263452690538109, "frac_reward_zero_std": 0.0, "grad_norm": 2.9005813806817704, "kl": 0.00928497314453125, "learning_rate": 9.856994199797317e-07, "loss": -0.0495, "num_tokens": 37366794.0, "reward": 0.0, "reward_std": 1.0504605770111084, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05551411403029644, "rewards/wordcountpos_reward/raw_geo/std": 0.07440455569604205, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14298407059684815, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 863 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1338.0, "completions/max_terminated_length": 1338.0, "completions/mean_length": 1168.75, "completions/mean_terminated_length": 1168.75, "completions/min_length": 842.0, "completions/min_terminated_length": 842.0, "epoch": 0.17283456691338267, "frac_reward_zero_std": 0.0, "grad_norm": 2.8040178548787384, "kl": 0.00560760498046875, "learning_rate": 9.8562072618696e-07, "loss": 0.0257, "num_tokens": 37400982.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0157716274261475, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.044969030900224624, "rewards/wordcountpos_reward/raw_geo/std": 0.08188372432840825, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 864 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 1213.5625, "completions/mean_terminated_length": 1194.4666748046875, "completions/min_length": 995.0, "completions/min_terminated_length": 995.0, "epoch": 0.17303460692138428, "frac_reward_zero_std": 0.0, "grad_norm": 3.431958043182085, "kl": 0.00848388671875, "learning_rate": 9.85541819983544e-07, "loss": 0.0076, "num_tokens": 37443231.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0461863279342651, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.19003258497671213, "rewards/wordcountpos_reward/raw_geo/std": 0.26364405035333305, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 865 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 1374.75, "completions/mean_terminated_length": 1345.84619140625, "completions/min_length": 1199.0, "completions/min_terminated_length": 1199.0, "epoch": 0.17323464692938587, "frac_reward_zero_std": 0.0, "grad_norm": 2.3402116113958384, "kl": 0.003650665283203125, "learning_rate": 9.854627014079588e-07, "loss": 0.0031, "num_tokens": 37488171.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8610503673553467, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04421237593881027, "rewards/wordcountpos_reward/raw_geo/std": 0.2047881506706108, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10027739304327549, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 866 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1455.0, "completions/max_terminated_length": 1455.0, "completions/mean_length": 1103.0625, "completions/mean_terminated_length": 1103.0625, "completions/min_length": 901.0, "completions/min_terminated_length": 901.0, "epoch": 0.17343468693738748, "frac_reward_zero_std": 0.0, "grad_norm": 3.558773114190285, "kl": 0.0107879638671875, "learning_rate": 9.853833704987831e-07, "loss": -0.003, "num_tokens": 37539396.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0214763879776, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2440186421896916, "rewards/wordcountpos_reward/raw_geo/std": 0.2170722808575906, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12345339501504503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 867 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1070.0, "completions/max_terminated_length": 1070.0, "completions/mean_length": 951.125, "completions/mean_terminated_length": 951.125, "completions/min_length": 877.0, "completions/min_terminated_length": 877.0, "epoch": 0.1736347269453891, "frac_reward_zero_std": 0.0, "grad_norm": 3.139169286310432, "kl": 0.004642486572265625, "learning_rate": 9.85303827294699e-07, "loss": -0.0059, "num_tokens": 37576446.0, "reward": 0.0, "reward_std": 1.0224297046661377, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12126069886016867, "rewards/wordcountpos_reward/raw_geo/std": 0.08884290870369663, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087683, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 868 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1447.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1229.5625, "completions/mean_terminated_length": 1229.5625, "completions/min_length": 993.0, "completions/min_terminated_length": 993.0, "epoch": 0.17383476695339067, "frac_reward_zero_std": 0.0, "grad_norm": 3.2712650974992714, "kl": 0.009857177734375, "learning_rate": 9.852240718344919e-07, "loss": -0.0169, "num_tokens": 37625575.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6319416165351868, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.20580616187448658, "rewards/wordcountpos_reward/raw_geo/std": 0.3384619816635388, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.11279282877125756, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 869 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 1243.1875, "completions/mean_terminated_length": 1183.923095703125, "completions/min_length": 853.0, "completions/min_terminated_length": 853.0, "epoch": 0.17403480696139229, "frac_reward_zero_std": 0.0, "grad_norm": 2.986178046304281, "kl": 0.00699615478515625, "learning_rate": 9.85144104157051e-07, "loss": -0.0229, "num_tokens": 37680562.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8674370050430298, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.007906180059571218, "rewards/wordcountpos_reward/raw_geo/std": 0.05594172712157067, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09418264367902597, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 870 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1166.1875, "completions/mean_terminated_length": 1089.1539306640625, "completions/min_length": 822.0, "completions/min_terminated_length": 822.0, "epoch": 0.17423484696939387, "frac_reward_zero_std": 0.0, "grad_norm": 2.794130420549919, "kl": 0.006622314453125, "learning_rate": 9.85063924301369e-07, "loss": -0.0089, "num_tokens": 37722685.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9355672597885132, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.007497083548027235, "rewards/wordcountpos_reward/raw_geo/std": 0.15743106226327738, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 871 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1358.0, "completions/max_terminated_length": 1358.0, "completions/mean_length": 993.625, "completions/mean_terminated_length": 993.625, "completions/min_length": 662.0, "completions/min_terminated_length": 662.0, "epoch": 0.17443488697739548, "frac_reward_zero_std": 0.0, "grad_norm": 2.4544196302680175, "kl": 0.00536346435546875, "learning_rate": 9.84983532306542e-07, "loss": 0.0103, "num_tokens": 37755543.0, "reward": 0.0, "reward_std": 1.0122637748718262, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0657117590214513, "rewards/wordcountpos_reward/raw_geo/std": 0.04112101067461087, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 872 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1274.0, "completions/max_terminated_length": 1274.0, "completions/mean_length": 1095.6875, "completions/mean_terminated_length": 1095.6875, "completions/min_length": 903.0, "completions/min_terminated_length": 903.0, "epoch": 0.1746349269853971, "frac_reward_zero_std": 0.0, "grad_norm": 3.2791711267773946, "kl": 0.008941650390625, "learning_rate": 9.849029282117692e-07, "loss": -0.0442, "num_tokens": 37798506.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7395102977752686, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13161597270699313, "rewards/wordcountpos_reward/raw_geo/std": 0.26481364284519454, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.10532137766186214, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 873 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1499.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1212.0625, "completions/mean_terminated_length": 1212.0625, "completions/min_length": 993.0, "completions/min_terminated_length": 993.0, "epoch": 0.17483496699339868, "frac_reward_zero_std": 0.0, "grad_norm": 3.2264562739264093, "kl": 0.007110595703125, "learning_rate": 9.84822112056354e-07, "loss": 0.0071, "num_tokens": 37842683.0, "reward": -2.9802322387695312e-08, "reward_std": 0.783845841884613, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11506658318714942, "rewards/wordcountpos_reward/raw_geo/std": 0.12302823309476989, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14900907255500823, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 874 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1499.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1228.75, "completions/mean_terminated_length": 1228.75, "completions/min_length": 994.0, "completions/min_terminated_length": 994.0, "epoch": 0.1750350070014003, "frac_reward_zero_std": 0.0, "grad_norm": 3.4114230524533706, "kl": 0.0085906982421875, "learning_rate": 9.847410838797023e-07, "loss": -0.0772, "num_tokens": 37885023.0, "reward": 1.6763806343078613e-08, "reward_std": 1.0192160606384277, "rewards/wordcountpos_reward/mean": 1.6763806343078613e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15263395326928883, "rewards/wordcountpos_reward/raw_geo/std": 0.08672447917645187, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.12382783747337807, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 875 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1290.5625, "completions/mean_terminated_length": 1276.60009765625, "completions/min_length": 1111.0, "completions/min_terminated_length": 1111.0, "epoch": 0.17523504700940187, "frac_reward_zero_std": 0.0, "grad_norm": 2.5893669634215977, "kl": 0.00649261474609375, "learning_rate": 9.846598437213241e-07, "loss": -0.0134, "num_tokens": 37932384.0, "reward": 0.0, "reward_std": 0.5021741986274719, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0873640607521561, "rewards/wordcountpos_reward/raw_geo/std": 0.2444931068678306, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10292032157252812, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 876 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1293.5, "completions/mean_terminated_length": 1245.84619140625, "completions/min_length": 1018.0, "completions/min_terminated_length": 1018.0, "epoch": 0.17543508701740348, "frac_reward_zero_std": 0.0, "grad_norm": 2.9555129026048723, "kl": 0.00737762451171875, "learning_rate": 9.845783916208325e-07, "loss": 0.0133, "num_tokens": 37973256.0, "reward": 0.0, "reward_std": 0.9664082527160645, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08951282854205395, "rewards/wordcountpos_reward/raw_geo/std": 0.07160204705473998, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943223, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 877 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1393.0, "completions/max_terminated_length": 1393.0, "completions/mean_length": 1129.5, "completions/mean_terminated_length": 1129.5, "completions/min_length": 834.0, "completions/min_terminated_length": 834.0, "epoch": 0.17563512702540507, "frac_reward_zero_std": 0.0, "grad_norm": 3.5319703710550185, "kl": 0.0093994140625, "learning_rate": 9.844967276179435e-07, "loss": -0.0171, "num_tokens": 38024640.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0311518907546997, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12415629319911645, "rewards/wordcountpos_reward/raw_geo/std": 0.10857688943831809, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09737289911202955, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 878 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1367.0, "completions/mean_length": 1052.5, "completions/mean_terminated_length": 1022.666748046875, "completions/min_length": 799.0, "completions/min_terminated_length": 799.0, "epoch": 0.17583516703340668, "frac_reward_zero_std": 0.0, "grad_norm": 3.1798805596283968, "kl": 0.00890350341796875, "learning_rate": 9.844148517524772e-07, "loss": -0.0122, "num_tokens": 38062472.0, "reward": 1.4901161193847656e-08, "reward_std": 0.994231104850769, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.19620229254848678, "rewards/wordcountpos_reward/raw_geo/std": 0.08364270125635723, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.11894598836509011, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 879 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1186.8125, "completions/mean_terminated_length": 1165.933349609375, "completions/min_length": 1056.0, "completions/min_terminated_length": 1056.0, "epoch": 0.1760352070414083, "frac_reward_zero_std": 0.0, "grad_norm": 2.613293452292534, "kl": 0.007110595703125, "learning_rate": 9.843327640643566e-07, "loss": 0.03, "num_tokens": 38100653.0, "reward": 0.0, "reward_std": 0.6162838935852051, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03411647417153821, "rewards/wordcountpos_reward/raw_geo/std": 0.113682835186528, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 880 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1260.0625, "completions/mean_terminated_length": 1225.7857666015625, "completions/min_length": 882.0, "completions/min_terminated_length": 882.0, "epoch": 0.17623524704940988, "frac_reward_zero_std": 0.0, "grad_norm": 2.8256290691578374, "kl": 0.00856781005859375, "learning_rate": 9.842504645936078e-07, "loss": -0.0471, "num_tokens": 38153846.0, "reward": 0.0, "reward_std": 0.6246308088302612, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.012818517251148596, "rewards/wordcountpos_reward/raw_geo/std": 0.269361004862285, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13492110177323527, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 881 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1248.0, "completions/max_terminated_length": 1248.0, "completions/mean_length": 1076.375, "completions/mean_terminated_length": 1076.375, "completions/min_length": 955.0, "completions/min_terminated_length": 955.0, "epoch": 0.1764352870574115, "frac_reward_zero_std": 0.0, "grad_norm": 3.4294388687230777, "kl": 0.00981903076171875, "learning_rate": 9.84167953380361e-07, "loss": -0.0163, "num_tokens": 38195596.0, "reward": -7.450580596923828e-09, "reward_std": 1.0519238710403442, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.09750724834092946, "rewards/wordcountpos_reward/raw_geo/std": 0.045521534897174275, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823629, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 882 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1369.0, "completions/mean_length": 1173.375, "completions/mean_terminated_length": 1126.71435546875, "completions/min_length": 817.0, "completions/min_terminated_length": 817.0, "epoch": 0.17663532706541307, "frac_reward_zero_std": 0.0, "grad_norm": 3.324748927064528, "kl": 0.00524139404296875, "learning_rate": 9.840852304648481e-07, "loss": -0.0903, "num_tokens": 38244490.0, "reward": 3.725290298461914e-09, "reward_std": 1.0523391962051392, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.029037714097839465, "rewards/wordcountpos_reward/raw_geo/std": 0.19357625351024577, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.11279282877125754, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 883 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1461.0, "completions/max_terminated_length": 1461.0, "completions/mean_length": 1146.9375, "completions/mean_terminated_length": 1146.9375, "completions/min_length": 880.0, "completions/min_terminated_length": 880.0, "epoch": 0.17683536707341468, "frac_reward_zero_std": 0.0, "grad_norm": 2.321176089301189, "kl": 0.00499725341796875, "learning_rate": 9.84002295887406e-07, "loss": -0.0137, "num_tokens": 38295665.0, "reward": 0.0, "reward_std": 0.9161946773529053, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06034120247129848, "rewards/wordcountpos_reward/raw_geo/std": 0.08911955422612272, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.07698003589195011, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 884 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1363.0, "completions/mean_length": 1344.0625, "completions/mean_terminated_length": 1250.5, "completions/min_length": 1115.0, "completions/min_terminated_length": 1115.0, "epoch": 0.1770354070814163, "frac_reward_zero_std": 0.0, "grad_norm": 2.62096170888923, "kl": 0.008819580078125, "learning_rate": 9.839191496884736e-07, "loss": -0.0153, "num_tokens": 38351202.0, "reward": 0.0, "reward_std": 0.6652466058731079, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0313640343049726, "rewards/wordcountpos_reward/raw_geo/std": 0.09490847987615997, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 885 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1224.4375, "completions/mean_terminated_length": 1160.84619140625, "completions/min_length": 963.0, "completions/min_terminated_length": 963.0, "epoch": 0.17723544708941788, "frac_reward_zero_std": 0.0, "grad_norm": 2.1787585672285625, "kl": 0.00435638427734375, "learning_rate": 9.838357919085933e-07, "loss": 0.0222, "num_tokens": 38397889.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0030863285064697, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1977029944399264, "rewards/wordcountpos_reward/raw_geo/std": 0.14663613845897422, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036263, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 886 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1193.5625, "completions/mean_terminated_length": 1149.7857666015625, "completions/min_length": 949.0, "completions/min_terminated_length": 949.0, "epoch": 0.1774354870974195, "frac_reward_zero_std": 0.0, "grad_norm": 2.1622229550918166, "kl": 0.0045013427734375, "learning_rate": 9.83752222588411e-07, "loss": 0.0205, "num_tokens": 38451378.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8524837493896484, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06193691803381681, "rewards/wordcountpos_reward/raw_geo/std": 0.06287351706635681, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04367387557118565, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 887 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1489.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1137.75, "completions/mean_terminated_length": 1137.75, "completions/min_length": 864.0, "completions/min_terminated_length": 864.0, "epoch": 0.17763552710542108, "frac_reward_zero_std": 0.0, "grad_norm": 3.5089046382192826, "kl": 0.008880615234375, "learning_rate": 9.836684417686754e-07, "loss": 0.0022, "num_tokens": 38504710.0, "reward": 3.725290298461914e-09, "reward_std": 1.0668672323226929, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.198020197803301, "rewards/wordcountpos_reward/raw_geo/std": 0.1000581027422953, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042253, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 888 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1280.0, "completions/max_terminated_length": 1280.0, "completions/mean_length": 1086.8125, "completions/mean_terminated_length": 1086.8125, "completions/min_length": 1008.0, "completions/min_terminated_length": 1008.0, "epoch": 0.1778355671134227, "frac_reward_zero_std": 0.0, "grad_norm": 2.9316485665738896, "kl": 0.00653076171875, "learning_rate": 9.835844494902381e-07, "loss": -0.0053, "num_tokens": 38547659.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9732360243797302, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13120952575304273, "rewards/wordcountpos_reward/raw_geo/std": 0.15881552924067985, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 889 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1176.75, "completions/mean_terminated_length": 1102.1539306640625, "completions/min_length": 991.0, "completions/min_terminated_length": 991.0, "epoch": 0.17803560712142427, "frac_reward_zero_std": 0.0, "grad_norm": 2.4367797913602853, "kl": 0.00501251220703125, "learning_rate": 9.835002457940543e-07, "loss": 0.0488, "num_tokens": 38598455.0, "reward": 0.0, "reward_std": 0.9611717462539673, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.047756964343114404, "rewards/wordcountpos_reward/raw_geo/std": 0.18119012155036263, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08933913745655644, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 890 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1287.0, "completions/mean_length": 1019.75, "completions/mean_terminated_length": 987.7333984375, "completions/min_length": 804.0, "completions/min_terminated_length": 804.0, "epoch": 0.17823564712942588, "frac_reward_zero_std": 0.0, "grad_norm": 3.8807606846051383, "kl": 0.00872039794921875, "learning_rate": 9.834158307211825e-07, "loss": 0.0952, "num_tokens": 38628491.0, "reward": 0.0, "reward_std": 0.8411787748336792, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1457158373757128, "rewards/wordcountpos_reward/raw_geo/std": 0.1014224524068365, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 891 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1454.0, "completions/mean_length": 1326.625, "completions/mean_terminated_length": 1286.615478515625, "completions/min_length": 972.0, "completions/min_terminated_length": 972.0, "epoch": 0.1784356871374275, "frac_reward_zero_std": 0.0, "grad_norm": 3.0549532092651024, "kl": 0.00742340087890625, "learning_rate": 9.833312043127835e-07, "loss": 0.0129, "num_tokens": 38667797.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5767874121665955, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.21452767587548643, "rewards/wordcountpos_reward/raw_geo/std": 0.23639937938187505, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 892 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1283.0, "completions/max_terminated_length": 1283.0, "completions/mean_length": 945.875, "completions/mean_terminated_length": 945.875, "completions/min_length": 757.0, "completions/min_terminated_length": 757.0, "epoch": 0.17863572714542908, "frac_reward_zero_std": 0.0, "grad_norm": 3.631812723072941, "kl": 0.00806427001953125, "learning_rate": 9.832463666101215e-07, "loss": 0.0289, "num_tokens": 38711539.0, "reward": 1.1175870895385742e-08, "reward_std": 1.0570076704025269, "rewards/wordcountpos_reward/mean": 1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.016045338430651354, "rewards/wordcountpos_reward/raw_geo/std": 0.0602284217619523, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238706, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 893 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1453.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1022.8125, "completions/mean_terminated_length": 1022.8125, "completions/min_length": 840.0, "completions/min_terminated_length": 840.0, "epoch": 0.1788357671534307, "frac_reward_zero_std": 0.0, "grad_norm": 2.80153128658189, "kl": 0.00571441650390625, "learning_rate": 9.831613176545637e-07, "loss": 0.0138, "num_tokens": 38747176.0, "reward": 0.0, "reward_std": 0.8905612230300903, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04089018645959631, "rewards/wordcountpos_reward/raw_geo/std": 0.04129331106827069, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 894 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1276.0, "completions/max_terminated_length": 1276.0, "completions/mean_length": 1156.0, "completions/mean_terminated_length": 1156.0, "completions/min_length": 1010.0, "completions/min_terminated_length": 1010.0, "epoch": 0.17903580716143228, "frac_reward_zero_std": 0.0, "grad_norm": 2.3144176664324854, "kl": 0.005451202392578125, "learning_rate": 9.830760574875806e-07, "loss": 0.0055, "num_tokens": 38786768.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6741729974746704, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08762354669870973, "rewards/wordcountpos_reward/raw_geo/std": 0.052466024193268776, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 895 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1266.8125, "completions/mean_terminated_length": 1251.2667236328125, "completions/min_length": 1039.0, "completions/min_terminated_length": 1039.0, "epoch": 0.1792358471694339, "frac_reward_zero_std": 0.0, "grad_norm": 2.522072825111293, "kl": 0.0058135986328125, "learning_rate": 9.829905861507453e-07, "loss": 0.0039, "num_tokens": 38833005.0, "reward": 5.960464477539063e-08, "reward_std": 0.7966471910476685, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07486677155101398, "rewards/wordcountpos_reward/raw_geo/std": 0.10715636968913805, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12931443160847217, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 896 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 1266.8125, "completions/mean_terminated_length": 1213.0, "completions/min_length": 977.0, "completions/min_terminated_length": 977.0, "epoch": 0.1794358871774355, "frac_reward_zero_std": 0.0, "grad_norm": 2.8231646880918118, "kl": 0.00652313232421875, "learning_rate": 9.829049036857338e-07, "loss": -0.0262, "num_tokens": 38884874.0, "reward": 0.0, "reward_std": 0.9405481815338135, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.032435498550571734, "rewards/wordcountpos_reward/raw_geo/std": 0.14189761258588804, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1102186379345533, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 897 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1268.0, "completions/max_terminated_length": 1268.0, "completions/mean_length": 1118.5625, "completions/mean_terminated_length": 1118.5625, "completions/min_length": 901.0, "completions/min_terminated_length": 901.0, "epoch": 0.17963592718543708, "frac_reward_zero_std": 0.0, "grad_norm": 2.841819918917027, "kl": 0.00865936279296875, "learning_rate": 9.82819010134326e-07, "loss": 0.0175, "num_tokens": 38930763.0, "reward": 0.0, "reward_std": 0.8674912452697754, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.179958831942811, "rewards/wordcountpos_reward/raw_geo/std": 0.1269351332976292, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823629, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 898 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1109.0, "completions/max_terminated_length": 1109.0, "completions/mean_length": 910.1875, "completions/mean_terminated_length": 910.1875, "completions/min_length": 737.0, "completions/min_terminated_length": 737.0, "epoch": 0.1798359671934387, "frac_reward_zero_std": 0.0, "grad_norm": 4.234469351243712, "kl": 0.0094757080078125, "learning_rate": 9.827329055384031e-07, "loss": 0.0094, "num_tokens": 38970670.0, "reward": 2.2351741790771484e-08, "reward_std": 0.9956492185592651, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.012990841045994963, "rewards/wordcountpos_reward/raw_geo/std": 0.0971725266844798, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 899 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 1197.0625, "completions/mean_terminated_length": 1015.2999877929688, "completions/min_length": 812.0, "completions/min_terminated_length": 812.0, "epoch": 0.18003600720144028, "frac_reward_zero_std": 0.0, "grad_norm": 2.8201012187429466, "kl": 0.0067901611328125, "learning_rate": 9.826465899399504e-07, "loss": -0.0114, "num_tokens": 39022455.0, "reward": 0.0, "reward_std": 0.6320232152938843, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.16034238006822255, "rewards/wordcountpos_reward/raw_geo/std": 0.100569567402025, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08432740427115679, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 900 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1270.375, "completions/mean_terminated_length": 1255.0667724609375, "completions/min_length": 1121.0, "completions/min_terminated_length": 1121.0, "epoch": 0.1802360472094419, "frac_reward_zero_std": 0.0, "grad_norm": 3.213694438490083, "kl": 0.0090179443359375, "learning_rate": 9.82560063381056e-07, "loss": 0.0035, "num_tokens": 39069997.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9283663034439087, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13790807469382152, "rewards/wordcountpos_reward/raw_geo/std": 0.15510724745403243, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 901 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1348.0, "completions/max_terminated_length": 1348.0, "completions/mean_length": 1206.5, "completions/mean_terminated_length": 1206.5, "completions/min_length": 1050.0, "completions/min_terminated_length": 1050.0, "epoch": 0.18043608721744347, "frac_reward_zero_std": 0.0, "grad_norm": 2.8537674851302355, "kl": 0.00579833984375, "learning_rate": 9.824733259039104e-07, "loss": -0.0028, "num_tokens": 39111717.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9624330997467041, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.18702226892966964, "rewards/wordcountpos_reward/raw_geo/std": 0.14591069436031148, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 902 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1246.6875, "completions/mean_terminated_length": 1210.5, "completions/min_length": 925.0, "completions/min_terminated_length": 925.0, "epoch": 0.1806361272254451, "frac_reward_zero_std": 0.0, "grad_norm": 2.813198369721467, "kl": 0.00720977783203125, "learning_rate": 9.823863775508072e-07, "loss": -0.0231, "num_tokens": 39159000.0, "reward": -2.9802322387695312e-08, "reward_std": 1.008222222328186, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04658171978607431, "rewards/wordcountpos_reward/raw_geo/std": 0.06365656350168548, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.14851112939963645, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 903 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1419.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 1140.125, "completions/mean_terminated_length": 1140.125, "completions/min_length": 933.0, "completions/min_terminated_length": 933.0, "epoch": 0.1808361672334467, "frac_reward_zero_std": 0.0, "grad_norm": 3.093059159319179, "kl": 0.00778961181640625, "learning_rate": 9.822992183641429e-07, "loss": -0.0097, "num_tokens": 39210730.0, "reward": 5.960464477539063e-08, "reward_std": 0.7320858240127563, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.046864234907353926, "rewards/wordcountpos_reward/raw_geo/std": 0.15270220424044126, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 904 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1400.0, "completions/mean_terminated_length": 1322.2222900390625, "completions/min_length": 1079.0, "completions/min_terminated_length": 1079.0, "epoch": 0.18103620724144828, "frac_reward_zero_std": 0.0, "grad_norm": 2.809287292883197, "kl": 0.0068206787109375, "learning_rate": 9.822118483864167e-07, "loss": 0.0086, "num_tokens": 39260570.0, "reward": 0.0, "reward_std": 0.9606954455375671, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10997422892097904, "rewards/wordcountpos_reward/raw_geo/std": 0.1741691170159194, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12049281521534187, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 905 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1187.0, "completions/max_terminated_length": 1187.0, "completions/mean_length": 846.9375, "completions/mean_terminated_length": 846.9375, "completions/min_length": 508.0, "completions/min_terminated_length": 508.0, "epoch": 0.1812362472494499, "frac_reward_zero_std": 0.0, "grad_norm": 3.5644179744507105, "kl": 0.00801849365234375, "learning_rate": 9.821242676602308e-07, "loss": -0.0214, "num_tokens": 39286105.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7069605588912964, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.047907115663022824, "rewards/wordcountpos_reward/raw_geo/std": 0.0681875910477015, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 906 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 1231.0625, "completions/mean_terminated_length": 1169.0, "completions/min_length": 998.0, "completions/min_terminated_length": 998.0, "epoch": 0.18143628725745148, "frac_reward_zero_std": 0.0, "grad_norm": 3.3476209440387974, "kl": 0.0085601806640625, "learning_rate": 9.820364762282896e-07, "loss": 0.0176, "num_tokens": 39331026.0, "reward": 0.0, "reward_std": 0.7648845911026001, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.247400421269785, "rewards/wordcountpos_reward/raw_geo/std": 0.3115951254045245, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.1, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 907 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1162.4375, "completions/mean_terminated_length": 1049.916748046875, "completions/min_length": 673.0, "completions/min_terminated_length": 673.0, "epoch": 0.1816363272654531, "frac_reward_zero_std": 0.0, "grad_norm": 3.031334611482978, "kl": 0.00675201416015625, "learning_rate": 9.819484741334009e-07, "loss": 0.0396, "num_tokens": 39366257.0, "reward": 1.1175870895385742e-08, "reward_std": 1.0533500909805298, "rewards/wordcountpos_reward/mean": 1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13639401083969963, "rewards/wordcountpos_reward/raw_geo/std": 0.24310983160467106, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09418264367902597, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 908 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1412.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1082.9375, "completions/mean_terminated_length": 1082.9375, "completions/min_length": 808.0, "completions/min_terminated_length": 808.0, "epoch": 0.1818363672734547, "frac_reward_zero_std": 0.0, "grad_norm": 2.915696897818177, "kl": 0.007659912109375, "learning_rate": 9.818602614184745e-07, "loss": -0.0303, "num_tokens": 39409560.0, "reward": 0.0, "reward_std": 0.981995701789856, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.057409482703430145, "rewards/wordcountpos_reward/raw_geo/std": 0.05842319473447109, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05443310539518172, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 909 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1217.0, "completions/mean_length": 1054.625, "completions/mean_terminated_length": 1024.933349609375, "completions/min_length": 931.0, "completions/min_terminated_length": 931.0, "epoch": 0.1820364072814563, "frac_reward_zero_std": 0.0, "grad_norm": 3.095718240065994, "kl": 0.00711822509765625, "learning_rate": 9.817718381265238e-07, "loss": 0.0101, "num_tokens": 39451130.0, "reward": 0.0, "reward_std": 0.645683765411377, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.059412785608560406, "rewards/wordcountpos_reward/raw_geo/std": 0.04698696168490984, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11674600476945511, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 910 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1209.3125, "completions/mean_terminated_length": 1189.933349609375, "completions/min_length": 937.0, "completions/min_terminated_length": 937.0, "epoch": 0.1822364472894579, "frac_reward_zero_std": 0.0, "grad_norm": 2.9231185689219443, "kl": 0.00673675537109375, "learning_rate": 9.81683204300664e-07, "loss": -0.058, "num_tokens": 39493047.0, "reward": 7.450580596923828e-09, "reward_std": 1.0139415264129639, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.07672315141229276, "rewards/wordcountpos_reward/raw_geo/std": 0.05076262441087369, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505422, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 911 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1243.0, "completions/max_terminated_length": 1243.0, "completions/mean_length": 990.6875, "completions/mean_terminated_length": 990.6875, "completions/min_length": 838.0, "completions/min_terminated_length": 838.0, "epoch": 0.18243648729745948, "frac_reward_zero_std": 0.0, "grad_norm": 2.92171306604703, "kl": 0.005954742431640625, "learning_rate": 9.815943599841138e-07, "loss": -0.006, "num_tokens": 39522746.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9515438079833984, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.013930748266304718, "rewards/wordcountpos_reward/raw_geo/std": 0.0485237743724745, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 912 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1190.0, "completions/max_terminated_length": 1190.0, "completions/mean_length": 907.75, "completions/mean_terminated_length": 907.75, "completions/min_length": 766.0, "completions/min_terminated_length": 766.0, "epoch": 0.1826365273054611, "frac_reward_zero_std": 0.0, "grad_norm": 4.039595263979884, "kl": 0.0097808837890625, "learning_rate": 9.815053052201938e-07, "loss": -0.0177, "num_tokens": 39561846.0, "reward": 5.960464477539063e-08, "reward_std": 0.8298944234848022, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1072343145369315, "rewards/wordcountpos_reward/raw_geo/std": 0.0901326456376238, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 913 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1093.4375, "completions/mean_terminated_length": 1066.3333740234375, "completions/min_length": 575.0, "completions/min_terminated_length": 575.0, "epoch": 0.1828365673134627, "frac_reward_zero_std": 0.0, "grad_norm": 2.998229941507805, "kl": 0.00606536865234375, "learning_rate": 9.814160400523274e-07, "loss": -0.0318, "num_tokens": 39611597.0, "reward": 0.0, "reward_std": 0.9494115114212036, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.00804665575599557, "rewards/wordcountpos_reward/raw_geo/std": 0.01842191930143263, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.17191729277636836, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 914 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1282.0, "completions/max_terminated_length": 1282.0, "completions/mean_length": 1000.1875, "completions/mean_terminated_length": 1000.1875, "completions/min_length": 825.0, "completions/min_terminated_length": 825.0, "epoch": 0.1830366073214643, "frac_reward_zero_std": 0.0, "grad_norm": 3.5863036384839733, "kl": 0.008045196533203125, "learning_rate": 9.81326564524041e-07, "loss": -0.0436, "num_tokens": 39662624.0, "reward": 0.0, "reward_std": 0.7314053177833557, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05659158533015089, "rewards/wordcountpos_reward/raw_geo/std": 0.07311513878055974, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087683, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 915 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1150.0, "completions/max_terminated_length": 1150.0, "completions/mean_length": 994.6875, "completions/mean_terminated_length": 994.6875, "completions/min_length": 683.0, "completions/min_terminated_length": 683.0, "epoch": 0.1832366473294659, "frac_reward_zero_std": 0.0, "grad_norm": 2.4923078467428557, "kl": 0.0088653564453125, "learning_rate": 9.81236878678963e-07, "loss": 0.0181, "num_tokens": 39695147.0, "reward": 0.0, "reward_std": 0.5605810284614563, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03669888385187112, "rewards/wordcountpos_reward/raw_geo/std": 0.09213379448036717, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 916 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1441.0, "completions/mean_length": 1300.375, "completions/mean_terminated_length": 1254.3077392578125, "completions/min_length": 979.0, "completions/min_terminated_length": 979.0, "epoch": 0.18343668733746749, "frac_reward_zero_std": 0.0, "grad_norm": 3.0625207878777223, "kl": 0.00675201416015625, "learning_rate": 9.81146982560825e-07, "loss": -0.0184, "num_tokens": 39739689.0, "reward": -2.9802322387695312e-08, "reward_std": 0.544572114944458, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04276772020932102, "rewards/wordcountpos_reward/raw_geo/std": 0.0779179918129157, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10390522747338701, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 917 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1414.0, "completions/mean_length": 1237.125, "completions/mean_terminated_length": 1199.571533203125, "completions/min_length": 934.0, "completions/min_terminated_length": 934.0, "epoch": 0.1836367273454691, "frac_reward_zero_std": 0.0, "grad_norm": 2.7928034829206565, "kl": 0.005916595458984375, "learning_rate": 9.810568762134602e-07, "loss": -0.0001, "num_tokens": 39787571.0, "reward": 0.0, "reward_std": 0.6140346527099609, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06258728712073611, "rewards/wordcountpos_reward/raw_geo/std": 0.14988995833111593, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.14446581038560774, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 918 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1287.5, "completions/mean_terminated_length": 1273.3333740234375, "completions/min_length": 1104.0, "completions/min_terminated_length": 1104.0, "epoch": 0.18383676735347068, "frac_reward_zero_std": 0.0, "grad_norm": 2.508562556398617, "kl": 0.0067596435546875, "learning_rate": 9.809665596808052e-07, "loss": 0.0104, "num_tokens": 39835755.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9353573322296143, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05361187372565858, "rewards/wordcountpos_reward/raw_geo/std": 0.2733418558052254, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.06763190130459201, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 919 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1405.0, "completions/max_terminated_length": 1405.0, "completions/mean_length": 1126.25, "completions/mean_terminated_length": 1126.25, "completions/min_length": 911.0, "completions/min_terminated_length": 911.0, "epoch": 0.1840368073614723, "frac_reward_zero_std": 0.0, "grad_norm": 3.4605016606311287, "kl": 0.0082244873046875, "learning_rate": 9.808760330068989e-07, "loss": -0.0289, "num_tokens": 39878959.0, "reward": 0.0, "reward_std": 0.9318041801452637, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10305904704367594, "rewards/wordcountpos_reward/raw_geo/std": 0.08061109890416099, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818418, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 920 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1387.0, "completions/max_terminated_length": 1387.0, "completions/mean_length": 1268.25, "completions/mean_terminated_length": 1268.25, "completions/min_length": 1005.0, "completions/min_terminated_length": 1005.0, "epoch": 0.1842368473694739, "frac_reward_zero_std": 0.0, "grad_norm": 2.148218969910704, "kl": 0.0030364990234375, "learning_rate": 9.807852962358822e-07, "loss": 0.004, "num_tokens": 39916979.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8309612274169922, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.043274024465947955, "rewards/wordcountpos_reward/raw_geo/std": 0.27643031694670195, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818418, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 921 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1302.0, "completions/max_terminated_length": 1302.0, "completions/mean_length": 1034.5625, "completions/mean_terminated_length": 1034.5625, "completions/min_length": 668.0, "completions/min_terminated_length": 668.0, "epoch": 0.1844368873774755, "frac_reward_zero_std": 0.0, "grad_norm": 3.6725786354322207, "kl": 0.0082855224609375, "learning_rate": 9.806943494119989e-07, "loss": -0.0338, "num_tokens": 39951452.0, "reward": 4.470348358154297e-08, "reward_std": 0.9785497188568115, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08835530952891071, "rewards/wordcountpos_reward/raw_geo/std": 0.043336109940599006, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943223, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 922 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1359.0, "completions/mean_length": 990.3125, "completions/mean_terminated_length": 956.3333740234375, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 0.1846369273854771, "frac_reward_zero_std": 0.0, "grad_norm": 2.6022400172543607, "kl": 0.00708770751953125, "learning_rate": 9.806031925795951e-07, "loss": -0.2141, "num_tokens": 40004249.0, "reward": -1.862645149230957e-08, "reward_std": 0.8384362459182739, "rewards/wordcountpos_reward/mean": -1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08457805336744288, "rewards/wordcountpos_reward/raw_geo/std": 0.2404769504864457, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14168300559373406, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 923 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1461.0, "completions/mean_length": 1238.75, "completions/mean_terminated_length": 1221.3333740234375, "completions/min_length": 976.0, "completions/min_terminated_length": 976.0, "epoch": 0.18483696739347868, "frac_reward_zero_std": 0.0, "grad_norm": 3.205288225011804, "kl": 0.00838470458984375, "learning_rate": 9.805118257831192e-07, "loss": -0.0194, "num_tokens": 40060021.0, "reward": 0.0, "reward_std": 1.054551124572754, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06803175917655288, "rewards/wordcountpos_reward/raw_geo/std": 0.22152616310655693, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11021863793455329, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 924 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1296.3125, "completions/mean_terminated_length": 1267.21435546875, "completions/min_length": 1024.0, "completions/min_terminated_length": 1024.0, "epoch": 0.1850370074014803, "frac_reward_zero_std": 0.0, "grad_norm": 2.818805730571551, "kl": 0.00707244873046875, "learning_rate": 9.804202490671223e-07, "loss": -0.0544, "num_tokens": 40106034.0, "reward": -5.960464477539063e-08, "reward_std": 0.3621934652328491, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10248418901885131, "rewards/wordcountpos_reward/raw_geo/std": 0.1645897828360318, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.10327955589886445, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 925 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1347.0, "completions/max_terminated_length": 1347.0, "completions/mean_length": 1125.625, "completions/mean_terminated_length": 1125.625, "completions/min_length": 805.0, "completions/min_terminated_length": 805.0, "epoch": 0.1852370474094819, "frac_reward_zero_std": 0.0, "grad_norm": 3.7229536699729144, "kl": 0.0099639892578125, "learning_rate": 9.803284624762575e-07, "loss": -0.0441, "num_tokens": 40149404.0, "reward": 0.0, "reward_std": 0.6757339239120483, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03311160811618911, "rewards/wordcountpos_reward/raw_geo/std": 0.1451890887697495, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715322, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 926 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1490.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1073.5625, "completions/mean_terminated_length": 1073.5625, "completions/min_length": 662.0, "completions/min_terminated_length": 662.0, "epoch": 0.1854370874174835, "frac_reward_zero_std": 0.0, "grad_norm": 3.4811291230655197, "kl": 0.00765228271484375, "learning_rate": 9.8023646605528e-07, "loss": 0.0576, "num_tokens": 40198693.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4679914712905884, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07139827817899022, "rewards/wordcountpos_reward/raw_geo/std": 0.17635879176403244, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11917929226045818, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 927 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1403.0, "completions/max_terminated_length": 1403.0, "completions/mean_length": 1156.6875, "completions/mean_terminated_length": 1156.6875, "completions/min_length": 931.0, "completions/min_terminated_length": 931.0, "epoch": 0.1856371274254851, "frac_reward_zero_std": 0.0, "grad_norm": 2.8947509616842786, "kl": 0.00762176513671875, "learning_rate": 9.801442598490485e-07, "loss": -0.0308, "num_tokens": 40248576.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8155903816223145, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03296823420794705, "rewards/wordcountpos_reward/raw_geo/std": 0.058206315890683286, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.09878896324620104, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 928 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1246.0, "completions/max_terminated_length": 1246.0, "completions/mean_length": 1069.25, "completions/mean_terminated_length": 1069.25, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.1858371674334867, "frac_reward_zero_std": 0.0, "grad_norm": 3.85629544001022, "kl": 0.012115478515625, "learning_rate": 9.800518439025223e-07, "loss": -0.0276, "num_tokens": 40301076.0, "reward": 1.4901161193847656e-08, "reward_std": 1.035229206085205, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.024291140479526983, "rewards/wordcountpos_reward/raw_geo/std": 0.30076259184815257, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.13381856152046848, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 929 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1491.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1228.25, "completions/mean_terminated_length": 1228.25, "completions/min_length": 1063.0, "completions/min_terminated_length": 1063.0, "epoch": 0.1860372074414883, "frac_reward_zero_std": 0.0, "grad_norm": 2.6997257041282894, "kl": 0.005893707275390625, "learning_rate": 9.799592182607642e-07, "loss": 0.0115, "num_tokens": 40344344.0, "reward": 4.470348358154297e-08, "reward_std": 1.0092113018035889, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.4039932754308158, "rewards/wordcountpos_reward/raw_geo/std": 0.2504430719790084, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 930 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 1372.5, "completions/mean_terminated_length": 1296.0, "completions/min_length": 1178.0, "completions/min_terminated_length": 1178.0, "epoch": 0.18623724744948988, "frac_reward_zero_std": 0.0, "grad_norm": 2.2987923499829175, "kl": 0.00505828857421875, "learning_rate": 9.79866382968939e-07, "loss": 0.0133, "num_tokens": 40391952.0, "reward": 0.0, "reward_std": 0.40316373109817505, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08187932635907305, "rewards/wordcountpos_reward/raw_geo/std": 0.2504759261413003, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09699179041242306, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 931 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1154.75, "completions/mean_terminated_length": 1131.7333984375, "completions/min_length": 955.0, "completions/min_terminated_length": 955.0, "epoch": 0.1864372874574915, "frac_reward_zero_std": 0.0, "grad_norm": 2.0311869702156775, "kl": 0.0056915283203125, "learning_rate": 9.797733380723133e-07, "loss": -0.0695, "num_tokens": 40424852.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5888725519180298, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03884571415396457, "rewards/wordcountpos_reward/raw_geo/std": 0.13365507589392253, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 932 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1431.0, "completions/mean_length": 1163.9375, "completions/mean_terminated_length": 1086.3846435546875, "completions/min_length": 753.0, "completions/min_terminated_length": 753.0, "epoch": 0.1866373274654931, "frac_reward_zero_std": 0.0, "grad_norm": 2.713463046000283, "kl": 0.00612640380859375, "learning_rate": 9.796800836162565e-07, "loss": -0.049, "num_tokens": 40463659.0, "reward": 0.0, "reward_std": 0.9785711765289307, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.01299969202872348, "rewards/wordcountpos_reward/raw_geo/std": 0.05868218130798983, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 933 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1285.0, "completions/max_terminated_length": 1285.0, "completions/mean_length": 1033.5, "completions/mean_terminated_length": 1033.5, "completions/min_length": 879.0, "completions/min_terminated_length": 879.0, "epoch": 0.1868373674734947, "frac_reward_zero_std": 0.0, "grad_norm": 3.369930282506248, "kl": 0.01165771484375, "learning_rate": 9.795866196462397e-07, "loss": -0.0001, "num_tokens": 40511627.0, "reward": 2.9802322387695312e-08, "reward_std": 0.45107266306877136, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1155522557167349, "rewards/wordcountpos_reward/raw_geo/std": 0.1935156610313652, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10390522747338701, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 934 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1143.0, "completions/max_terminated_length": 1143.0, "completions/mean_length": 811.9375, "completions/mean_terminated_length": 811.9375, "completions/min_length": 601.0, "completions/min_terminated_length": 601.0, "epoch": 0.1870374074814963, "frac_reward_zero_std": 0.0, "grad_norm": 4.163878345780283, "kl": 0.009735107421875, "learning_rate": 9.794929462078366e-07, "loss": -0.0531, "num_tokens": 40551186.0, "reward": -5.960464477539063e-08, "reward_std": 0.5556304454803467, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06432098510256544, "rewards/wordcountpos_reward/raw_geo/std": 0.1269093651061461, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11155467020454342, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 935 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1470.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1059.8125, "completions/mean_terminated_length": 1059.8125, "completions/min_length": 745.0, "completions/min_terminated_length": 745.0, "epoch": 0.1872374474894979, "frac_reward_zero_std": 0.0, "grad_norm": 3.684987623639056, "kl": 0.01019287109375, "learning_rate": 9.793990633467225e-07, "loss": 0.0214, "num_tokens": 40594879.0, "reward": 3.725290298461914e-09, "reward_std": 1.0598126649856567, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.34002017180132854, "rewards/wordcountpos_reward/raw_geo/std": 0.17281953128100952, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 936 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1399.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 1128.1875, "completions/mean_terminated_length": 1128.1875, "completions/min_length": 792.0, "completions/min_terminated_length": 792.0, "epoch": 0.1874374874974995, "frac_reward_zero_std": 0.0, "grad_norm": 4.202908195244333, "kl": 0.012939453125, "learning_rate": 9.793049711086754e-07, "loss": 0.021, "num_tokens": 40654610.0, "reward": 0.0, "reward_std": 0.7663412094116211, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09802861035418453, "rewards/wordcountpos_reward/raw_geo/std": 0.0984539074503435, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 937 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 1064.0, "completions/mean_terminated_length": 1034.933349609375, "completions/min_length": 680.0, "completions/min_terminated_length": 680.0, "epoch": 0.1876375275055011, "frac_reward_zero_std": 0.0, "grad_norm": 3.2155208794007915, "kl": 0.007045745849609375, "learning_rate": 9.79210669539575e-07, "loss": -0.0325, "num_tokens": 40688346.0, "reward": 0.0, "reward_std": 0.7037836313247681, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.015032288791508627, "rewards/wordcountpos_reward/raw_geo/std": 0.060248950796143334, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298362, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 938 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1215.0, "completions/max_terminated_length": 1215.0, "completions/mean_length": 1054.0625, "completions/mean_terminated_length": 1054.0625, "completions/min_length": 913.0, "completions/min_terminated_length": 913.0, "epoch": 0.1878375675135027, "frac_reward_zero_std": 0.0, "grad_norm": 3.460942358114053, "kl": 0.0079803466796875, "learning_rate": 9.791161586854028e-07, "loss": -0.0167, "num_tokens": 40735075.0, "reward": 0.0, "reward_std": 0.7381820678710938, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.17071789608775356, "rewards/wordcountpos_reward/raw_geo/std": 0.1399092621066253, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078612, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 939 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1183.0, "completions/max_terminated_length": 1183.0, "completions/mean_length": 940.375, "completions/mean_terminated_length": 940.375, "completions/min_length": 733.0, "completions/min_terminated_length": 733.0, "epoch": 0.1880376075215043, "frac_reward_zero_std": 0.0, "grad_norm": 3.9540195000080924, "kl": 0.0081939697265625, "learning_rate": 9.790214385922432e-07, "loss": 0.0089, "num_tokens": 40772721.0, "reward": 0.0, "reward_std": 0.9864704608917236, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03415326785270988, "rewards/wordcountpos_reward/raw_geo/std": 0.18909002065239966, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10610965676722954, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 940 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1314.0, "completions/max_terminated_length": 1314.0, "completions/mean_length": 1002.0625, "completions/mean_terminated_length": 1002.0625, "completions/min_length": 697.0, "completions/min_terminated_length": 697.0, "epoch": 0.1882376475295059, "frac_reward_zero_std": 0.0, "grad_norm": 2.6784234338753503, "kl": 0.0067291259765625, "learning_rate": 9.789265093062822e-07, "loss": -0.0146, "num_tokens": 40810962.0, "reward": -2.0489096641540527e-08, "reward_std": 0.935067355632782, "rewards/wordcountpos_reward/mean": -2.0489096641540527e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09327132078886768, "rewards/wordcountpos_reward/raw_geo/std": 0.1269901313339403, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.09098229375970789, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 941 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1299.0, "completions/max_terminated_length": 1299.0, "completions/mean_length": 1181.0, "completions/mean_terminated_length": 1181.0, "completions/min_length": 976.0, "completions/min_terminated_length": 976.0, "epoch": 0.1884376875375075, "frac_reward_zero_std": 0.0, "grad_norm": 2.3718240305837406, "kl": 0.00388336181640625, "learning_rate": 9.788313708738074e-07, "loss": -0.0341, "num_tokens": 40852498.0, "reward": 1.4901161193847656e-08, "reward_std": 0.97857666015625, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.092014345781485, "rewards/wordcountpos_reward/raw_geo/std": 0.046848672428784766, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07888106377466154, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 942 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1242.0, "completions/max_terminated_length": 1242.0, "completions/mean_length": 977.5, "completions/mean_terminated_length": 977.5, "completions/min_length": 860.0, "completions/min_terminated_length": 860.0, "epoch": 0.18863772754550912, "frac_reward_zero_std": 0.0, "grad_norm": 3.673159131719144, "kl": 0.0086212158203125, "learning_rate": 9.787360233412088e-07, "loss": -0.0065, "num_tokens": 40889730.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9617863893508911, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01499261313592456, "rewards/wordcountpos_reward/raw_geo/std": 0.06637304140611676, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12382783747337807, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 943 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1349.0, "completions/max_terminated_length": 1349.0, "completions/mean_length": 1080.75, "completions/mean_terminated_length": 1080.75, "completions/min_length": 862.0, "completions/min_terminated_length": 862.0, "epoch": 0.1888377675535107, "frac_reward_zero_std": 0.0, "grad_norm": 3.7363677045967534, "kl": 0.0089263916015625, "learning_rate": 9.786404667549785e-07, "loss": -0.0154, "num_tokens": 40919758.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9655193090438843, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08338189901855494, "rewards/wordcountpos_reward/raw_geo/std": 0.08902969840183604, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.21391413992361344, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 944 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1282.5625, "completions/mean_terminated_length": 1251.5, "completions/min_length": 1068.0, "completions/min_terminated_length": 1068.0, "epoch": 0.1890378075615123, "frac_reward_zero_std": 0.0, "grad_norm": 2.9672736899275782, "kl": 0.00748443603515625, "learning_rate": 9.785447011617101e-07, "loss": 0.0227, "num_tokens": 40966631.0, "reward": 0.0, "reward_std": 0.7386121153831482, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.089826090091396, "rewards/wordcountpos_reward/raw_geo/std": 0.2264867558972907, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.17191729277636836, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 945 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1176.0, "completions/max_terminated_length": 1176.0, "completions/mean_length": 875.875, "completions/mean_terminated_length": 875.875, "completions/min_length": 533.0, "completions/min_terminated_length": 533.0, "epoch": 0.1892378475695139, "frac_reward_zero_std": 0.0, "grad_norm": 3.4993930436063767, "kl": 0.0088043212890625, "learning_rate": 9.784487266080995e-07, "loss": -0.0888, "num_tokens": 41001821.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0333006381988525, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13892540733122008, "rewards/wordcountpos_reward/raw_geo/std": 0.08107529403261154, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07888106377466156, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 946 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1305.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 933.75, "completions/mean_terminated_length": 933.75, "completions/min_length": 751.0, "completions/min_terminated_length": 751.0, "epoch": 0.1894378875775155, "frac_reward_zero_std": 0.0, "grad_norm": 3.2492788300126274, "kl": 0.00623321533203125, "learning_rate": 9.783525431409443e-07, "loss": -0.0341, "num_tokens": 41051601.0, "reward": 0.0, "reward_std": 0.7974460124969482, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.16827035213083133, "rewards/wordcountpos_reward/raw_geo/std": 0.30028014448223206, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.11279282877125756, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 947 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1422.0, "completions/max_terminated_length": 1422.0, "completions/mean_length": 1094.5, "completions/mean_terminated_length": 1094.5, "completions/min_length": 852.0, "completions/min_terminated_length": 852.0, "epoch": 0.1896379275855171, "frac_reward_zero_std": 0.0, "grad_norm": 3.808474183590221, "kl": 0.0098114013671875, "learning_rate": 9.78256150807144e-07, "loss": -0.0015, "num_tokens": 41103529.0, "reward": 0.0, "reward_std": 0.3778834044933319, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.020940434938996468, "rewards/wordcountpos_reward/raw_geo/std": 0.028493398935873975, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08944271909999157, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 948 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 1125.75, "completions/mean_terminated_length": 1072.2857666015625, "completions/min_length": 707.0, "completions/min_terminated_length": 707.0, "epoch": 0.1898379675935187, "frac_reward_zero_std": 0.0, "grad_norm": 3.4818027599143915, "kl": 0.011016845703125, "learning_rate": 9.781595496536997e-07, "loss": -0.0586, "num_tokens": 41153741.0, "reward": 5.960464477539063e-08, "reward_std": 0.6365479230880737, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.31218962735374806, "rewards/wordcountpos_reward/raw_geo/std": 0.169594822972, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.11122216672215288, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 949 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1352.0, "completions/mean_length": 1158.1875, "completions/mean_terminated_length": 1135.4000244140625, "completions/min_length": 928.0, "completions/min_terminated_length": 928.0, "epoch": 0.19003800760152031, "frac_reward_zero_std": 0.0, "grad_norm": 3.337117188239104, "kl": 0.00824737548828125, "learning_rate": 9.780627397277149e-07, "loss": -0.02, "num_tokens": 41194640.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8792279958724976, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0536612397072808, "rewards/wordcountpos_reward/raw_geo/std": 0.07746358153949842, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081411, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 950 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1243.0, "completions/max_terminated_length": 1243.0, "completions/mean_length": 1025.1875, "completions/mean_terminated_length": 1025.1875, "completions/min_length": 576.0, "completions/min_terminated_length": 576.0, "epoch": 0.1902380476095219, "frac_reward_zero_std": 0.0, "grad_norm": 3.1007355978220614, "kl": 0.00705718994140625, "learning_rate": 9.779657210763944e-07, "loss": -0.0226, "num_tokens": 41231227.0, "reward": 0.0, "reward_std": 1.0548025369644165, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0045855926888992565, "rewards/wordcountpos_reward/raw_geo/std": 0.042990815802110095, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.029502040105226113, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 951 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1053.6875, "completions/mean_terminated_length": 1023.9334106445312, "completions/min_length": 612.0, "completions/min_terminated_length": 612.0, "epoch": 0.1904380876175235, "frac_reward_zero_std": 0.0, "grad_norm": 3.146373689396383, "kl": 0.00689697265625, "learning_rate": 9.778684937470449e-07, "loss": -0.0315, "num_tokens": 41270774.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8776946663856506, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05510189335825958, "rewards/wordcountpos_reward/raw_geo/std": 0.20166074293095868, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.12041594578792297, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 952 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1195.125, "completions/mean_terminated_length": 1174.800048828125, "completions/min_length": 984.0, "completions/min_terminated_length": 984.0, "epoch": 0.1906381276255251, "frac_reward_zero_std": 0.0, "grad_norm": 2.025457138913446, "kl": 0.005359649658203125, "learning_rate": 9.77771057787075e-07, "loss": -0.0184, "num_tokens": 41321712.0, "reward": -2.9802322387695312e-08, "reward_std": 0.727824866771698, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.2048883808902143, "rewards/wordcountpos_reward/raw_geo/std": 0.16196443135966973, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.093392838174146, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 953 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1164.375, "completions/mean_terminated_length": 1142.0, "completions/min_length": 951.0, "completions/min_terminated_length": 951.0, "epoch": 0.1908381676335267, "frac_reward_zero_std": 0.0, "grad_norm": 3.5318611129265505, "kl": 0.0095062255859375, "learning_rate": 9.776734132439948e-07, "loss": -0.0624, "num_tokens": 41365534.0, "reward": 2.9802322387695312e-08, "reward_std": 0.3782709240913391, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15518698027748226, "rewards/wordcountpos_reward/raw_geo/std": 0.1091001333583044, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.775, "rewards/wordcountpos_reward/raw_rule/std": 0.14981470036162822, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 954 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1247.0, "completions/max_terminated_length": 1247.0, "completions/mean_length": 1104.5, "completions/mean_terminated_length": 1104.5, "completions/min_length": 977.0, "completions/min_terminated_length": 977.0, "epoch": 0.19103820764152832, "frac_reward_zero_std": 0.0, "grad_norm": 2.717478323221834, "kl": 0.0077667236328125, "learning_rate": 9.775755601654163e-07, "loss": -0.0202, "num_tokens": 41410966.0, "reward": 0.0, "reward_std": 0.8377324938774109, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05236426151473175, "rewards/wordcountpos_reward/raw_geo/std": 0.05389861928272634, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.0620632890834175, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 955 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1269.0, "completions/max_terminated_length": 1269.0, "completions/mean_length": 1032.8125, "completions/mean_terminated_length": 1032.8125, "completions/min_length": 825.0, "completions/min_terminated_length": 825.0, "epoch": 0.1912382476495299, "frac_reward_zero_std": 0.0, "grad_norm": 3.5367118137997884, "kl": 0.0077667236328125, "learning_rate": 9.774774985990531e-07, "loss": -0.0195, "num_tokens": 41443851.0, "reward": 3.725290298461914e-09, "reward_std": 1.0271151065826416, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.11298251456849681, "rewards/wordcountpos_reward/raw_geo/std": 0.02084135336988205, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857659, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 956 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1462.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1053.8125, "completions/mean_terminated_length": 1053.8125, "completions/min_length": 845.0, "completions/min_terminated_length": 845.0, "epoch": 0.19143828765753151, "frac_reward_zero_std": 0.0, "grad_norm": 3.1117085526957804, "kl": 0.0086822509765625, "learning_rate": 9.773792285927204e-07, "loss": 0.0119, "num_tokens": 41484328.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0307097434997559, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.043141481545671684, "rewards/wordcountpos_reward/raw_geo/std": 0.24255058215578731, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.10390522747338703, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 957 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1284.0, "completions/max_terminated_length": 1284.0, "completions/mean_length": 1167.4375, "completions/mean_terminated_length": 1167.4375, "completions/min_length": 941.0, "completions/min_terminated_length": 941.0, "epoch": 0.1916383276655331, "frac_reward_zero_std": 0.0, "grad_norm": 3.2129636731068816, "kl": 0.008514404296875, "learning_rate": 9.772807501943352e-07, "loss": 0.0204, "num_tokens": 41521719.0, "reward": 0.0, "reward_std": 1.0393407344818115, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12965038779104493, "rewards/wordcountpos_reward/raw_geo/std": 0.08441975781806724, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081408, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 958 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1380.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1038.1875, "completions/mean_terminated_length": 1038.1875, "completions/min_length": 767.0, "completions/min_terminated_length": 767.0, "epoch": 0.1918383676735347, "frac_reward_zero_std": 0.0, "grad_norm": 3.5629243635685652, "kl": 0.00872039794921875, "learning_rate": 9.77182063451916e-07, "loss": -0.0497, "num_tokens": 41566938.0, "reward": -1.4901161193847656e-08, "reward_std": 1.004325032234192, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07352977140174699, "rewards/wordcountpos_reward/raw_geo/std": 0.111066035024086, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 959 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1238.9375, "completions/mean_terminated_length": 1178.6923828125, "completions/min_length": 913.0, "completions/min_terminated_length": 913.0, "epoch": 0.1920384076815363, "frac_reward_zero_std": 0.0, "grad_norm": 3.0164824637685608, "kl": 0.00860595703125, "learning_rate": 9.770831684135825e-07, "loss": -0.0582, "num_tokens": 41621081.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6376367807388306, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03276566488367626, "rewards/wordcountpos_reward/raw_geo/std": 0.0983017904178729, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1500617156989701, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 960 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1180.0, "completions/max_terminated_length": 1180.0, "completions/mean_length": 1069.9375, "completions/mean_terminated_length": 1069.9375, "completions/min_length": 880.0, "completions/min_terminated_length": 880.0, "epoch": 0.1922384476895379, "frac_reward_zero_std": 0.0, "grad_norm": 3.521652737115484, "kl": 0.00927734375, "learning_rate": 9.76984065127557e-07, "loss": -0.0088, "num_tokens": 41667560.0, "reward": 0.0, "reward_std": 0.8463444113731384, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06925460984066141, "rewards/wordcountpos_reward/raw_geo/std": 0.4339992986635063, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.03442651863295481, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 961 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1367.0, "completions/mean_length": 1083.875, "completions/mean_terminated_length": 1056.1334228515625, "completions/min_length": 854.0, "completions/min_terminated_length": 854.0, "epoch": 0.19243848769753952, "frac_reward_zero_std": 0.0, "grad_norm": 2.8058250931398803, "kl": 0.005828857421875, "learning_rate": 9.768847536421628e-07, "loss": -0.0343, "num_tokens": 41708630.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6933983564376831, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15456258207414825, "rewards/wordcountpos_reward/raw_geo/std": 0.12007180741351452, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452246, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 962 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1275.0, "completions/max_terminated_length": 1275.0, "completions/mean_length": 1119.0625, "completions/mean_terminated_length": 1119.0625, "completions/min_length": 913.0, "completions/min_terminated_length": 913.0, "epoch": 0.1926385277055411, "frac_reward_zero_std": 0.0, "grad_norm": 3.3230610852581797, "kl": 0.01111602783203125, "learning_rate": 9.76785234005824e-07, "loss": -0.0159, "num_tokens": 41757175.0, "reward": 5.960464477539063e-08, "reward_std": 0.618442952632904, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12197930537576111, "rewards/wordcountpos_reward/raw_geo/std": 0.1729320999573292, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.070841502796867, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 963 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1395.1875, "completions/mean_terminated_length": 1260.4285888671875, "completions/min_length": 1083.0, "completions/min_terminated_length": 1083.0, "epoch": 0.1928385677135427, "frac_reward_zero_std": 0.0, "grad_norm": 1.9034664701635828, "kl": 0.004024505615234375, "learning_rate": 9.76685506267067e-07, "loss": 0.0, "num_tokens": 41806586.0, "reward": -3.725290298461914e-08, "reward_std": 1.0329450368881226, "rewards/wordcountpos_reward/mean": -3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1655605184014538, "rewards/wordcountpos_reward/raw_geo/std": 0.0901731679471355, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476836, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 964 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1468.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1163.75, "completions/mean_terminated_length": 1163.75, "completions/min_length": 873.0, "completions/min_terminated_length": 873.0, "epoch": 0.1930386077215443, "frac_reward_zero_std": 0.0, "grad_norm": 3.628883578815303, "kl": 0.0093231201171875, "learning_rate": 9.765855704745196e-07, "loss": -0.0111, "num_tokens": 41844014.0, "reward": 0.0, "reward_std": 0.6979318857192993, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0696106848600084, "rewards/wordcountpos_reward/raw_geo/std": 0.2402973831267178, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.1080980350662545, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 965 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1417.0, "completions/mean_length": 1050.4375, "completions/mean_terminated_length": 986.21435546875, "completions/min_length": 768.0, "completions/min_terminated_length": 768.0, "epoch": 0.1932386477295459, "frac_reward_zero_std": 0.0, "grad_norm": 2.9072885721970954, "kl": 0.0061798095703125, "learning_rate": 9.764854266769112e-07, "loss": 0.0236, "num_tokens": 41876885.0, "reward": 0.0, "reward_std": 0.6620415449142456, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.011167292201370055, "rewards/wordcountpos_reward/raw_geo/std": 0.06220947820361317, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593311, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 966 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1208.0, "completions/mean_length": 1078.0, "completions/mean_terminated_length": 1049.86669921875, "completions/min_length": 879.0, "completions/min_terminated_length": 879.0, "epoch": 0.19343868773754752, "frac_reward_zero_std": 0.0, "grad_norm": 3.7060119984863347, "kl": 0.01177978515625, "learning_rate": 9.763850749230719e-07, "loss": 0.0546, "num_tokens": 41916493.0, "reward": 0.0, "reward_std": 0.8707925081253052, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.3934134521310546, "rewards/wordcountpos_reward/raw_geo/std": 0.21077258660552445, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9791666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 967 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1438.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1142.5625, "completions/mean_terminated_length": 1142.5625, "completions/min_length": 942.0, "completions/min_terminated_length": 942.0, "epoch": 0.1936387277455491, "frac_reward_zero_std": 0.0, "grad_norm": 3.7829595182124085, "kl": 0.010772705078125, "learning_rate": 9.76284515261934e-07, "loss": 0.0307, "num_tokens": 41954942.0, "reward": 0.0, "reward_std": 0.4330425262451172, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07786518552428937, "rewards/wordcountpos_reward/raw_geo/std": 0.08364627810801072, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.09259629622222519, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 968 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1340.4375, "completions/mean_terminated_length": 1267.9091796875, "completions/min_length": 1051.0, "completions/min_terminated_length": 1051.0, "epoch": 0.19383876775355072, "frac_reward_zero_std": 0.0, "grad_norm": 3.3035216691790685, "kl": 0.009368896484375, "learning_rate": 9.761837477425306e-07, "loss": 0.0093, "num_tokens": 42001525.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6716781854629517, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10570408102791175, "rewards/wordcountpos_reward/raw_geo/std": 0.10795750849376855, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1261979632400061, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 969 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1292.0, "completions/max_terminated_length": 1292.0, "completions/mean_length": 1061.0625, "completions/mean_terminated_length": 1061.0625, "completions/min_length": 809.0, "completions/min_terminated_length": 809.0, "epoch": 0.1940388077615523, "frac_reward_zero_std": 0.0, "grad_norm": 3.346635918510029, "kl": 0.00823974609375, "learning_rate": 9.760827724139967e-07, "loss": 0.0121, "num_tokens": 42049838.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0304864645004272, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03648364764650533, "rewards/wordcountpos_reward/raw_geo/std": 0.23760163433039144, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10671873729054748, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 970 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1404.0, "completions/max_terminated_length": 1404.0, "completions/mean_length": 1154.0, "completions/mean_terminated_length": 1154.0, "completions/min_length": 881.0, "completions/min_terminated_length": 881.0, "epoch": 0.1942388477695539, "frac_reward_zero_std": 0.0, "grad_norm": 3.2439435691879868, "kl": 0.0102081298828125, "learning_rate": 9.75981589325568e-07, "loss": -0.0396, "num_tokens": 42099486.0, "reward": 0.0, "reward_std": 0.876253068447113, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.17271538754340202, "rewards/wordcountpos_reward/raw_geo/std": 0.17509874594368177, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11666666666666665, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 971 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1392.0, "completions/mean_length": 1234.25, "completions/mean_terminated_length": 1172.923095703125, "completions/min_length": 992.0, "completions/min_terminated_length": 992.0, "epoch": 0.19443888777755552, "frac_reward_zero_std": 0.0, "grad_norm": 3.4757502753356073, "kl": 0.009765625, "learning_rate": 9.758801985265822e-07, "loss": -0.0013, "num_tokens": 42149370.0, "reward": 0.0, "reward_std": 0.842781662940979, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11323660183352387, "rewards/wordcountpos_reward/raw_geo/std": 0.15145568012805483, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11409872268574492, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 972 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1176.625, "completions/mean_terminated_length": 1130.4285888671875, "completions/min_length": 857.0, "completions/min_terminated_length": 857.0, "epoch": 0.1946389277855571, "frac_reward_zero_std": 0.0, "grad_norm": 3.1036467203599782, "kl": 0.00916290283203125, "learning_rate": 9.757786000664776e-07, "loss": -0.0158, "num_tokens": 42205308.0, "reward": 0.0, "reward_std": 0.8360965251922607, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10275137900902076, "rewards/wordcountpos_reward/raw_geo/std": 0.06586226345953472, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 973 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1141.0, "completions/mean_terminated_length": 1117.0667724609375, "completions/min_length": 865.0, "completions/min_terminated_length": 865.0, "epoch": 0.19483896779355872, "frac_reward_zero_std": 0.0, "grad_norm": 3.021099739457683, "kl": 0.008209228515625, "learning_rate": 9.756767939947943e-07, "loss": -0.0311, "num_tokens": 42253412.0, "reward": 0.0, "reward_std": 1.019844889640808, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.007511562888598632, "rewards/wordcountpos_reward/raw_geo/std": 0.06708045674482292, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.13158576980363346, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 974 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1467.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 1179.0, "completions/mean_terminated_length": 1179.0, "completions/min_length": 960.0, "completions/min_terminated_length": 960.0, "epoch": 0.1950390078015603, "frac_reward_zero_std": 0.0, "grad_norm": 2.854015569032638, "kl": 0.00693511962890625, "learning_rate": 9.755747803611732e-07, "loss": -0.0543, "num_tokens": 42293540.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5685156583786011, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06669527383566692, "rewards/wordcountpos_reward/raw_geo/std": 0.058346499102673276, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.038248698840130005, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 975 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1072.0, "completions/max_terminated_length": 1072.0, "completions/mean_length": 925.875, "completions/mean_terminated_length": 925.875, "completions/min_length": 661.0, "completions/min_terminated_length": 661.0, "epoch": 0.19523904780956192, "frac_reward_zero_std": 0.0, "grad_norm": 4.153345392237854, "kl": 0.0108795166015625, "learning_rate": 9.754725592153568e-07, "loss": -0.0064, "num_tokens": 42330922.0, "reward": 0.0, "reward_std": 0.9093058109283447, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04304334797540349, "rewards/wordcountpos_reward/raw_geo/std": 0.15730764071942582, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409445, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 976 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1436.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1105.5, "completions/mean_terminated_length": 1105.5, "completions/min_length": 699.0, "completions/min_terminated_length": 699.0, "epoch": 0.1954390878175635, "frac_reward_zero_std": 0.0, "grad_norm": 3.661816799130481, "kl": 0.009307861328125, "learning_rate": 9.753701306071882e-07, "loss": 0.0021, "num_tokens": 42366290.0, "reward": 7.450580596923828e-09, "reward_std": 1.0274888277053833, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.0812317415337402, "rewards/wordcountpos_reward/raw_geo/std": 0.05955075190937857, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 977 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1373.0, "completions/max_terminated_length": 1373.0, "completions/mean_length": 1201.875, "completions/mean_terminated_length": 1201.875, "completions/min_length": 991.0, "completions/min_terminated_length": 991.0, "epoch": 0.1956391278255651, "frac_reward_zero_std": 0.0, "grad_norm": 2.8004850709686835, "kl": 0.0074462890625, "learning_rate": 9.752674945866127e-07, "loss": 0.005, "num_tokens": 42401408.0, "reward": -7.450580596923828e-09, "reward_std": 1.0198220014572144, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.023738225912251193, "rewards/wordcountpos_reward/raw_geo/std": 0.04440457900992265, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.11122216672215289, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 978 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1089.1875, "completions/mean_terminated_length": 1061.800048828125, "completions/min_length": 740.0, "completions/min_terminated_length": 740.0, "epoch": 0.19583916783356672, "frac_reward_zero_std": 0.0, "grad_norm": 3.568812250465844, "kl": 0.00994873046875, "learning_rate": 9.751646512036756e-07, "loss": 0.0032, "num_tokens": 42451611.0, "reward": 0.0, "reward_std": 0.7490805387496948, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09593330105966062, "rewards/wordcountpos_reward/raw_geo/std": 0.2202655803146919, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725111, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 979 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1055.0, "completions/max_terminated_length": 1055.0, "completions/mean_length": 851.3125, "completions/mean_terminated_length": 851.3125, "completions/min_length": 768.0, "completions/min_terminated_length": 768.0, "epoch": 0.1960392078415683, "frac_reward_zero_std": 0.0, "grad_norm": 2.3782033366002433, "kl": 0.004711151123046875, "learning_rate": 9.750616005085239e-07, "loss": -0.0175, "num_tokens": 42480184.0, "reward": 5.960464477539063e-08, "reward_std": 0.8601989150047302, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0245311797473303, "rewards/wordcountpos_reward/raw_geo/std": 0.06770323783539561, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 980 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1285.0, "completions/max_terminated_length": 1285.0, "completions/mean_length": 1019.75, "completions/mean_terminated_length": 1019.75, "completions/min_length": 764.0, "completions/min_terminated_length": 764.0, "epoch": 0.19623924784956992, "frac_reward_zero_std": 0.0, "grad_norm": 3.4574797523548084, "kl": 0.0111083984375, "learning_rate": 9.749583425514056e-07, "loss": -0.0184, "num_tokens": 42514812.0, "reward": 5.960464477539063e-08, "reward_std": 0.46284985542297363, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.3294717789373263, "rewards/wordcountpos_reward/raw_geo/std": 0.21859223012515988, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1031898645611484, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 981 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1389.0, "completions/max_terminated_length": 1389.0, "completions/mean_length": 1212.6875, "completions/mean_terminated_length": 1212.6875, "completions/min_length": 1042.0, "completions/min_terminated_length": 1042.0, "epoch": 0.1964392878575715, "frac_reward_zero_std": 0.0, "grad_norm": 2.9099196360236563, "kl": 0.007171630859375, "learning_rate": 9.748548773826699e-07, "loss": -0.0148, "num_tokens": 42563895.0, "reward": 0.0, "reward_std": 1.0514941215515137, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.01889332752140014, "rewards/wordcountpos_reward/raw_geo/std": 0.07481984926625332, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10174405069512348, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 982 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1480.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1058.875, "completions/mean_terminated_length": 1058.875, "completions/min_length": 870.0, "completions/min_terminated_length": 870.0, "epoch": 0.19663932786557312, "frac_reward_zero_std": 0.0, "grad_norm": 3.151585454078633, "kl": 0.00701904296875, "learning_rate": 9.747512050527667e-07, "loss": 0.0253, "num_tokens": 42596989.0, "reward": 0.0, "reward_std": 1.046484112739563, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.015493456575434527, "rewards/wordcountpos_reward/raw_geo/std": 0.15277289265175256, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362767, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 983 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1209.9375, "completions/mean_terminated_length": 1113.25, "completions/min_length": 672.0, "completions/min_terminated_length": 672.0, "epoch": 0.19683936787357473, "frac_reward_zero_std": 0.0, "grad_norm": 2.9048178232358635, "kl": 0.00804901123046875, "learning_rate": 9.746473256122473e-07, "loss": -0.034, "num_tokens": 42637820.0, "reward": 0.0, "reward_std": 0.36593806743621826, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05224381525715387, "rewards/wordcountpos_reward/raw_geo/std": 0.06349015503984301, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07698003589195009, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 984 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1152.0, "completions/mean_terminated_length": 1071.6923828125, "completions/min_length": 669.0, "completions/min_terminated_length": 669.0, "epoch": 0.1970394078815763, "frac_reward_zero_std": 0.0, "grad_norm": 3.0629351016771613, "kl": 0.0122833251953125, "learning_rate": 9.745432391117634e-07, "loss": -0.0031, "num_tokens": 42691764.0, "reward": 0.0, "reward_std": 0.6746702194213867, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.25304571711899665, "rewards/wordcountpos_reward/raw_geo/std": 0.09335789006529434, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.08333333333333334, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 985 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1257.0, "completions/max_terminated_length": 1257.0, "completions/mean_length": 1049.0, "completions/mean_terminated_length": 1049.0, "completions/min_length": 936.0, "completions/min_terminated_length": 936.0, "epoch": 0.19723944788957792, "frac_reward_zero_std": 0.0, "grad_norm": 3.5900638441724966, "kl": 0.0093231201171875, "learning_rate": 9.744389456020683e-07, "loss": -0.0179, "num_tokens": 42741092.0, "reward": 0.0, "reward_std": 0.5195954442024231, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03323786939776842, "rewards/wordcountpos_reward/raw_geo/std": 0.10291103956641054, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07290277645477444, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 986 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1222.0625, "completions/mean_terminated_length": 1203.533447265625, "completions/min_length": 609.0, "completions/min_terminated_length": 609.0, "epoch": 0.1974394878975795, "frac_reward_zero_std": 0.0, "grad_norm": 3.2543367145183417, "kl": 0.0085906982421875, "learning_rate": 9.743344451340161e-07, "loss": -0.0615, "num_tokens": 42790005.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9430963397026062, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10766070143457532, "rewards/wordcountpos_reward/raw_geo/std": 0.12288434424109687, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10000000000000002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 987 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1415.5625, "completions/mean_terminated_length": 1377.181884765625, "completions/min_length": 1287.0, "completions/min_terminated_length": 1287.0, "epoch": 0.19763952790558112, "frac_reward_zero_std": 0.0, "grad_norm": 2.886796861628751, "kl": 0.008636474609375, "learning_rate": 9.742297377585617e-07, "loss": -0.0101, "num_tokens": 42847150.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0454461574554443, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.016622545838982518, "rewards/wordcountpos_reward/raw_geo/std": 0.13484991419802797, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194864, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 988 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1299.875, "completions/mean_terminated_length": 1179.800048828125, "completions/min_length": 949.0, "completions/min_terminated_length": 949.0, "epoch": 0.1978395679135827, "frac_reward_zero_std": 0.0, "grad_norm": 3.2002692872767162, "kl": 0.0091400146484375, "learning_rate": 9.741248235267608e-07, "loss": 0.0083, "num_tokens": 42893956.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9585890769958496, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.007615239840049383, "rewards/wordcountpos_reward/raw_geo/std": 0.07497166709515451, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09727776191382573, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 989 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1362.0, "completions/mean_length": 1137.375, "completions/mean_terminated_length": 1113.2000732421875, "completions/min_length": 918.0, "completions/min_terminated_length": 918.0, "epoch": 0.19803960792158432, "frac_reward_zero_std": 0.0, "grad_norm": 3.6598295686262654, "kl": 0.0095977783203125, "learning_rate": 9.740197024897697e-07, "loss": -0.0598, "num_tokens": 42945754.0, "reward": 0.0, "reward_std": 0.5584701299667358, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.00893313560242432, "rewards/wordcountpos_reward/raw_geo/std": 0.2142660894191206, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.775, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715323, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 990 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1469.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 1046.875, "completions/mean_terminated_length": 1046.875, "completions/min_length": 699.0, "completions/min_terminated_length": 699.0, "epoch": 0.19823964792958593, "frac_reward_zero_std": 0.0, "grad_norm": 3.5833108644296234, "kl": 0.00970458984375, "learning_rate": 9.739143746988466e-07, "loss": -0.0391, "num_tokens": 42985264.0, "reward": -7.450580596923828e-09, "reward_std": 1.0384418964385986, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.08075193953930891, "rewards/wordcountpos_reward/raw_geo/std": 0.0889943981378654, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 991 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1174.0, "completions/max_terminated_length": 1174.0, "completions/mean_length": 973.25, "completions/mean_terminated_length": 973.25, "completions/min_length": 759.0, "completions/min_terminated_length": 759.0, "epoch": 0.1984396879375875, "frac_reward_zero_std": 0.0, "grad_norm": 3.838958553485212, "kl": 0.009521484375, "learning_rate": 9.738088402053494e-07, "loss": 0.0133, "num_tokens": 43027740.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6900591850280762, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.020625120433161916, "rewards/wordcountpos_reward/raw_geo/std": 0.040872144250805806, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.12382783747337808, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 992 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1443.0, "completions/max_terminated_length": 1443.0, "completions/mean_length": 966.0, "completions/mean_terminated_length": 966.0, "completions/min_length": 789.0, "completions/min_terminated_length": 789.0, "epoch": 0.19863972794558912, "frac_reward_zero_std": 0.0, "grad_norm": 2.942477990039526, "kl": 0.00498199462890625, "learning_rate": 9.73703099060737e-07, "loss": -0.0335, "num_tokens": 43056948.0, "reward": 2.9802322387695312e-08, "reward_std": 0.671892523765564, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1324626588364287, "rewards/wordcountpos_reward/raw_geo/std": 0.10159128676666625, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 993 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1055.25, "completions/mean_terminated_length": 1025.60009765625, "completions/min_length": 782.0, "completions/min_terminated_length": 782.0, "epoch": 0.1988397679535907, "frac_reward_zero_std": 0.0, "grad_norm": 3.1296251252511853, "kl": 0.009765625, "learning_rate": 9.735971513165697e-07, "loss": -0.0137, "num_tokens": 43107104.0, "reward": 7.450580596923828e-09, "reward_std": 1.0283710956573486, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.1545300709551925, "rewards/wordcountpos_reward/raw_geo/std": 0.08963247578208394, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06978803887752091, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 994 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1449.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 1269.1875, "completions/mean_terminated_length": 1269.1875, "completions/min_length": 933.0, "completions/min_terminated_length": 933.0, "epoch": 0.19903980796159232, "frac_reward_zero_std": 0.0, "grad_norm": 2.6896227126846854, "kl": 0.00626373291015625, "learning_rate": 9.734909970245076e-07, "loss": -0.0177, "num_tokens": 43160643.0, "reward": 0.0, "reward_std": 0.8295433521270752, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05391219745952789, "rewards/wordcountpos_reward/raw_geo/std": 0.03497497878269501, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 995 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1337.0, "completions/max_terminated_length": 1337.0, "completions/mean_length": 1120.375, "completions/mean_terminated_length": 1120.375, "completions/min_length": 767.0, "completions/min_terminated_length": 767.0, "epoch": 0.19923984796959393, "frac_reward_zero_std": 0.0, "grad_norm": 2.2234385607897336, "kl": 0.00362396240234375, "learning_rate": 9.733846362363127e-07, "loss": -0.0124, "num_tokens": 43200889.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9960434436798096, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.18255350614425478, "rewards/wordcountpos_reward/raw_geo/std": 0.10320528482418585, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036263, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 996 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1309.0, "completions/max_terminated_length": 1309.0, "completions/mean_length": 1044.625, "completions/mean_terminated_length": 1044.625, "completions/min_length": 851.0, "completions/min_terminated_length": 851.0, "epoch": 0.19943988797759551, "frac_reward_zero_std": 0.0, "grad_norm": 2.3706599514308855, "kl": 0.00672149658203125, "learning_rate": 9.732780690038464e-07, "loss": -0.029, "num_tokens": 43246539.0, "reward": -2.9802322387695312e-08, "reward_std": 0.806902289390564, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14570355804490545, "rewards/wordcountpos_reward/raw_geo/std": 0.2641254723789616, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1085254706406647, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 997 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1442.0, "completions/mean_length": 1139.375, "completions/mean_terminated_length": 1115.3333740234375, "completions/min_length": 922.0, "completions/min_terminated_length": 922.0, "epoch": 0.19963992798559713, "frac_reward_zero_std": 0.0, "grad_norm": 3.49452593312421, "kl": 0.0141754150390625, "learning_rate": 9.731712953790718e-07, "loss": 0.0327, "num_tokens": 43297497.0, "reward": -2.9802322387695312e-08, "reward_std": 0.871312141418457, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.23033980866026177, "rewards/wordcountpos_reward/raw_geo/std": 0.11314929356550549, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066473, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 998 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1272.0, "completions/max_terminated_length": 1272.0, "completions/mean_length": 979.8125, "completions/mean_terminated_length": 979.8125, "completions/min_length": 670.0, "completions/min_terminated_length": 670.0, "epoch": 0.1998399679935987, "frac_reward_zero_std": 0.0, "grad_norm": 4.057703688846008, "kl": 0.0113983154296875, "learning_rate": 9.730643154140518e-07, "loss": -0.0041, "num_tokens": 43337534.0, "reward": 7.450580596923828e-09, "reward_std": 0.9890952706336975, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.000717136643856052, "rewards/wordcountpos_reward/raw_geo/std": 0.1644765721873642, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12102953419784838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 999 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1256.3125, "completions/mean_terminated_length": 1240.0667724609375, "completions/min_length": 962.0, "completions/min_terminated_length": 962.0, "epoch": 0.20004000800160032, "frac_reward_zero_std": 0.0, "grad_norm": 3.0434314326695864, "kl": 0.00659942626953125, "learning_rate": 9.729571291609507e-07, "loss": 0.0067, "num_tokens": 43385067.0, "reward": 0.0, "reward_std": 0.6750105619430542, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.24994730968579076, "rewards/wordcountpos_reward/raw_geo/std": 0.1243578461053855, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503963, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1000 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1466.0, "completions/mean_length": 1161.0, "completions/mean_terminated_length": 1138.4000244140625, "completions/min_length": 764.0, "completions/min_terminated_length": 764.0, "epoch": 0.20024004800960193, "frac_reward_zero_std": 0.0, "grad_norm": 1.399634499785646, "kl": 0.005084991455078125, "learning_rate": 9.728497366720326e-07, "loss": -0.0336, "num_tokens": 43422891.0, "reward": 0.0, "reward_std": 0.9323999285697937, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0651619885517758, "rewards/wordcountpos_reward/raw_geo/std": 0.07319846293518127, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901162, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1001 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 978.4375, "completions/mean_terminated_length": 943.6666870117188, "completions/min_length": 665.0, "completions/min_terminated_length": 665.0, "epoch": 0.20044008801760352, "frac_reward_zero_std": 0.0, "grad_norm": 3.8183821745056505, "kl": 0.0240631103515625, "learning_rate": 9.727421379996629e-07, "loss": 0.0219, "num_tokens": 43466570.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8229279518127441, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.003386494474354254, "rewards/wordcountpos_reward/raw_geo/std": 0.1337370553313024, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.1308094458023239, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1002 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1254.5625, "completions/mean_terminated_length": 1197.923095703125, "completions/min_length": 973.0, "completions/min_terminated_length": 973.0, "epoch": 0.20064012802560513, "frac_reward_zero_std": 0.0, "grad_norm": 3.0389045351919304, "kl": 0.009735107421875, "learning_rate": 9.72634333196307e-07, "loss": -0.0325, "num_tokens": 43520907.0, "reward": 0.0, "reward_std": 1.035268783569336, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.11362823035027286, "rewards/wordcountpos_reward/raw_geo/std": 0.07524407763852438, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11013459778666117, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1003 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1451.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1040.625, "completions/mean_terminated_length": 1040.625, "completions/min_length": 731.0, "completions/min_terminated_length": 731.0, "epoch": 0.20084016803360671, "frac_reward_zero_std": 0.0, "grad_norm": 3.8449276166291813, "kl": 0.01239013671875, "learning_rate": 9.72526322314531e-07, "loss": -0.0429, "num_tokens": 43569077.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0190765857696533, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.012059881097390247, "rewards/wordcountpos_reward/raw_geo/std": 0.1589921796287149, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10878112581387148, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1004 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1437.0, "completions/max_terminated_length": 1437.0, "completions/mean_length": 1066.375, "completions/mean_terminated_length": 1066.375, "completions/min_length": 831.0, "completions/min_terminated_length": 831.0, "epoch": 0.20104020804160833, "frac_reward_zero_std": 0.0, "grad_norm": 3.2187429289676253, "kl": 0.007488250732421875, "learning_rate": 9.724181054070018e-07, "loss": -0.0093, "num_tokens": 43606907.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6909418702125549, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.2556306419603813, "rewards/wordcountpos_reward/raw_geo/std": 0.15910946882496937, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11603000888978234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1005 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 1266.4375, "completions/mean_terminated_length": 1160.272705078125, "completions/min_length": 843.0, "completions/min_terminated_length": 843.0, "epoch": 0.2012402480496099, "frac_reward_zero_std": 0.0, "grad_norm": 2.9364824537407386, "kl": 0.008056640625, "learning_rate": 9.723096825264862e-07, "loss": -0.0111, "num_tokens": 43653738.0, "reward": 0.0, "reward_std": 0.8947299718856812, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06235842115735778, "rewards/wordcountpos_reward/raw_geo/std": 0.15017209687269067, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1006 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1446.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 919.25, "completions/mean_terminated_length": 919.25, "completions/min_length": 575.0, "completions/min_terminated_length": 575.0, "epoch": 0.20144028805761152, "frac_reward_zero_std": 0.0, "grad_norm": 3.9551609060186763, "kl": 0.00917816162109375, "learning_rate": 9.722010537258516e-07, "loss": 0.013, "num_tokens": 43688534.0, "reward": -5.960464477539063e-08, "reward_std": 0.506924569606781, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.15070179428562483, "rewards/wordcountpos_reward/raw_geo/std": 0.13979391716697442, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133134, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1007 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1265.0, "completions/max_terminated_length": 1265.0, "completions/mean_length": 1030.8125, "completions/mean_terminated_length": 1030.8125, "completions/min_length": 836.0, "completions/min_terminated_length": 836.0, "epoch": 0.20164032806561313, "frac_reward_zero_std": 0.0, "grad_norm": 3.2433443084594336, "kl": 0.010711669921875, "learning_rate": 9.720922190580662e-07, "loss": -0.0007, "num_tokens": 43734483.0, "reward": 0.0, "reward_std": 0.9012659192085266, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06391463854055113, "rewards/wordcountpos_reward/raw_geo/std": 0.1302640900397669, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1008 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 1269.0, "completions/mean_terminated_length": 1253.60009765625, "completions/min_length": 1008.0, "completions/min_terminated_length": 1008.0, "epoch": 0.20184036807361472, "frac_reward_zero_std": 0.0, "grad_norm": 2.7286355380619396, "kl": 0.0070953369140625, "learning_rate": 9.719831785761981e-07, "loss": -0.0353, "num_tokens": 43770819.0, "reward": 0.0, "reward_std": 0.9553343057632446, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0908261959148386, "rewards/wordcountpos_reward/raw_geo/std": 0.042759784935710685, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505422, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1009 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1405.375, "completions/mean_terminated_length": 1310.75, "completions/min_length": 1152.0, "completions/min_terminated_length": 1152.0, "epoch": 0.20204040808161633, "frac_reward_zero_std": 0.0, "grad_norm": 2.8465258755884864, "kl": 0.0076446533203125, "learning_rate": 9.71873932333416e-07, "loss": -0.0083, "num_tokens": 43815001.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8110127449035645, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1908153374345717, "rewards/wordcountpos_reward/raw_geo/std": 0.199833009011391, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.09259629622222519, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1010 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1222.0, "completions/max_terminated_length": 1222.0, "completions/mean_length": 937.6875, "completions/mean_terminated_length": 937.6875, "completions/min_length": 620.0, "completions/min_terminated_length": 620.0, "epoch": 0.2022404480896179, "frac_reward_zero_std": 0.0, "grad_norm": 4.074079248938705, "kl": 0.0098419189453125, "learning_rate": 9.717644803829886e-07, "loss": 0.0146, "num_tokens": 43844468.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6750224232673645, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04593400618804369, "rewards/wordcountpos_reward/raw_geo/std": 0.14252343778669646, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1210295341978484, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1011 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1188.0, "completions/mean_length": 1048.75, "completions/mean_terminated_length": 1018.666748046875, "completions/min_length": 756.0, "completions/min_terminated_length": 756.0, "epoch": 0.20244048809761953, "frac_reward_zero_std": 0.0, "grad_norm": 3.1035989183070685, "kl": 0.00640869140625, "learning_rate": 9.716548227782854e-07, "loss": 0.0186, "num_tokens": 43885864.0, "reward": -2.2351741790771484e-08, "reward_std": 1.048802137374878, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.021547923324218496, "rewards/wordcountpos_reward/raw_geo/std": 0.0517572886398629, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1012 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1201.0, "completions/mean_length": 1122.0625, "completions/mean_terminated_length": 1068.071533203125, "completions/min_length": 792.0, "completions/min_terminated_length": 792.0, "epoch": 0.20264052810562114, "frac_reward_zero_std": 0.0, "grad_norm": 3.2404770470182935, "kl": 0.0105743408203125, "learning_rate": 9.71544959572776e-07, "loss": -0.0386, "num_tokens": 43928345.0, "reward": 0.0, "reward_std": 1.0619075298309326, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0492492401782355, "rewards/wordcountpos_reward/raw_geo/std": 0.05833713263304141, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1013 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1431.0, "completions/max_terminated_length": 1431.0, "completions/mean_length": 1128.375, "completions/mean_terminated_length": 1128.375, "completions/min_length": 787.0, "completions/min_terminated_length": 787.0, "epoch": 0.20284056811362272, "frac_reward_zero_std": 0.0, "grad_norm": 3.3657429378593324, "kl": 0.0091094970703125, "learning_rate": 9.7143489082003e-07, "loss": -0.0155, "num_tokens": 43975567.0, "reward": 0.0, "reward_std": 0.319256991147995, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.015362074205052636, "rewards/wordcountpos_reward/raw_geo/std": 0.12340124484360218, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.65, "rewards/wordcountpos_reward/raw_rule/std": 0.30453364467779376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1014 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1483.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1104.625, "completions/mean_terminated_length": 1104.625, "completions/min_length": 733.0, "completions/min_terminated_length": 733.0, "epoch": 0.20304060812162433, "frac_reward_zero_std": 0.0, "grad_norm": 3.185615311746243, "kl": 0.0096282958984375, "learning_rate": 9.713246165737177e-07, "loss": -0.0759, "num_tokens": 44017673.0, "reward": -2.9802322387695312e-08, "reward_std": 0.44372987747192383, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0797116445323041, "rewards/wordcountpos_reward/raw_geo/std": 0.07849822580631244, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026001, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1015 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1455.0, "completions/max_terminated_length": 1455.0, "completions/mean_length": 986.625, "completions/mean_terminated_length": 986.625, "completions/min_length": 697.0, "completions/min_terminated_length": 697.0, "epoch": 0.20324064812962592, "frac_reward_zero_std": 0.0, "grad_norm": 3.495395315578162, "kl": 0.00913238525390625, "learning_rate": 9.712141368876092e-07, "loss": -0.0221, "num_tokens": 44059059.0, "reward": 0.0, "reward_std": 0.6407808661460876, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0890741892342035, "rewards/wordcountpos_reward/raw_geo/std": 0.08983352967736655, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09269623828717428, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1016 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1498.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1055.5, "completions/mean_terminated_length": 1055.5, "completions/min_length": 719.0, "completions/min_terminated_length": 719.0, "epoch": 0.20344068813762753, "frac_reward_zero_std": 0.0, "grad_norm": 3.170061551755287, "kl": 0.0094146728515625, "learning_rate": 9.711034518155746e-07, "loss": -0.0622, "num_tokens": 44106723.0, "reward": 0.0, "reward_std": 0.6020821332931519, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.00011505968698227731, "rewards/wordcountpos_reward/raw_geo/std": 0.16896957555983888, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.17506612507320812, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1017 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1452.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1252.75, "completions/mean_terminated_length": 1252.75, "completions/min_length": 1120.0, "completions/min_terminated_length": 1120.0, "epoch": 0.2036407281456291, "frac_reward_zero_std": 0.0, "grad_norm": 2.4621360294908987, "kl": 0.006805419921875, "learning_rate": 9.709925614115849e-07, "loss": 0.0073, "num_tokens": 44148655.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9484935998916626, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08429771219319272, "rewards/wordcountpos_reward/raw_geo/std": 0.052876669531145705, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1018 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1358.0, "completions/max_terminated_length": 1358.0, "completions/mean_length": 996.75, "completions/mean_terminated_length": 996.75, "completions/min_length": 705.0, "completions/min_terminated_length": 705.0, "epoch": 0.20384076815363072, "frac_reward_zero_std": 0.0, "grad_norm": 4.090329188622991, "kl": 0.012451171875, "learning_rate": 9.708814657297105e-07, "loss": 0.0112, "num_tokens": 44194107.0, "reward": 0.0, "reward_std": 0.880154013633728, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.21787251053087683, "rewards/wordcountpos_reward/raw_geo/std": 0.13905024675194633, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08681611046941139, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1019 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1464.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1055.125, "completions/mean_terminated_length": 1055.125, "completions/min_length": 933.0, "completions/min_terminated_length": 933.0, "epoch": 0.20404080816163234, "frac_reward_zero_std": 0.0, "grad_norm": 2.317716015973491, "kl": 0.00559234619140625, "learning_rate": 9.707701648241223e-07, "loss": -0.032, "num_tokens": 44231437.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9892827868461609, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03434693083274678, "rewards/wordcountpos_reward/raw_geo/std": 0.05628299206201947, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1020 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1399.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 1089.125, "completions/mean_terminated_length": 1089.125, "completions/min_length": 949.0, "completions/min_terminated_length": 949.0, "epoch": 0.20424084816963392, "frac_reward_zero_std": 0.0, "grad_norm": 3.430035718638262, "kl": 0.009124755859375, "learning_rate": 9.706586587490908e-07, "loss": 0.0525, "num_tokens": 44272919.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0335841178894043, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.028271410102089327, "rewards/wordcountpos_reward/raw_geo/std": 0.097688141579826, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988657, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1021 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1276.0, "completions/max_terminated_length": 1276.0, "completions/mean_length": 1074.875, "completions/mean_terminated_length": 1074.875, "completions/min_length": 932.0, "completions/min_terminated_length": 932.0, "epoch": 0.20444088817763553, "frac_reward_zero_std": 0.0, "grad_norm": 3.2780810189160525, "kl": 0.0096282958984375, "learning_rate": 9.705469475589875e-07, "loss": -0.0398, "num_tokens": 44305125.0, "reward": 0.0, "reward_std": 0.9569463729858398, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.02301718987540889, "rewards/wordcountpos_reward/raw_geo/std": 0.03126932343765298, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10610965676722954, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1022 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1338.0, "completions/max_terminated_length": 1338.0, "completions/mean_length": 1035.6875, "completions/mean_terminated_length": 1035.6875, "completions/min_length": 788.0, "completions/min_terminated_length": 788.0, "epoch": 0.20464092818563712, "frac_reward_zero_std": 0.0, "grad_norm": 3.3608929556981004, "kl": 0.010162353515625, "learning_rate": 9.704350313082827e-07, "loss": -0.0108, "num_tokens": 44338328.0, "reward": 0.0, "reward_std": 0.9830136895179749, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09146580262743688, "rewards/wordcountpos_reward/raw_geo/std": 0.12937457719782372, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1299572579307862, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1023 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1322.0, "completions/mean_length": 1071.3125, "completions/mean_terminated_length": 1042.7333984375, "completions/min_length": 626.0, "completions/min_terminated_length": 626.0, "epoch": 0.20484096819363873, "frac_reward_zero_std": 0.0, "grad_norm": 2.9423630011472475, "kl": 0.00988006591796875, "learning_rate": 9.703229100515476e-07, "loss": 0.0003, "num_tokens": 44385205.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9214580059051514, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11169865509064815, "rewards/wordcountpos_reward/raw_geo/std": 0.05598194963629006, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09878896324620104, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1024 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1217.0, "completions/mean_length": 1042.125, "completions/mean_terminated_length": 1011.6000366210938, "completions/min_length": 728.0, "completions/min_terminated_length": 728.0, "epoch": 0.20504100820164034, "frac_reward_zero_std": 0.0, "grad_norm": 3.2124276466546506, "kl": 0.0076141357421875, "learning_rate": 9.702105838434528e-07, "loss": -0.0065, "num_tokens": 44415439.0, "reward": 0.0, "reward_std": 0.6295967102050781, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0162787288376992, "rewards/wordcountpos_reward/raw_geo/std": 0.06378971790509128, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.09953596037316065, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1025 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1110.0, "completions/mean_terminated_length": 1084.0, "completions/min_length": 847.0, "completions/min_terminated_length": 847.0, "epoch": 0.20524104820964192, "frac_reward_zero_std": 0.0, "grad_norm": 3.520301643165769, "kl": 0.0111083984375, "learning_rate": 9.700980527387692e-07, "loss": 0.0043, "num_tokens": 44467287.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9457652568817139, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10087234255350139, "rewards/wordcountpos_reward/raw_geo/std": 0.07879581422583315, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1002773930432755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1026 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1159.125, "completions/mean_terminated_length": 1110.4285888671875, "completions/min_length": 898.0, "completions/min_terminated_length": 898.0, "epoch": 0.20544108821764354, "frac_reward_zero_std": 0.0, "grad_norm": 3.3295307185596497, "kl": 0.0095367431640625, "learning_rate": 9.699853167923675e-07, "loss": -0.0288, "num_tokens": 44502601.0, "reward": 0.0, "reward_std": 0.6006546020507812, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.003064591765003523, "rewards/wordcountpos_reward/raw_geo/std": 0.12383102429605307, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.11013459778666118, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1027 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1419.375, "completions/mean_terminated_length": 1356.6666259765625, "completions/min_length": 1125.0, "completions/min_terminated_length": 1125.0, "epoch": 0.20564112822564512, "frac_reward_zero_std": 0.0, "grad_norm": 2.963689846310938, "kl": 0.00814056396484375, "learning_rate": 9.698723760592182e-07, "loss": -0.0088, "num_tokens": 44559871.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0168511867523193, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.15300859766625066, "rewards/wordcountpos_reward/raw_geo/std": 0.26536751178136914, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1102186379345533, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1028 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1389.0, "completions/mean_length": 1200.5, "completions/mean_terminated_length": 1180.533447265625, "completions/min_length": 928.0, "completions/min_terminated_length": 928.0, "epoch": 0.20584116823364673, "frac_reward_zero_std": 0.0, "grad_norm": 2.8322887391144014, "kl": 0.00760650634765625, "learning_rate": 9.697592305943917e-07, "loss": 0.0202, "num_tokens": 44601959.0, "reward": 0.0, "reward_std": 0.9511810541152954, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0730598886110834, "rewards/wordcountpos_reward/raw_geo/std": 0.0708538610963962, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.1365582225578092, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1029 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1343.1875, "completions/mean_terminated_length": 1271.9091796875, "completions/min_length": 943.0, "completions/min_terminated_length": 943.0, "epoch": 0.20604120824164832, "frac_reward_zero_std": 0.0, "grad_norm": 2.4594413853019943, "kl": 0.00547027587890625, "learning_rate": 9.696458804530582e-07, "loss": -0.0158, "num_tokens": 44649010.0, "reward": -5.960464477539063e-08, "reward_std": 0.7383902072906494, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.22492243973412773, "rewards/wordcountpos_reward/raw_geo/std": 0.17503506611537023, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08766518798921945, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1030 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1259.0, "completions/max_terminated_length": 1259.0, "completions/mean_length": 982.0, "completions/mean_terminated_length": 982.0, "completions/min_length": 750.0, "completions/min_terminated_length": 750.0, "epoch": 0.20624124824964993, "frac_reward_zero_std": 0.0, "grad_norm": 3.4996242284965136, "kl": 0.011444091796875, "learning_rate": 9.69532325690488e-07, "loss": -0.058, "num_tokens": 44688690.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0064935684204102, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.024527595457799145, "rewards/wordcountpos_reward/raw_geo/std": 0.12104033671782122, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.12382783747337808, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1031 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1211.0625, "completions/mean_terminated_length": 1191.800048828125, "completions/min_length": 1030.0, "completions/min_terminated_length": 1030.0, "epoch": 0.20644128825765154, "frac_reward_zero_std": 0.0, "grad_norm": 3.2057668136861093, "kl": 0.0098876953125, "learning_rate": 9.694185663620505e-07, "loss": -0.0261, "num_tokens": 44732035.0, "reward": 0.0, "reward_std": 0.9098652601242065, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.007008216224957144, "rewards/wordcountpos_reward/raw_geo/std": 0.047497767298104045, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.016666666666666663, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1032 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1209.6875, "completions/mean_terminated_length": 1142.6923828125, "completions/min_length": 446.0, "completions/min_terminated_length": 446.0, "epoch": 0.20664132826565312, "frac_reward_zero_std": 0.0, "grad_norm": 3.0143958106665023, "kl": 0.0100250244140625, "learning_rate": 9.693046025232158e-07, "loss": -0.0284, "num_tokens": 44775254.0, "reward": 0.0, "reward_std": 0.7934092283248901, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0005931047853998304, "rewards/wordcountpos_reward/raw_geo/std": 0.03753031833194541, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460884, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1033 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1393.0, "completions/max_terminated_length": 1393.0, "completions/mean_length": 1157.4375, "completions/mean_terminated_length": 1157.4375, "completions/min_length": 926.0, "completions/min_terminated_length": 926.0, "epoch": 0.20684136827365474, "frac_reward_zero_std": 0.0, "grad_norm": 3.58827631959605, "kl": 0.0111236572265625, "learning_rate": 9.691904342295527e-07, "loss": -0.0267, "num_tokens": 44811389.0, "reward": -3.725290298461914e-08, "reward_std": 1.0079220533370972, "rewards/wordcountpos_reward/mean": -3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10497499442564695, "rewards/wordcountpos_reward/raw_geo/std": 0.06730277239524456, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857661, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1034 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1498.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1147.625, "completions/mean_terminated_length": 1147.625, "completions/min_length": 823.0, "completions/min_terminated_length": 823.0, "epoch": 0.20704140828165632, "frac_reward_zero_std": 0.0, "grad_norm": 2.612447949866124, "kl": 0.0074920654296875, "learning_rate": 9.690760615367303e-07, "loss": -0.0002, "num_tokens": 44854199.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0288931131362915, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.19293829786213518, "rewards/wordcountpos_reward/raw_geo/std": 0.08432007848148097, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0620632890834175, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1035 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1315.0, "completions/max_terminated_length": 1315.0, "completions/mean_length": 1106.5, "completions/mean_terminated_length": 1106.5, "completions/min_length": 928.0, "completions/min_terminated_length": 928.0, "epoch": 0.20724144828965793, "frac_reward_zero_std": 0.0, "grad_norm": 2.1016342536230606, "kl": 0.00634002685546875, "learning_rate": 9.689614845005175e-07, "loss": -0.0015, "num_tokens": 44890079.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0467946529388428, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2070276611857532, "rewards/wordcountpos_reward/raw_geo/std": 0.11631432940669552, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1036 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 917.0, "completions/max_terminated_length": 917.0, "completions/mean_length": 781.6875, "completions/mean_terminated_length": 781.6875, "completions/min_length": 654.0, "completions/min_terminated_length": 654.0, "epoch": 0.20744148829765954, "frac_reward_zero_std": 0.0, "grad_norm": 3.2564041530876815, "kl": 0.0065155029296875, "learning_rate": 9.688467031767824e-07, "loss": 0.0159, "num_tokens": 44915442.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9421567916870117, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.059829417446559764, "rewards/wordcountpos_reward/raw_geo/std": 0.06451722720566706, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1037 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1175.9375, "completions/mean_terminated_length": 1129.6429443359375, "completions/min_length": 770.0, "completions/min_terminated_length": 770.0, "epoch": 0.20764152830566113, "frac_reward_zero_std": 0.0, "grad_norm": 3.356593784508273, "kl": 0.0098876953125, "learning_rate": 9.687317176214927e-07, "loss": 0.0385, "num_tokens": 44967169.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9754360318183899, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13508211703300332, "rewards/wordcountpos_reward/raw_geo/std": 0.14859961766053717, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194864, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1038 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1349.0, "completions/max_terminated_length": 1349.0, "completions/mean_length": 1070.6875, "completions/mean_terminated_length": 1070.6875, "completions/min_length": 882.0, "completions/min_terminated_length": 882.0, "epoch": 0.20784156831366274, "frac_reward_zero_std": 0.0, "grad_norm": 2.885257352748282, "kl": 0.00785064697265625, "learning_rate": 9.686165278907162e-07, "loss": -0.0121, "num_tokens": 44999892.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8495421409606934, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.012762074037665194, "rewards/wordcountpos_reward/raw_geo/std": 0.0540356347368421, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1039 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1320.0, "completions/max_terminated_length": 1320.0, "completions/mean_length": 1057.25, "completions/mean_terminated_length": 1057.25, "completions/min_length": 821.0, "completions/min_terminated_length": 821.0, "epoch": 0.20804160832166432, "frac_reward_zero_std": 0.0, "grad_norm": 3.4147544811439112, "kl": 0.01132965087890625, "learning_rate": 9.6850113404062e-07, "loss": -0.0153, "num_tokens": 45040344.0, "reward": 5.960464477539063e-08, "reward_std": 0.5505907535552979, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01923252980863088, "rewards/wordcountpos_reward/raw_geo/std": 0.07539343064477129, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10174405069512348, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1040 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1237.0, "completions/max_terminated_length": 1237.0, "completions/mean_length": 973.8125, "completions/mean_terminated_length": 973.8125, "completions/min_length": 813.0, "completions/min_terminated_length": 813.0, "epoch": 0.20824164832966593, "frac_reward_zero_std": 0.0, "grad_norm": 3.6230334080118825, "kl": 0.01043701171875, "learning_rate": 9.683855361274702e-07, "loss": -0.033, "num_tokens": 45086949.0, "reward": -5.960464477539063e-08, "reward_std": 0.8344032764434814, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08313853851249176, "rewards/wordcountpos_reward/raw_geo/std": 0.17758198901836925, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402213, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1041 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1404.875, "completions/mean_terminated_length": 1373.166748046875, "completions/min_length": 1268.0, "completions/min_terminated_length": 1268.0, "epoch": 0.20844168833766755, "frac_reward_zero_std": 0.0, "grad_norm": 2.1634322477222776, "kl": 0.00661468505859375, "learning_rate": 9.68269734207633e-07, "loss": 0.0223, "num_tokens": 45137075.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6665918827056885, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08271687474049956, "rewards/wordcountpos_reward/raw_geo/std": 0.10089660794624224, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1042 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1344.0, "completions/max_terminated_length": 1344.0, "completions/mean_length": 1018.375, "completions/mean_terminated_length": 1018.375, "completions/min_length": 693.0, "completions/min_terminated_length": 693.0, "epoch": 0.20864172834566913, "frac_reward_zero_std": 0.0, "grad_norm": 3.402176778991871, "kl": 0.009002685546875, "learning_rate": 9.681537283375741e-07, "loss": -0.0158, "num_tokens": 45184705.0, "reward": 0.0, "reward_std": 0.9108322858810425, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.016036360514874032, "rewards/wordcountpos_reward/raw_geo/std": 0.1401057574100318, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1043 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1488.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1107.8125, "completions/mean_terminated_length": 1107.8125, "completions/min_length": 817.0, "completions/min_terminated_length": 817.0, "epoch": 0.20884176835367074, "frac_reward_zero_std": 0.0, "grad_norm": 2.8765865360277427, "kl": 0.0115203857421875, "learning_rate": 9.680375185738587e-07, "loss": -0.031, "num_tokens": 45227462.0, "reward": 0.0, "reward_std": 1.0264365673065186, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06504414869856646, "rewards/wordcountpos_reward/raw_geo/std": 0.07875761126609393, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1044 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1248.0, "completions/max_terminated_length": 1248.0, "completions/mean_length": 1097.25, "completions/mean_terminated_length": 1097.25, "completions/min_length": 1001.0, "completions/min_terminated_length": 1001.0, "epoch": 0.20904180836167233, "frac_reward_zero_std": 0.0, "grad_norm": 2.2016970759665, "kl": 0.00402069091796875, "learning_rate": 9.67921104973151e-07, "loss": 0.0092, "num_tokens": 45268962.0, "reward": -1.1175870895385742e-08, "reward_std": 0.9274089336395264, "rewards/wordcountpos_reward/mean": -1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06010035708222863, "rewards/wordcountpos_reward/raw_geo/std": 0.3090398875540753, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1045 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1437.0, "completions/max_terminated_length": 1437.0, "completions/mean_length": 1072.125, "completions/mean_terminated_length": 1072.125, "completions/min_length": 883.0, "completions/min_terminated_length": 883.0, "epoch": 0.20924184836967394, "frac_reward_zero_std": 0.0, "grad_norm": 2.9994268996822533, "kl": 0.0062255859375, "learning_rate": 9.678044875922147e-07, "loss": 0.0377, "num_tokens": 45311108.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0063072443008423, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10105689556638398, "rewards/wordcountpos_reward/raw_geo/std": 0.04508599698142703, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036262, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1046 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1359.0, "completions/max_terminated_length": 1359.0, "completions/mean_length": 1027.0, "completions/mean_terminated_length": 1027.0, "completions/min_length": 877.0, "completions/min_terminated_length": 877.0, "epoch": 0.20944188837767552, "frac_reward_zero_std": 0.0, "grad_norm": 2.447000251039581, "kl": 0.0064697265625, "learning_rate": 9.67687666487913e-07, "loss": -0.02, "num_tokens": 45361996.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9451016187667847, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.007728871421802886, "rewards/wordcountpos_reward/raw_geo/std": 0.2379883134382006, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09269623828717427, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1047 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1445.0, "completions/mean_length": 1246.6875, "completions/mean_terminated_length": 1162.25, "completions/min_length": 917.0, "completions/min_terminated_length": 917.0, "epoch": 0.20964192838567713, "frac_reward_zero_std": 0.0, "grad_norm": 3.241285870311567, "kl": 0.010589599609375, "learning_rate": 9.675706417172084e-07, "loss": 0.005, "num_tokens": 45414223.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8232929706573486, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08335312582719405, "rewards/wordcountpos_reward/raw_geo/std": 0.10084649643342726, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1048 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1123.0, "completions/max_terminated_length": 1123.0, "completions/mean_length": 885.3125, "completions/mean_terminated_length": 885.3125, "completions/min_length": 744.0, "completions/min_terminated_length": 744.0, "epoch": 0.20984196839367875, "frac_reward_zero_std": 0.0, "grad_norm": 2.710960868451215, "kl": 0.0064849853515625, "learning_rate": 9.674534133371629e-07, "loss": -0.0018, "num_tokens": 45446644.0, "reward": 0.0, "reward_std": 0.6163842678070068, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11342590766734405, "rewards/wordcountpos_reward/raw_geo/std": 0.08481188737958653, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.15098442401882486, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1049 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1281.1875, "completions/mean_terminated_length": 1230.6923828125, "completions/min_length": 911.0, "completions/min_terminated_length": 911.0, "epoch": 0.21004200840168033, "frac_reward_zero_std": 0.0, "grad_norm": 3.1793658987228026, "kl": 0.009857177734375, "learning_rate": 9.673359814049372e-07, "loss": -0.0261, "num_tokens": 45500079.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0164932012557983, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.20837301836171723, "rewards/wordcountpos_reward/raw_geo/std": 0.17508007661172637, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10878112581387146, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1050 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1218.0, "completions/mean_length": 1080.3125, "completions/mean_terminated_length": 1052.3333740234375, "completions/min_length": 877.0, "completions/min_terminated_length": 877.0, "epoch": 0.21024204840968194, "frac_reward_zero_std": 0.0, "grad_norm": 3.587978684397212, "kl": 0.0127716064453125, "learning_rate": 9.672183459777922e-07, "loss": -0.008, "num_tokens": 45540732.0, "reward": 0.0, "reward_std": 0.5821975469589233, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11284981974797713, "rewards/wordcountpos_reward/raw_geo/std": 0.06318909683715614, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09583937179043477, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1051 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1214.0, "completions/max_terminated_length": 1214.0, "completions/mean_length": 931.75, "completions/mean_terminated_length": 931.75, "completions/min_length": 800.0, "completions/min_terminated_length": 800.0, "epoch": 0.21044208841768353, "frac_reward_zero_std": 0.0, "grad_norm": 3.020482927286804, "kl": 0.00839996337890625, "learning_rate": 9.671005071130868e-07, "loss": -0.0012, "num_tokens": 45583240.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8391639590263367, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15111352624647637, "rewards/wordcountpos_reward/raw_geo/std": 0.08228366703570168, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.11917929226045818, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1052 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1197.25, "completions/mean_terminated_length": 1177.0667724609375, "completions/min_length": 795.0, "completions/min_terminated_length": 795.0, "epoch": 0.21064212842568514, "frac_reward_zero_std": 0.0, "grad_norm": 2.38701586915838, "kl": 0.00493621826171875, "learning_rate": 9.669824648682805e-07, "loss": -0.0186, "num_tokens": 45620772.0, "reward": 0.0, "reward_std": 0.6755601167678833, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.053968503614717674, "rewards/wordcountpos_reward/raw_geo/std": 0.051191887898063886, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10246950765959599, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1053 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1225.3125, "completions/mean_terminated_length": 1161.923095703125, "completions/min_length": 910.0, "completions/min_terminated_length": 910.0, "epoch": 0.21084216843368675, "frac_reward_zero_std": 0.0, "grad_norm": 3.4163455851709816, "kl": 0.0115966796875, "learning_rate": 9.668642193009306e-07, "loss": -0.001, "num_tokens": 45665609.0, "reward": -7.450580596923828e-09, "reward_std": 0.9238741397857666, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.13307950189102855, "rewards/wordcountpos_reward/raw_geo/std": 0.10841996812082676, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476836, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1054 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1331.0, "completions/max_terminated_length": 1331.0, "completions/mean_length": 1086.625, "completions/mean_terminated_length": 1086.625, "completions/min_length": 896.0, "completions/min_terminated_length": 896.0, "epoch": 0.21104220844168833, "frac_reward_zero_std": 0.0, "grad_norm": 3.509020213821013, "kl": 0.0106658935546875, "learning_rate": 9.667457704686943e-07, "loss": -0.0135, "num_tokens": 45702547.0, "reward": 0.0, "reward_std": 0.648855984210968, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.13632479433874617, "rewards/wordcountpos_reward/raw_geo/std": 0.11279095120114178, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505422, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1055 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1174.0, "completions/max_terminated_length": 1174.0, "completions/mean_length": 912.0625, "completions/mean_terminated_length": 912.0625, "completions/min_length": 641.0, "completions/min_terminated_length": 641.0, "epoch": 0.21124224844968995, "frac_reward_zero_std": 0.0, "grad_norm": 2.9178123293520177, "kl": 0.00921630859375, "learning_rate": 9.66627118429328e-07, "loss": -0.0651, "num_tokens": 45735508.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6051981449127197, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.14563859013122746, "rewards/wordcountpos_reward/raw_geo/std": 0.17647188123951113, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11792967144619461, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1056 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 1193.625, "completions/mean_terminated_length": 1173.2000732421875, "completions/min_length": 930.0, "completions/min_terminated_length": 930.0, "epoch": 0.21144228845769153, "frac_reward_zero_std": 0.0, "grad_norm": 2.793992040655907, "kl": 0.00777435302734375, "learning_rate": 9.665082632406872e-07, "loss": 0.0167, "num_tokens": 45782486.0, "reward": 0.0, "reward_std": 0.8433632850646973, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07907572323693199, "rewards/wordcountpos_reward/raw_geo/std": 0.06316771403222832, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1057 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1363.0, "completions/max_terminated_length": 1363.0, "completions/mean_length": 1070.375, "completions/mean_terminated_length": 1070.375, "completions/min_length": 845.0, "completions/min_terminated_length": 845.0, "epoch": 0.21164232846569314, "frac_reward_zero_std": 0.0, "grad_norm": 2.7798368895577505, "kl": 0.00786590576171875, "learning_rate": 9.663892049607257e-07, "loss": 0.0368, "num_tokens": 45823916.0, "reward": 0.0, "reward_std": 0.9004707336425781, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.027594963536194485, "rewards/wordcountpos_reward/raw_geo/std": 0.079562450765131, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09699179041242309, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1058 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1406.0, "completions/max_terminated_length": 1406.0, "completions/mean_length": 1268.5, "completions/mean_terminated_length": 1268.5, "completions/min_length": 1121.0, "completions/min_terminated_length": 1121.0, "epoch": 0.21184236847369473, "frac_reward_zero_std": 0.0, "grad_norm": 2.0835311256406075, "kl": 0.004917144775390625, "learning_rate": 9.662699436474969e-07, "loss": 0.0064, "num_tokens": 45873092.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9966124296188354, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06549685387733982, "rewards/wordcountpos_reward/raw_geo/std": 0.17348312533307966, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1059 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1307.0, "completions/max_terminated_length": 1307.0, "completions/mean_length": 933.75, "completions/mean_terminated_length": 933.75, "completions/min_length": 672.0, "completions/min_terminated_length": 672.0, "epoch": 0.21204240848169634, "frac_reward_zero_std": 0.0, "grad_norm": 2.978527572282619, "kl": 0.00930023193359375, "learning_rate": 9.661504793591536e-07, "loss": 0.0002, "num_tokens": 45911520.0, "reward": -2.9802322387695312e-08, "reward_std": 0.664455235004425, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.13253658253712405, "rewards/wordcountpos_reward/raw_geo/std": 0.10800361605666144, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1060 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1387.0, "completions/mean_length": 1371.625, "completions/mean_terminated_length": 1271.77783203125, "completions/min_length": 1120.0, "completions/min_terminated_length": 1120.0, "epoch": 0.21224244848969795, "frac_reward_zero_std": 0.0, "grad_norm": 2.8444264432079427, "kl": 0.0092315673828125, "learning_rate": 9.660308121539469e-07, "loss": -0.0092, "num_tokens": 45965930.0, "reward": -1.4901161193847656e-08, "reward_std": 1.000205159187317, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.17549675618436897, "rewards/wordcountpos_reward/raw_geo/std": 0.2991769376030446, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0894427190999916, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1061 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1165.3125, "completions/mean_terminated_length": 964.5, "completions/min_length": 693.0, "completions/min_terminated_length": 693.0, "epoch": 0.21244248849769953, "frac_reward_zero_std": 0.0, "grad_norm": 3.591085584940392, "kl": 0.011962890625, "learning_rate": 9.659109420902268e-07, "loss": -0.0158, "num_tokens": 46016799.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0620059967041016, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.14965807290379401, "rewards/wordcountpos_reward/raw_geo/std": 0.3004387441070456, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.75, "rewards/wordcountpos_reward/raw_rule/std": 0.1299572579307862, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1062 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1243.0, "completions/max_terminated_length": 1243.0, "completions/mean_length": 1056.0, "completions/mean_terminated_length": 1056.0, "completions/min_length": 777.0, "completions/min_terminated_length": 777.0, "epoch": 0.21264252850570114, "frac_reward_zero_std": 0.0, "grad_norm": 3.4207288658657964, "kl": 0.00936126708984375, "learning_rate": 9.65790869226443e-07, "loss": -0.0202, "num_tokens": 46061543.0, "reward": 0.0, "reward_std": 0.8493836522102356, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.13030053737032024, "rewards/wordcountpos_reward/raw_geo/std": 0.2348020547535778, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1063 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1119.0, "completions/max_terminated_length": 1119.0, "completions/mean_length": 869.0625, "completions/mean_terminated_length": 869.0625, "completions/min_length": 712.0, "completions/min_terminated_length": 712.0, "epoch": 0.21284256851370273, "frac_reward_zero_std": 0.0, "grad_norm": 3.19263064361344, "kl": 0.00640869140625, "learning_rate": 9.65670593621143e-07, "loss": 0.0674, "num_tokens": 46099832.0, "reward": 0.0, "reward_std": 1.0267257690429688, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.007941629760634433, "rewards/wordcountpos_reward/raw_geo/std": 0.19021647278088488, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09496588081262933, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1064 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1332.0, "completions/mean_length": 1129.3125, "completions/mean_terminated_length": 1104.60009765625, "completions/min_length": 814.0, "completions/min_terminated_length": 814.0, "epoch": 0.21304260852170434, "frac_reward_zero_std": 0.0, "grad_norm": 3.615103622792107, "kl": 0.012664794921875, "learning_rate": 9.655501153329743e-07, "loss": -0.0711, "num_tokens": 46149597.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8551164865493774, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14384238241639116, "rewards/wordcountpos_reward/raw_geo/std": 0.11721721854152951, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13526380260918403, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1065 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1492.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1005.6875, "completions/mean_terminated_length": 1005.6875, "completions/min_length": 785.0, "completions/min_terminated_length": 785.0, "epoch": 0.21324264852970595, "frac_reward_zero_std": 0.0, "grad_norm": 3.1498245107586467, "kl": 0.00713348388671875, "learning_rate": 9.654294344206822e-07, "loss": -0.0089, "num_tokens": 46199496.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5969727039337158, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11124711081266404, "rewards/wordcountpos_reward/raw_geo/std": 0.13319919426396323, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820635, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1066 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1407.625, "completions/mean_terminated_length": 1253.666748046875, "completions/min_length": 1128.0, "completions/min_terminated_length": 1128.0, "epoch": 0.21344268853770754, "frac_reward_zero_std": 0.0, "grad_norm": 2.9984404393330983, "kl": 0.0108642578125, "learning_rate": 9.653085509431115e-07, "loss": -0.0039, "num_tokens": 46253370.0, "reward": 0.0, "reward_std": 0.6834812164306641, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11845751290341999, "rewards/wordcountpos_reward/raw_geo/std": 0.06554628022720338, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8250000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036262, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1067 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1265.0, "completions/max_terminated_length": 1265.0, "completions/mean_length": 1102.1875, "completions/mean_terminated_length": 1102.1875, "completions/min_length": 949.0, "completions/min_terminated_length": 949.0, "epoch": 0.21364272854570915, "frac_reward_zero_std": 0.0, "grad_norm": 2.513547314579187, "kl": 0.00943756103515625, "learning_rate": 9.651874649592055e-07, "loss": -0.0135, "num_tokens": 46300453.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8549100160598755, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08059477219930013, "rewards/wordcountpos_reward/raw_geo/std": 0.09359746601571091, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12382783747337808, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1068 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1159.9375, "completions/mean_terminated_length": 1137.2667236328125, "completions/min_length": 902.0, "completions/min_terminated_length": 902.0, "epoch": 0.21384276855371073, "frac_reward_zero_std": 0.0, "grad_norm": 3.54619867232021, "kl": 0.00970458984375, "learning_rate": 9.650661765280062e-07, "loss": 0.0268, "num_tokens": 46346956.0, "reward": 0.0, "reward_std": 1.0458364486694336, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.12527216717577552, "rewards/wordcountpos_reward/raw_geo/std": 0.21419353369906438, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823629, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1069 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1135.4375, "completions/mean_terminated_length": 1083.357177734375, "completions/min_length": 790.0, "completions/min_terminated_length": 790.0, "epoch": 0.21404280856171234, "frac_reward_zero_std": 0.0, "grad_norm": 2.5576607830932514, "kl": 0.00873565673828125, "learning_rate": 9.649446857086547e-07, "loss": -0.0102, "num_tokens": 46390283.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6127912998199463, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.14722177944036385, "rewards/wordcountpos_reward/raw_geo/std": 0.09576126114317503, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.22273551829717486, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1070 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1341.0, "completions/mean_terminated_length": 1288.0, "completions/min_length": 839.0, "completions/min_terminated_length": 839.0, "epoch": 0.21424284856971396, "frac_reward_zero_std": 0.0, "grad_norm": 2.6071929831578657, "kl": 0.013671875, "learning_rate": 9.648229925603898e-07, "loss": -0.0413, "num_tokens": 46444107.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8557888269424438, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05484255920086129, "rewards/wordcountpos_reward/raw_geo/std": 0.06143771948901399, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11155467020454342, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1071 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1391.0, "completions/mean_length": 1055.3125, "completions/mean_terminated_length": 991.7857666015625, "completions/min_length": 823.0, "completions/min_terminated_length": 823.0, "epoch": 0.21444288857771554, "frac_reward_zero_std": 0.0, "grad_norm": 2.652320596881037, "kl": 0.0081329345703125, "learning_rate": 9.647010971425503e-07, "loss": 0.0135, "num_tokens": 46495344.0, "reward": 0.0, "reward_std": 0.8134421110153198, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.087615328207544, "rewards/wordcountpos_reward/raw_geo/std": 0.09709196364717053, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1072 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1370.0, "completions/max_terminated_length": 1370.0, "completions/mean_length": 1018.875, "completions/mean_terminated_length": 1018.875, "completions/min_length": 686.0, "completions/min_terminated_length": 686.0, "epoch": 0.21464292858571715, "frac_reward_zero_std": 0.0, "grad_norm": 3.657674774712119, "kl": 0.00905609130859375, "learning_rate": 9.645789995145727e-07, "loss": 0.0133, "num_tokens": 46525654.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9971361756324768, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10032695983383245, "rewards/wordcountpos_reward/raw_geo/std": 0.043220729766449195, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1073 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1337.0, "completions/mean_length": 1150.5625, "completions/mean_terminated_length": 1069.923095703125, "completions/min_length": 776.0, "completions/min_terminated_length": 776.0, "epoch": 0.21484296859371874, "frac_reward_zero_std": 0.0, "grad_norm": 2.9314360367275305, "kl": 0.00862884521484375, "learning_rate": 9.644566997359924e-07, "loss": -0.0507, "num_tokens": 46560535.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9673082828521729, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05799556343012018, "rewards/wordcountpos_reward/raw_geo/std": 0.033834630381845074, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07685966046898339, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1074 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1420.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1247.6875, "completions/mean_terminated_length": 1247.6875, "completions/min_length": 1100.0, "completions/min_terminated_length": 1100.0, "epoch": 0.21504300860172035, "frac_reward_zero_std": 0.0, "grad_norm": 3.3839645467628334, "kl": 0.0104522705078125, "learning_rate": 9.643341978664432e-07, "loss": -0.0185, "num_tokens": 46605698.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9555608630180359, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.19188525105786305, "rewards/wordcountpos_reward/raw_geo/std": 0.10247647472578679, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.067631901304592, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1075 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1137.0, "completions/max_terminated_length": 1137.0, "completions/mean_length": 957.6875, "completions/mean_terminated_length": 957.6875, "completions/min_length": 760.0, "completions/min_terminated_length": 760.0, "epoch": 0.21524304860972193, "frac_reward_zero_std": 0.0, "grad_norm": 3.4530412537920028, "kl": 0.00930023193359375, "learning_rate": 9.642114939656579e-07, "loss": -0.0232, "num_tokens": 46645605.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9252154231071472, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05879350998611767, "rewards/wordcountpos_reward/raw_geo/std": 0.07150558204956681, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1076 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1366.0, "completions/mean_length": 1257.875, "completions/mean_terminated_length": 1069.5555419921875, "completions/min_length": 987.0, "completions/min_terminated_length": 987.0, "epoch": 0.21544308861772354, "frac_reward_zero_std": 0.0, "grad_norm": 3.140753769990601, "kl": 0.0104827880859375, "learning_rate": 9.64088588093467e-07, "loss": 0.0025, "num_tokens": 46693555.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9229426383972168, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0829657690297842, "rewards/wordcountpos_reward/raw_geo/std": 0.10869345599233637, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10741060020797313, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1077 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1350.0, "completions/max_terminated_length": 1350.0, "completions/mean_length": 1150.1875, "completions/mean_terminated_length": 1150.1875, "completions/min_length": 978.0, "completions/min_terminated_length": 978.0, "epoch": 0.21564312862572516, "frac_reward_zero_std": 0.0, "grad_norm": 2.884670916257981, "kl": 0.00891876220703125, "learning_rate": 9.639654803098003e-07, "loss": -0.0335, "num_tokens": 46731406.0, "reward": -2.2351741790771484e-08, "reward_std": 0.9441984295845032, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.020168653768786463, "rewards/wordcountpos_reward/raw_geo/std": 0.08700413303880884, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.12881223774390613, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1078 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1423.0, "completions/mean_length": 1242.8125, "completions/mean_terminated_length": 1225.666748046875, "completions/min_length": 982.0, "completions/min_terminated_length": 982.0, "epoch": 0.21584316863372674, "frac_reward_zero_std": 0.0, "grad_norm": 3.389205647697212, "kl": 0.0114288330078125, "learning_rate": 9.638421706746857e-07, "loss": 0.0108, "num_tokens": 46777259.0, "reward": 0.0, "reward_std": 0.5140067934989929, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07717342371003036, "rewards/wordcountpos_reward/raw_geo/std": 0.10192442662462703, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.067631901304592, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1079 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1451.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1261.5, "completions/mean_terminated_length": 1261.5, "completions/min_length": 945.0, "completions/min_terminated_length": 945.0, "epoch": 0.21604320864172835, "frac_reward_zero_std": 0.0, "grad_norm": 2.9733405400614323, "kl": 0.0107574462890625, "learning_rate": 9.637186592482493e-07, "loss": -0.0665, "num_tokens": 46821347.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6560477018356323, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05107118868237846, "rewards/wordcountpos_reward/raw_geo/std": 0.04508344034151571, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194864, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1080 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1349.25, "completions/mean_terminated_length": 1280.727294921875, "completions/min_length": 547.0, "completions/min_terminated_length": 547.0, "epoch": 0.21624324864972994, "frac_reward_zero_std": 0.0, "grad_norm": 2.936939892332347, "kl": 0.00922393798828125, "learning_rate": 9.63594946090716e-07, "loss": -0.0265, "num_tokens": 46867023.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0533491373062134, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11389784233958072, "rewards/wordcountpos_reward/raw_geo/std": 0.09639986584671074, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12524050936172845, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1081 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1496.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1063.375, "completions/mean_terminated_length": 1063.375, "completions/min_length": 727.0, "completions/min_terminated_length": 727.0, "epoch": 0.21644328865773155, "frac_reward_zero_std": 0.0, "grad_norm": 3.6741507197046364, "kl": 0.011810302734375, "learning_rate": 9.634710312624091e-07, "loss": -0.0796, "num_tokens": 46906117.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0175392627716064, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.22790182344918752, "rewards/wordcountpos_reward/raw_geo/std": 0.2431798284676306, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.13109227736669002, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1082 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1307.0, "completions/max_terminated_length": 1307.0, "completions/mean_length": 1109.1875, "completions/mean_terminated_length": 1109.1875, "completions/min_length": 894.0, "completions/min_terminated_length": 894.0, "epoch": 0.21664332866573316, "frac_reward_zero_std": 0.0, "grad_norm": 3.7717909112352097, "kl": 0.0139617919921875, "learning_rate": 9.633469148237496e-07, "loss": -0.0517, "num_tokens": 46953760.0, "reward": 0.0, "reward_std": 0.7441670894622803, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10746334610262578, "rewards/wordcountpos_reward/raw_geo/std": 0.23615392774343336, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1083 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1461.0, "completions/max_terminated_length": 1461.0, "completions/mean_length": 1233.5, "completions/mean_terminated_length": 1233.5, "completions/min_length": 943.0, "completions/min_terminated_length": 943.0, "epoch": 0.21684336867373474, "frac_reward_zero_std": 0.0, "grad_norm": 3.261071754202068, "kl": 0.0125274658203125, "learning_rate": 9.632225968352577e-07, "loss": -0.002, "num_tokens": 46997336.0, "reward": 0.0, "reward_std": 0.8631924390792847, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.3820017222852813, "rewards/wordcountpos_reward/raw_geo/std": 0.1072446550454882, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11155467020454343, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1084 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1055.3125, "completions/mean_terminated_length": 1025.666748046875, "completions/min_length": 732.0, "completions/min_terminated_length": 732.0, "epoch": 0.21704340868173636, "frac_reward_zero_std": 0.0, "grad_norm": 2.7425358097889347, "kl": 0.00731658935546875, "learning_rate": 9.63098077357551e-07, "loss": -0.0496, "num_tokens": 47035333.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0211741924285889, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.25938255963840157, "rewards/wordcountpos_reward/raw_geo/std": 0.16069610745421134, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10741060020797315, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1085 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1230.0, "completions/max_terminated_length": 1230.0, "completions/mean_length": 969.3125, "completions/mean_terminated_length": 969.3125, "completions/min_length": 763.0, "completions/min_terminated_length": 763.0, "epoch": 0.21724344868973794, "frac_reward_zero_std": 0.0, "grad_norm": 2.734345322927155, "kl": 0.0081024169921875, "learning_rate": 9.62973356451346e-07, "loss": -0.0055, "num_tokens": 47075010.0, "reward": 0.0, "reward_std": 0.7407355308532715, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0956907719983653, "rewards/wordcountpos_reward/raw_geo/std": 0.10088141257511246, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06978803887752093, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1086 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1393.25, "completions/mean_terminated_length": 1256.0, "completions/min_length": 1152.0, "completions/min_terminated_length": 1152.0, "epoch": 0.21744348869773955, "frac_reward_zero_std": 0.0, "grad_norm": 2.6552941463097226, "kl": 0.00748443603515625, "learning_rate": 9.62848434177457e-07, "loss": -0.0006, "num_tokens": 47131094.0, "reward": 0.0, "reward_std": 0.6017328500747681, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.23561267216962592, "rewards/wordcountpos_reward/raw_geo/std": 0.3683826869711652, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.07084150279686703, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1087 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1390.5625, "completions/mean_terminated_length": 1305.4444580078125, "completions/min_length": 1073.0, "completions/min_terminated_length": 1073.0, "epoch": 0.21764352870574113, "frac_reward_zero_std": 0.0, "grad_norm": 1.4987613557758759, "kl": 0.002590179443359375, "learning_rate": 9.62723310596797e-07, "loss": -0.0035, "num_tokens": 47171191.0, "reward": 0.0, "reward_std": 0.7417831420898438, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06531239617328487, "rewards/wordcountpos_reward/raw_geo/std": 0.08396849305583833, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503963, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1088 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1383.0, "completions/mean_length": 1179.4375, "completions/mean_terminated_length": 1033.727294921875, "completions/min_length": 868.0, "completions/min_terminated_length": 868.0, "epoch": 0.21784356871374275, "frac_reward_zero_std": 0.0, "grad_norm": 2.688953630068596, "kl": 0.007434844970703125, "learning_rate": 9.625979857703764e-07, "loss": 0.0315, "num_tokens": 47215454.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9893736243247986, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13179421208080838, "rewards/wordcountpos_reward/raw_geo/std": 0.15745701121512068, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1089 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1144.0, "completions/max_terminated_length": 1144.0, "completions/mean_length": 983.5, "completions/mean_terminated_length": 983.5, "completions/min_length": 750.0, "completions/min_terminated_length": 750.0, "epoch": 0.21804360872174436, "frac_reward_zero_std": 0.0, "grad_norm": 3.7875600831645992, "kl": 0.0123748779296875, "learning_rate": 9.624724597593045e-07, "loss": 0.0307, "num_tokens": 47258870.0, "reward": 4.470348358154297e-08, "reward_std": 1.0221328735351562, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.019465946730578918, "rewards/wordcountpos_reward/raw_geo/std": 0.21083970133961472, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.06666666666666665, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1090 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1488.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1120.125, "completions/mean_terminated_length": 1120.125, "completions/min_length": 785.0, "completions/min_terminated_length": 785.0, "epoch": 0.21824364872974594, "frac_reward_zero_std": 0.0, "grad_norm": 3.6495935900951055, "kl": 0.0113372802734375, "learning_rate": 9.623467326247882e-07, "loss": 0.021, "num_tokens": 47302568.0, "reward": -7.450580596923828e-09, "reward_std": 1.0099070072174072, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.0021527520925362863, "rewards/wordcountpos_reward/raw_geo/std": 0.21148326701724174, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.12464765155042849, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1091 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1265.375, "completions/mean_terminated_length": 1249.7333984375, "completions/min_length": 914.0, "completions/min_terminated_length": 914.0, "epoch": 0.21844368873774755, "frac_reward_zero_std": 0.0, "grad_norm": 3.1059810826047887, "kl": 0.00798797607421875, "learning_rate": 9.622208044281328e-07, "loss": -0.036, "num_tokens": 47353694.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9267206192016602, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08419330559818967, "rewards/wordcountpos_reward/raw_geo/std": 0.09904491812716718, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.102469507659596, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1092 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1324.0, "completions/mean_length": 1045.75, "completions/mean_terminated_length": 1015.4667358398438, "completions/min_length": 802.0, "completions/min_terminated_length": 802.0, "epoch": 0.21864372874574914, "frac_reward_zero_std": 0.0, "grad_norm": 3.4800212832063746, "kl": 0.0119781494140625, "learning_rate": 9.62094675230741e-07, "loss": -0.0402, "num_tokens": 47386554.0, "reward": 1.4901161193847656e-08, "reward_std": 1.021504282951355, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08419208140668184, "rewards/wordcountpos_reward/raw_geo/std": 0.1321926627557609, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1270024788326182, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1093 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1327.0, "completions/mean_length": 1043.6875, "completions/mean_terminated_length": 978.5000610351562, "completions/min_length": 663.0, "completions/min_terminated_length": 663.0, "epoch": 0.21884376875375075, "frac_reward_zero_std": 0.0, "grad_norm": 2.997480363044305, "kl": 0.00927734375, "learning_rate": 9.619683450941146e-07, "loss": -0.05, "num_tokens": 47423021.0, "reward": 7.450580596923828e-09, "reward_std": 1.0676316022872925, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.058299689893451793, "rewards/wordcountpos_reward/raw_geo/std": 0.07477748799547167, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1094 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1301.0, "completions/max_terminated_length": 1301.0, "completions/mean_length": 969.375, "completions/mean_terminated_length": 969.375, "completions/min_length": 823.0, "completions/min_terminated_length": 823.0, "epoch": 0.21904380876175236, "frac_reward_zero_std": 0.0, "grad_norm": 3.701993884192393, "kl": 0.0095367431640625, "learning_rate": 9.61841814079852e-07, "loss": -0.0179, "num_tokens": 47464643.0, "reward": -4.470348358154297e-08, "reward_std": 1.0583786964416504, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0005548188706213583, "rewards/wordcountpos_reward/raw_geo/std": 0.0668510957510083, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13221755360572016, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1095 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1279.0, "completions/max_terminated_length": 1279.0, "completions/mean_length": 1000.8125, "completions/mean_terminated_length": 1000.8125, "completions/min_length": 753.0, "completions/min_terminated_length": 753.0, "epoch": 0.21924384876975395, "frac_reward_zero_std": 0.0, "grad_norm": 3.4843043379656935, "kl": 0.0113677978515625, "learning_rate": 9.61715082249651e-07, "loss": -0.0391, "num_tokens": 47505328.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8710429668426514, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.095183079682951, "rewards/wordcountpos_reward/raw_geo/std": 0.15707658879698072, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.1172998689652263, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1096 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1359.0, "completions/max_terminated_length": 1359.0, "completions/mean_length": 1099.625, "completions/mean_terminated_length": 1099.625, "completions/min_length": 942.0, "completions/min_terminated_length": 942.0, "epoch": 0.21944388877775556, "frac_reward_zero_std": 0.0, "grad_norm": 3.360794977203704, "kl": 0.013092041015625, "learning_rate": 9.615881496653062e-07, "loss": -0.0054, "num_tokens": 47556586.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8489280939102173, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09213657652364075, "rewards/wordcountpos_reward/raw_geo/std": 0.23457539672749986, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11155467020454342, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1097 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1469.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 1119.0625, "completions/mean_terminated_length": 1119.0625, "completions/min_length": 697.0, "completions/min_terminated_length": 697.0, "epoch": 0.21964392878575714, "frac_reward_zero_std": 0.0, "grad_norm": 3.030450876480138, "kl": 0.00830078125, "learning_rate": 9.61461016388711e-07, "loss": 0.0412, "num_tokens": 47600235.0, "reward": -2.2351741790771484e-08, "reward_std": 0.8335460424423218, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.00836911914178208, "rewards/wordcountpos_reward/raw_geo/std": 0.24142437106790882, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.016666666666666663, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1098 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1360.0, "completions/max_terminated_length": 1360.0, "completions/mean_length": 1110.0, "completions/mean_terminated_length": 1110.0, "completions/min_length": 869.0, "completions/min_terminated_length": 869.0, "epoch": 0.21984396879375875, "frac_reward_zero_std": 0.0, "grad_norm": 3.5239661581575783, "kl": 0.0141448974609375, "learning_rate": 9.613336824818555e-07, "loss": -0.0596, "num_tokens": 47651075.0, "reward": 0.0, "reward_std": 0.8938028812408447, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.005489780703130847, "rewards/wordcountpos_reward/raw_geo/std": 0.012140746747440341, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402215, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1099 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1156.0, "completions/mean_terminated_length": 1133.0667724609375, "completions/min_length": 868.0, "completions/min_terminated_length": 868.0, "epoch": 0.22004400880176037, "frac_reward_zero_std": 0.0, "grad_norm": 1.7121219347752192, "kl": 0.00433349609375, "learning_rate": 9.612061480068286e-07, "loss": -0.0351, "num_tokens": 47689667.0, "reward": 0.0, "reward_std": 0.5347951650619507, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.026385070421797985, "rewards/wordcountpos_reward/raw_geo/std": 0.13086448896849112, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14240006242195888, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1100 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1398.0, "completions/mean_length": 1152.3125, "completions/mean_terminated_length": 1129.1334228515625, "completions/min_length": 906.0, "completions/min_terminated_length": 906.0, "epoch": 0.22024404880976195, "frac_reward_zero_std": 0.0, "grad_norm": 3.0789162350557917, "kl": 0.0093231201171875, "learning_rate": 9.610784130258167e-07, "loss": 0.0136, "num_tokens": 47740800.0, "reward": 0.0, "reward_std": 0.6178066730499268, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.001168259657906203, "rewards/wordcountpos_reward/raw_geo/std": 0.0663770610807529, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.10878112581387149, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1101 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1329.0, "completions/mean_length": 1116.9375, "completions/mean_terminated_length": 1091.4000244140625, "completions/min_length": 777.0, "completions/min_terminated_length": 777.0, "epoch": 0.22044408881776356, "frac_reward_zero_std": 0.0, "grad_norm": 2.46952708536387, "kl": 0.00746917724609375, "learning_rate": 9.60950477601104e-07, "loss": 0.0484, "num_tokens": 47790343.0, "reward": 0.0, "reward_std": 0.7428834438323975, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.16240022521641462, "rewards/wordcountpos_reward/raw_geo/std": 0.1749434215610104, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12171612389003693, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1102 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1494.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1017.75, "completions/mean_terminated_length": 1017.75, "completions/min_length": 642.0, "completions/min_terminated_length": 642.0, "epoch": 0.22064412882576515, "frac_reward_zero_std": 0.0, "grad_norm": 2.676057969506465, "kl": 0.0108184814453125, "learning_rate": 9.608223417950724e-07, "loss": -0.0628, "num_tokens": 47829099.0, "reward": 0.0, "reward_std": 0.35619881749153137, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.010071330272125869, "rewards/wordcountpos_reward/raw_geo/std": 0.07321271168142714, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11155467020454342, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1103 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1218.0, "completions/max_terminated_length": 1218.0, "completions/mean_length": 992.0625, "completions/mean_terminated_length": 992.0625, "completions/min_length": 814.0, "completions/min_terminated_length": 814.0, "epoch": 0.22084416883376676, "frac_reward_zero_std": 0.0, "grad_norm": 3.454878438968108, "kl": 0.0084991455078125, "learning_rate": 9.606940056702012e-07, "loss": 0.014, "num_tokens": 47858892.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9214839339256287, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03187644879473321, "rewards/wordcountpos_reward/raw_geo/std": 0.030046298838688503, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11547005383792518, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1104 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1455.0, "completions/mean_length": 1086.8125, "completions/mean_terminated_length": 1027.7857666015625, "completions/min_length": 759.0, "completions/min_terminated_length": 759.0, "epoch": 0.22104420884176834, "frac_reward_zero_std": 0.0, "grad_norm": 3.5982543760672043, "kl": 0.0143280029296875, "learning_rate": 9.60565469289068e-07, "loss": 0.0386, "num_tokens": 47910249.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0246981382369995, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.010324658578464603, "rewards/wordcountpos_reward/raw_geo/std": 0.09624074913485281, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1105 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1250.0, "completions/max_terminated_length": 1250.0, "completions/mean_length": 1016.625, "completions/mean_terminated_length": 1016.625, "completions/min_length": 744.0, "completions/min_terminated_length": 744.0, "epoch": 0.22124424884976995, "frac_reward_zero_std": 0.0, "grad_norm": 3.3422218474105323, "kl": 0.011505126953125, "learning_rate": 9.604367327143478e-07, "loss": 0.0092, "num_tokens": 47946443.0, "reward": 0.0, "reward_std": 0.9952700734138489, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14385924480877452, "rewards/wordcountpos_reward/raw_geo/std": 0.09638461475958361, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000302, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1106 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 1096.375, "completions/mean_terminated_length": 1069.4666748046875, "completions/min_length": 827.0, "completions/min_terminated_length": 827.0, "epoch": 0.22144428885777157, "frac_reward_zero_std": 0.0, "grad_norm": 3.6080633490197127, "kl": 0.0122833251953125, "learning_rate": 9.603077960088128e-07, "loss": -0.0304, "num_tokens": 47989537.0, "reward": 0.0, "reward_std": 0.7699503898620605, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0472935547124475, "rewards/wordcountpos_reward/raw_geo/std": 0.2410029442636135, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.14194417264596723, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1107 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1195.0, "completions/max_terminated_length": 1195.0, "completions/mean_length": 871.6875, "completions/mean_terminated_length": 871.6875, "completions/min_length": 589.0, "completions/min_terminated_length": 589.0, "epoch": 0.22164432886577315, "frac_reward_zero_std": 0.0, "grad_norm": 4.066980926253353, "kl": 0.0133056640625, "learning_rate": 9.601786592353334e-07, "loss": -0.007, "num_tokens": 48031116.0, "reward": -2.2351741790771484e-08, "reward_std": 0.9728751182556152, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.13546114232943185, "rewards/wordcountpos_reward/raw_geo/std": 0.08543285348546935, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14605934866804432, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1108 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1422.0, "completions/mean_length": 1162.875, "completions/mean_terminated_length": 1140.4000244140625, "completions/min_length": 875.0, "completions/min_terminated_length": 875.0, "epoch": 0.22184436887377476, "frac_reward_zero_std": 0.0, "grad_norm": 3.4521268437708716, "kl": 0.0103302001953125, "learning_rate": 9.60049322456877e-07, "loss": 0.0557, "num_tokens": 48076762.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8686368465423584, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.013034868752373422, "rewards/wordcountpos_reward/raw_geo/std": 0.2089271658399975, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820634, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1109 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1079.4375, "completions/mean_terminated_length": 1019.357177734375, "completions/min_length": 715.0, "completions/min_terminated_length": 715.0, "epoch": 0.22204440888177635, "frac_reward_zero_std": 0.0, "grad_norm": 1.8663965121316035, "kl": 0.005329132080078125, "learning_rate": 9.599197857365091e-07, "loss": 0.0023, "num_tokens": 48110873.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9579967260360718, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08761430715655595, "rewards/wordcountpos_reward/raw_geo/std": 0.14287824230326895, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08766518798921945, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1110 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1215.0, "completions/max_terminated_length": 1215.0, "completions/mean_length": 1025.8125, "completions/mean_terminated_length": 1025.8125, "completions/min_length": 853.0, "completions/min_terminated_length": 853.0, "epoch": 0.22224444888977796, "frac_reward_zero_std": 0.0, "grad_norm": 3.7588637799808393, "kl": 0.0133056640625, "learning_rate": 9.597900491373925e-07, "loss": -0.0151, "num_tokens": 48142374.0, "reward": 1.862645149230957e-09, "reward_std": 1.0671457052230835, "rewards/wordcountpos_reward/mean": 1.862645149230957e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.06101799691617067, "rewards/wordcountpos_reward/raw_geo/std": 0.05754629104442586, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1111 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1386.0, "completions/max_terminated_length": 1386.0, "completions/mean_length": 1130.1875, "completions/mean_terminated_length": 1130.1875, "completions/min_length": 843.0, "completions/min_terminated_length": 843.0, "epoch": 0.22244448889777957, "frac_reward_zero_std": 0.0, "grad_norm": 2.5824466498227743, "kl": 0.00577545166015625, "learning_rate": 9.596601127227868e-07, "loss": 0.0221, "num_tokens": 48192665.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8147425055503845, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15150011814161943, "rewards/wordcountpos_reward/raw_geo/std": 0.05762348104532725, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10000000000000003, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1112 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1445.0, "completions/max_terminated_length": 1445.0, "completions/mean_length": 1168.9375, "completions/mean_terminated_length": 1168.9375, "completions/min_length": 927.0, "completions/min_terminated_length": 927.0, "epoch": 0.22264452890578115, "frac_reward_zero_std": 0.0, "grad_norm": 2.876198678325462, "kl": 0.0066375732421875, "learning_rate": 9.5952997655605e-07, "loss": 0.0108, "num_tokens": 48237280.0, "reward": 7.450580596923828e-09, "reward_std": 1.0668383836746216, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.16322475765493205, "rewards/wordcountpos_reward/raw_geo/std": 0.07945382332079957, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1113 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1292.0625, "completions/mean_terminated_length": 1197.5455322265625, "completions/min_length": 1027.0, "completions/min_terminated_length": 1027.0, "epoch": 0.22284456891378276, "frac_reward_zero_std": 0.0, "grad_norm": 2.7135595372343557, "kl": 0.00738525390625, "learning_rate": 9.59399640700637e-07, "loss": 0.0526, "num_tokens": 48283553.0, "reward": 0.0, "reward_std": 0.4320484697818756, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11010380028541027, "rewards/wordcountpos_reward/raw_geo/std": 0.05293052729182484, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1114 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1348.0, "completions/max_terminated_length": 1348.0, "completions/mean_length": 1047.1875, "completions/mean_terminated_length": 1047.1875, "completions/min_length": 831.0, "completions/min_terminated_length": 831.0, "epoch": 0.22304460892178435, "frac_reward_zero_std": 0.0, "grad_norm": 3.4691264425482964, "kl": 0.01140594482421875, "learning_rate": 9.592691052201002e-07, "loss": 0.0508, "num_tokens": 48335100.0, "reward": 0.0, "reward_std": 0.7157114744186401, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07747688649833151, "rewards/wordcountpos_reward/raw_geo/std": 0.08424853145906115, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965647, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1115 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1278.1875, "completions/mean_terminated_length": 1263.4000244140625, "completions/min_length": 993.0, "completions/min_terminated_length": 993.0, "epoch": 0.22324464892978596, "frac_reward_zero_std": 0.0, "grad_norm": 2.9849673377282313, "kl": 0.0117340087890625, "learning_rate": 9.59138370178089e-07, "loss": 0.0211, "num_tokens": 48380975.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9721260666847229, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.060908954852252485, "rewards/wordcountpos_reward/raw_geo/std": 0.19434850795220185, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1116 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1311.0, "completions/max_terminated_length": 1311.0, "completions/mean_length": 1162.75, "completions/mean_terminated_length": 1162.75, "completions/min_length": 1028.0, "completions/min_terminated_length": 1028.0, "epoch": 0.22344468893778754, "frac_reward_zero_std": 0.0, "grad_norm": 2.474536408317372, "kl": 0.00801849365234375, "learning_rate": 9.59007435638351e-07, "loss": 0.0025, "num_tokens": 48422395.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9989966154098511, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.018420093141459946, "rewards/wordcountpos_reward/raw_geo/std": 0.08744868483120835, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.03415650255319865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1117 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1364.0, "completions/mean_length": 1278.125, "completions/mean_terminated_length": 1263.3333740234375, "completions/min_length": 1029.0, "completions/min_terminated_length": 1029.0, "epoch": 0.22364472894578916, "frac_reward_zero_std": 0.0, "grad_norm": 3.2121569806165096, "kl": 0.0118560791015625, "learning_rate": 9.588763016647298e-07, "loss": 0.022, "num_tokens": 48463317.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0513410568237305, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11370960032642238, "rewards/wordcountpos_reward/raw_geo/std": 0.10088892469762108, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1118 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1449.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 1092.0625, "completions/mean_terminated_length": 1092.0625, "completions/min_length": 722.0, "completions/min_terminated_length": 722.0, "epoch": 0.22384476895379077, "frac_reward_zero_std": 0.0, "grad_norm": 3.458220733653616, "kl": 0.01068115234375, "learning_rate": 9.587449683211675e-07, "loss": 0.0009, "num_tokens": 48504526.0, "reward": 0.0, "reward_std": 0.9841119050979614, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.014842705961776847, "rewards/wordcountpos_reward/raw_geo/std": 0.10813386407886666, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820634, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1119 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1376.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 1034.0625, "completions/mean_terminated_length": 1034.0625, "completions/min_length": 782.0, "completions/min_terminated_length": 782.0, "epoch": 0.22404480896179235, "frac_reward_zero_std": 0.0, "grad_norm": 3.8127942821617316, "kl": 0.013946533203125, "learning_rate": 9.586134356717026e-07, "loss": -0.0119, "num_tokens": 48540343.0, "reward": 0.0, "reward_std": 0.9207268357276917, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1378950626026639, "rewards/wordcountpos_reward/raw_geo/std": 0.23956508060585524, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575908, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1120 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1225.0625, "completions/mean_terminated_length": 1206.7333984375, "completions/min_length": 811.0, "completions/min_terminated_length": 811.0, "epoch": 0.22424484896979396, "frac_reward_zero_std": 0.0, "grad_norm": 2.2018069503523314, "kl": 0.00780487060546875, "learning_rate": 9.584817037804708e-07, "loss": -0.0297, "num_tokens": 48592896.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9770029187202454, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.032315323023053046, "rewards/wordcountpos_reward/raw_geo/std": 0.11925691920129869, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.0877707451472511, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1121 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1402.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 1025.625, "completions/mean_terminated_length": 1025.625, "completions/min_length": 843.0, "completions/min_terminated_length": 843.0, "epoch": 0.22444488897779555, "frac_reward_zero_std": 0.0, "grad_norm": 3.698286618595617, "kl": 0.011627197265625, "learning_rate": 9.583497727117054e-07, "loss": -0.0067, "num_tokens": 48628706.0, "reward": -7.450580596923828e-09, "reward_std": 1.0062321424484253, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.06503153069792313, "rewards/wordcountpos_reward/raw_geo/std": 0.08998514144249806, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1122 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1335.0, "completions/mean_length": 1307.0625, "completions/mean_terminated_length": 1242.75, "completions/min_length": 992.0, "completions/min_terminated_length": 992.0, "epoch": 0.22464492898579716, "frac_reward_zero_std": 0.0, "grad_norm": 3.273451351840682, "kl": 0.01081085205078125, "learning_rate": 9.582176425297366e-07, "loss": -0.0152, "num_tokens": 48675587.0, "reward": 5.960464477539063e-08, "reward_std": 0.9039405584335327, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10430136300178121, "rewards/wordcountpos_reward/raw_geo/std": 0.1590083314456146, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1123 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1201.0, "completions/max_terminated_length": 1201.0, "completions/mean_length": 884.125, "completions/mean_terminated_length": 884.125, "completions/min_length": 595.0, "completions/min_terminated_length": 595.0, "epoch": 0.22484496899379877, "frac_reward_zero_std": 0.0, "grad_norm": 3.548644235465976, "kl": 0.013275146484375, "learning_rate": 9.580853132989916e-07, "loss": -0.0612, "num_tokens": 48704829.0, "reward": 0.0, "reward_std": 0.8891583681106567, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1963759722696244, "rewards/wordcountpos_reward/raw_geo/std": 0.20729454216894955, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1124 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1292.0, "completions/mean_length": 1283.75, "completions/mean_terminated_length": 1067.5, "completions/min_length": 943.0, "completions/min_terminated_length": 943.0, "epoch": 0.22504500900180036, "frac_reward_zero_std": 0.0, "grad_norm": 2.9299087106163553, "kl": 0.00789642333984375, "learning_rate": 9.579527850839947e-07, "loss": -0.0191, "num_tokens": 48759401.0, "reward": 0.0, "reward_std": 0.43805375695228577, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.16331836598346472, "rewards/wordcountpos_reward/raw_geo/std": 0.16847366690919163, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09574271077563382, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1125 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1195.9375, "completions/mean_terminated_length": 1013.5, "completions/min_length": 763.0, "completions/min_terminated_length": 763.0, "epoch": 0.22524504900980197, "frac_reward_zero_std": 0.0, "grad_norm": 2.836403554620855, "kl": 0.01186370849609375, "learning_rate": 9.578200579493674e-07, "loss": 0.0194, "num_tokens": 48813920.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0379304885864258, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1867886348921156, "rewards/wordcountpos_reward/raw_geo/std": 0.3004409839226052, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1126 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1242.125, "completions/mean_terminated_length": 1224.933349609375, "completions/min_length": 771.0, "completions/min_terminated_length": 771.0, "epoch": 0.22544508901780355, "frac_reward_zero_std": 0.0, "grad_norm": 3.0742371251680205, "kl": 0.0081939697265625, "learning_rate": 9.57687131959828e-07, "loss": 0.0089, "num_tokens": 48854578.0, "reward": 0.0, "reward_std": 0.49281227588653564, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05675058672156692, "rewards/wordcountpos_reward/raw_geo/std": 0.2052890713261545, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948502, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1127 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1298.0, "completions/max_terminated_length": 1298.0, "completions/mean_length": 1140.125, "completions/mean_terminated_length": 1140.125, "completions/min_length": 944.0, "completions/min_terminated_length": 944.0, "epoch": 0.22564512902580516, "frac_reward_zero_std": 0.0, "grad_norm": 3.464679080041118, "kl": 0.0096893310546875, "learning_rate": 9.575540071801917e-07, "loss": 0.0105, "num_tokens": 48903204.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8845804929733276, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.051096158660686286, "rewards/wordcountpos_reward/raw_geo/std": 0.10200238871034564, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10292032157252812, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1128 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1257.0625, "completions/mean_terminated_length": 1111.300048828125, "completions/min_length": 922.0, "completions/min_terminated_length": 922.0, "epoch": 0.22584516903380678, "frac_reward_zero_std": 0.0, "grad_norm": 2.0839412138352342, "kl": 0.01325225830078125, "learning_rate": 9.574206836753708e-07, "loss": 0.0181, "num_tokens": 48939773.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0020803213119507, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11956820538101501, "rewards/wordcountpos_reward/raw_geo/std": 0.06713976474032227, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.09737289911202955, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1129 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1368.0, "completions/max_terminated_length": 1368.0, "completions/mean_length": 1156.625, "completions/mean_terminated_length": 1156.625, "completions/min_length": 905.0, "completions/min_terminated_length": 905.0, "epoch": 0.22604520904180836, "frac_reward_zero_std": 0.0, "grad_norm": 3.1467857972762303, "kl": 0.00982666015625, "learning_rate": 9.572871615103747e-07, "loss": -0.0235, "num_tokens": 48993623.0, "reward": 0.0, "reward_std": 0.6768962740898132, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2169620158247195, "rewards/wordcountpos_reward/raw_geo/std": 0.2526734056268605, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087679, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1130 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1305.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 1108.875, "completions/mean_terminated_length": 1108.875, "completions/min_length": 407.0, "completions/min_terminated_length": 407.0, "epoch": 0.22624524904980997, "frac_reward_zero_std": 0.0, "grad_norm": 2.5548248136724037, "kl": 0.00775146484375, "learning_rate": 9.57153440750309e-07, "loss": -0.0419, "num_tokens": 49035437.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6830762624740601, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.015143340831155217, "rewards/wordcountpos_reward/raw_geo/std": 0.21444756602406698, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133131, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1131 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1422.0, "completions/max_terminated_length": 1422.0, "completions/mean_length": 1167.8125, "completions/mean_terminated_length": 1167.8125, "completions/min_length": 837.0, "completions/min_terminated_length": 837.0, "epoch": 0.22644528905781156, "frac_reward_zero_std": 0.0, "grad_norm": 3.4320774059457047, "kl": 0.012542724609375, "learning_rate": 9.570195214603767e-07, "loss": -0.0185, "num_tokens": 49079130.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0202593803405762, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03801267092001991, "rewards/wordcountpos_reward/raw_geo/std": 0.11349513414927752, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.03442651863295481, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1132 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1234.75, "completions/mean_terminated_length": 1217.0667724609375, "completions/min_length": 950.0, "completions/min_terminated_length": 950.0, "epoch": 0.22664532906581317, "frac_reward_zero_std": 0.0, "grad_norm": 3.2227580728503566, "kl": 0.011871337890625, "learning_rate": 9.568854037058776e-07, "loss": -0.0124, "num_tokens": 49121486.0, "reward": 0.0, "reward_std": 0.5776551961898804, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.02580008849692016, "rewards/wordcountpos_reward/raw_geo/std": 0.28219799650593286, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.03849001794597504, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1133 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1423.0, "completions/mean_length": 1179.0625, "completions/mean_terminated_length": 1157.666748046875, "completions/min_length": 820.0, "completions/min_terminated_length": 820.0, "epoch": 0.22684536907381475, "frac_reward_zero_std": 0.0, "grad_norm": 3.036328135469364, "kl": 0.0126800537109375, "learning_rate": 9.567510875522081e-07, "loss": 0.0139, "num_tokens": 49169599.0, "reward": 0.0, "reward_std": 0.4948246479034424, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.14580489580228434, "rewards/wordcountpos_reward/raw_geo/std": 0.39604311801128295, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1134 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1239.4375, "completions/mean_terminated_length": 1152.5833740234375, "completions/min_length": 897.0, "completions/min_terminated_length": 897.0, "epoch": 0.22704540908181636, "frac_reward_zero_std": 0.0, "grad_norm": 2.9692546080182995, "kl": 0.0095977783203125, "learning_rate": 9.566165730648613e-07, "loss": 0.0074, "num_tokens": 49224798.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5619721412658691, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.037732782637500226, "rewards/wordcountpos_reward/raw_geo/std": 0.10634662945839567, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.775, "rewards/wordcountpos_reward/raw_rule/std": 0.11894598836509011, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1135 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 849.0, "completions/mean_length": 1140.25, "completions/mean_terminated_length": 780.5, "completions/min_length": 740.0, "completions/min_terminated_length": 740.0, "epoch": 0.22724544908981797, "frac_reward_zero_std": 0.0, "grad_norm": 1.983648284010815, "kl": 0.00554656982421875, "learning_rate": 9.56481860309427e-07, "loss": -0.0273, "num_tokens": 49259602.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0375480651855469, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08967808831111865, "rewards/wordcountpos_reward/raw_geo/std": 0.04869483669544209, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1136 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1198.9375, "completions/mean_terminated_length": 1129.4615478515625, "completions/min_length": 805.0, "completions/min_terminated_length": 805.0, "epoch": 0.22744548909781956, "frac_reward_zero_std": 0.0, "grad_norm": 3.1990311145866666, "kl": 0.0085906982421875, "learning_rate": 9.563469493515917e-07, "loss": -0.0417, "num_tokens": 49312369.0, "reward": 0.0, "reward_std": 0.8134428262710571, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.006953747030456145, "rewards/wordcountpos_reward/raw_geo/std": 0.07126850547692305, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.6708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12285191326386657, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1137 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1293.6875, "completions/mean_terminated_length": 1264.21435546875, "completions/min_length": 1079.0, "completions/min_terminated_length": 1079.0, "epoch": 0.22764552910582117, "frac_reward_zero_std": 0.0, "grad_norm": 2.9646538707044643, "kl": 0.009735107421875, "learning_rate": 9.562118402571387e-07, "loss": 0.0256, "num_tokens": 49366948.0, "reward": 0.0, "reward_std": 0.9062649011611938, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1755036323143243, "rewards/wordcountpos_reward/raw_geo/std": 0.13758589289456213, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575908, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1138 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1460.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1038.0, "completions/mean_terminated_length": 1038.0, "completions/min_length": 634.0, "completions/min_terminated_length": 634.0, "epoch": 0.22784556911382275, "frac_reward_zero_std": 0.0, "grad_norm": 3.465762852651177, "kl": 0.0115203857421875, "learning_rate": 9.56076533091948e-07, "loss": -0.0162, "num_tokens": 49414388.0, "reward": 0.0, "reward_std": 0.8003247380256653, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.025844836836400476, "rewards/wordcountpos_reward/raw_geo/std": 0.14025250485777443, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1258305739211792, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1139 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1358.0, "completions/max_terminated_length": 1358.0, "completions/mean_length": 1118.25, "completions/mean_terminated_length": 1118.25, "completions/min_length": 912.0, "completions/min_terminated_length": 912.0, "epoch": 0.22804560912182437, "frac_reward_zero_std": 0.0, "grad_norm": 2.9878308663697273, "kl": 0.0159454345703125, "learning_rate": 9.559410279219959e-07, "loss": -0.0608, "num_tokens": 49465936.0, "reward": 0.0, "reward_std": 0.9567099809646606, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.13945296066673965, "rewards/wordcountpos_reward/raw_geo/std": 0.09788984981907481, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901162, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1140 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1278.1875, "completions/mean_terminated_length": 1145.0999755859375, "completions/min_length": 876.0, "completions/min_terminated_length": 876.0, "epoch": 0.22824564912982598, "frac_reward_zero_std": 0.0, "grad_norm": 2.857512459046222, "kl": 0.00829315185546875, "learning_rate": 9.55805324813355e-07, "loss": 0.0269, "num_tokens": 49502515.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8453488349914551, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01508588083925872, "rewards/wordcountpos_reward/raw_geo/std": 0.15094952534510675, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.14291929864761418, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1141 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1452.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1179.8125, "completions/mean_terminated_length": 1179.8125, "completions/min_length": 990.0, "completions/min_terminated_length": 990.0, "epoch": 0.22844568913782756, "frac_reward_zero_std": 0.0, "grad_norm": 2.477461226157233, "kl": 0.006866455078125, "learning_rate": 9.55669423832195e-07, "loss": -0.0328, "num_tokens": 49544600.0, "reward": 0.0, "reward_std": 0.5849494934082031, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05452017226884918, "rewards/wordcountpos_reward/raw_geo/std": 0.1440268624736924, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066471, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1142 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1335.0625, "completions/mean_terminated_length": 1170.125, "completions/min_length": 691.0, "completions/min_terminated_length": 691.0, "epoch": 0.22864572914582917, "frac_reward_zero_std": 0.0, "grad_norm": 2.6088982419374096, "kl": 0.0110626220703125, "learning_rate": 9.555333250447819e-07, "loss": -0.0478, "num_tokens": 49600137.0, "reward": 0.0, "reward_std": 1.0005755424499512, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.18280361337928605, "rewards/wordcountpos_reward/raw_geo/std": 0.14928929397196405, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1143 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1438.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1212.3125, "completions/mean_terminated_length": 1212.3125, "completions/min_length": 737.0, "completions/min_terminated_length": 737.0, "epoch": 0.22884576915383076, "frac_reward_zero_std": 0.0, "grad_norm": 2.4756730483559126, "kl": 0.00616455078125, "learning_rate": 9.55397028517478e-07, "loss": -0.0, "num_tokens": 49642414.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9680562019348145, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11487212273724176, "rewards/wordcountpos_reward/raw_geo/std": 0.0633203560402893, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.033333333333333326, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1144 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1377.0, "completions/mean_length": 1281.0625, "completions/mean_terminated_length": 1266.4666748046875, "completions/min_length": 1094.0, "completions/min_terminated_length": 1094.0, "epoch": 0.22904580916183237, "frac_reward_zero_std": 0.0, "grad_norm": 2.1293868259622153, "kl": 0.00514984130859375, "learning_rate": 9.552605343167422e-07, "loss": 0.0115, "num_tokens": 49691319.0, "reward": 0.0, "reward_std": 1.000067949295044, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.16547145357457368, "rewards/wordcountpos_reward/raw_geo/std": 0.313838368698743, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.03849001794597504, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1145 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1416.0, "completions/max_terminated_length": 1416.0, "completions/mean_length": 1028.125, "completions/mean_terminated_length": 1028.125, "completions/min_length": 800.0, "completions/min_terminated_length": 800.0, "epoch": 0.22924584916983395, "frac_reward_zero_std": 0.0, "grad_norm": 3.3411597688026453, "kl": 0.010162353515625, "learning_rate": 9.551238425091295e-07, "loss": 0.0067, "num_tokens": 49731225.0, "reward": 0.0, "reward_std": 0.7595741748809814, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06714213245505474, "rewards/wordcountpos_reward/raw_geo/std": 0.0860507250350992, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13333333333333333, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1146 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1315.0, "completions/max_terminated_length": 1315.0, "completions/mean_length": 1006.5625, "completions/mean_terminated_length": 1006.5625, "completions/min_length": 632.0, "completions/min_terminated_length": 632.0, "epoch": 0.22944588917783557, "frac_reward_zero_std": 0.0, "grad_norm": 3.967118853695208, "kl": 0.0144195556640625, "learning_rate": 9.549869531612918e-07, "loss": -0.0047, "num_tokens": 49766530.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0180479288101196, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10311899965987689, "rewards/wordcountpos_reward/raw_geo/std": 0.12959071565285613, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11021863793455329, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1147 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1297.0, "completions/max_terminated_length": 1297.0, "completions/mean_length": 928.0, "completions/mean_terminated_length": 928.0, "completions/min_length": 654.0, "completions/min_terminated_length": 654.0, "epoch": 0.22964592918583718, "frac_reward_zero_std": 0.0, "grad_norm": 3.593591430899529, "kl": 0.00914764404296875, "learning_rate": 9.548498663399764e-07, "loss": -0.0264, "num_tokens": 49802122.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5304368734359741, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10104776365921816, "rewards/wordcountpos_reward/raw_geo/std": 0.23351574491728172, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.11385500851066223, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1148 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1309.1875, "completions/mean_terminated_length": 1281.9285888671875, "completions/min_length": 1073.0, "completions/min_terminated_length": 1073.0, "epoch": 0.22984596919383876, "frac_reward_zero_std": 0.0, "grad_norm": 3.1839698434067714, "kl": 0.0119781494140625, "learning_rate": 9.54712582112028e-07, "loss": -0.0143, "num_tokens": 49855869.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9298601150512695, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04880521198151409, "rewards/wordcountpos_reward/raw_geo/std": 0.10081132530439563, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.1403039029577766, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1149 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1417.0, "completions/mean_length": 1249.25, "completions/mean_terminated_length": 1232.533447265625, "completions/min_length": 982.0, "completions/min_terminated_length": 982.0, "epoch": 0.23004600920184037, "frac_reward_zero_std": 0.0, "grad_norm": 3.209259165924364, "kl": 0.0108795166015625, "learning_rate": 9.545751005443868e-07, "loss": -0.0606, "num_tokens": 49904537.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9427385330200195, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10436095468001841, "rewards/wordcountpos_reward/raw_geo/std": 0.26268860188677623, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.11279282877125756, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1150 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1134.6875, "completions/mean_terminated_length": 1050.3846435546875, "completions/min_length": 880.0, "completions/min_terminated_length": 880.0, "epoch": 0.23024604920984196, "frac_reward_zero_std": 0.0, "grad_norm": 3.354877516071196, "kl": 0.010467529296875, "learning_rate": 9.544374217040894e-07, "loss": 0.0049, "num_tokens": 49956292.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0051803588867188, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01190230620216923, "rewards/wordcountpos_reward/raw_geo/std": 0.191618457315505, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1101345977866612, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1151 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1482.125, "completions/mean_terminated_length": 1452.3333740234375, "completions/min_length": 1389.0, "completions/min_terminated_length": 1389.0, "epoch": 0.23044608921784357, "frac_reward_zero_std": 0.0, "grad_norm": 2.6223389992079498, "kl": 0.0088348388671875, "learning_rate": 9.542995456582687e-07, "loss": 0.0033, "num_tokens": 50005134.0, "reward": 0.0, "reward_std": 0.947304368019104, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12606385386716026, "rewards/wordcountpos_reward/raw_geo/std": 0.12027285791891168, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06978803887752091, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1152 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1321.0, "completions/max_terminated_length": 1321.0, "completions/mean_length": 1077.5625, "completions/mean_terminated_length": 1077.5625, "completions/min_length": 840.0, "completions/min_terminated_length": 840.0, "epoch": 0.23064612922584518, "frac_reward_zero_std": 0.0, "grad_norm": 3.0286624973166134, "kl": 0.00902557373046875, "learning_rate": 9.541614724741535e-07, "loss": 0.0104, "num_tokens": 50045975.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9524807929992676, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.20306906647160788, "rewards/wordcountpos_reward/raw_geo/std": 0.18801419388294252, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362767, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1153 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1281.0, "completions/mean_terminated_length": 1266.4000244140625, "completions/min_length": 983.0, "completions/min_terminated_length": 983.0, "epoch": 0.23084616923384677, "frac_reward_zero_std": 0.0, "grad_norm": 3.049303298317739, "kl": 0.01068115234375, "learning_rate": 9.540232022190694e-07, "loss": -0.0387, "num_tokens": 50097655.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0689671039581299, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03316920708299982, "rewards/wordcountpos_reward/raw_geo/std": 0.05379554502182039, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1154 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1227.0, "completions/max_terminated_length": 1227.0, "completions/mean_length": 1061.6875, "completions/mean_terminated_length": 1061.6875, "completions/min_length": 757.0, "completions/min_terminated_length": 757.0, "epoch": 0.23104620924184838, "frac_reward_zero_std": 0.0, "grad_norm": 3.730527631698179, "kl": 0.01397705078125, "learning_rate": 9.538847349604369e-07, "loss": -0.0586, "num_tokens": 50135106.0, "reward": 2.2351741790771484e-08, "reward_std": 1.064586877822876, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06578180754015497, "rewards/wordcountpos_reward/raw_geo/std": 0.07157985794018576, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09803627446568496, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1155 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1408.0, "completions/mean_length": 1062.9375, "completions/mean_terminated_length": 1033.800048828125, "completions/min_length": 751.0, "completions/min_terminated_length": 751.0, "epoch": 0.23124624924984996, "frac_reward_zero_std": 0.0, "grad_norm": 3.643396681482157, "kl": 0.015380859375, "learning_rate": 9.53746070765774e-07, "loss": -0.0178, "num_tokens": 50188409.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0249221324920654, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.2256795445270049, "rewards/wordcountpos_reward/raw_geo/std": 0.14552988590190669, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09339283817414602, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1156 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1398.0, "completions/max_terminated_length": 1398.0, "completions/mean_length": 1020.625, "completions/mean_terminated_length": 1020.625, "completions/min_length": 650.0, "completions/min_terminated_length": 650.0, "epoch": 0.23144628925785157, "frac_reward_zero_std": 0.0, "grad_norm": 2.9098740253943993, "kl": 0.008544921875, "learning_rate": 9.536072097026933e-07, "loss": -0.0422, "num_tokens": 50226579.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9652504920959473, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10180699751230987, "rewards/wordcountpos_reward/raw_geo/std": 0.2258504709352916, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11155467020454342, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1157 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1390.0, "completions/max_terminated_length": 1390.0, "completions/mean_length": 1088.75, "completions/mean_terminated_length": 1088.75, "completions/min_length": 763.0, "completions/min_terminated_length": 763.0, "epoch": 0.23164632926585316, "frac_reward_zero_std": 0.0, "grad_norm": 3.219279287198751, "kl": 0.011138916015625, "learning_rate": 9.534681518389045e-07, "loss": -0.0217, "num_tokens": 50269175.0, "reward": 1.4901161193847656e-08, "reward_std": 1.006312370300293, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.004155747679554641, "rewards/wordcountpos_reward/raw_geo/std": 0.19588092976753227, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133131, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1158 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1462.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1086.25, "completions/mean_terminated_length": 1086.25, "completions/min_length": 705.0, "completions/min_terminated_length": 705.0, "epoch": 0.23184636927385477, "frac_reward_zero_std": 0.0, "grad_norm": 3.2809789166485213, "kl": 0.0099639892578125, "learning_rate": 9.533288972422126e-07, "loss": -0.0238, "num_tokens": 50308723.0, "reward": -3.725290298461914e-08, "reward_std": 1.0568022727966309, "rewards/wordcountpos_reward/mean": -3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02578697472644781, "rewards/wordcountpos_reward/raw_geo/std": 0.11181167160441836, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.13221755360572016, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1159 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1323.75, "completions/mean_terminated_length": 1218.0, "completions/min_length": 1019.0, "completions/min_terminated_length": 1019.0, "epoch": 0.23204640928185638, "frac_reward_zero_std": 0.0, "grad_norm": 3.4299575068382664, "kl": 0.0163116455078125, "learning_rate": 9.531894459805192e-07, "loss": -0.0054, "num_tokens": 50361895.0, "reward": 0.0, "reward_std": 0.6450403332710266, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05441950640453444, "rewards/wordcountpos_reward/raw_geo/std": 0.0722647779333742, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066473, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1160 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1239.0, "completions/mean_length": 1053.375, "completions/mean_terminated_length": 1023.6000366210938, "completions/min_length": 797.0, "completions/min_terminated_length": 797.0, "epoch": 0.23224644928985796, "frac_reward_zero_std": 0.0, "grad_norm": 1.5676348590334177, "kl": 0.003814697265625, "learning_rate": 9.53049798121821e-07, "loss": -0.0067, "num_tokens": 50395189.0, "reward": 0.0, "reward_std": 0.9334632158279419, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11211344232712508, "rewards/wordcountpos_reward/raw_geo/std": 0.10919833329008882, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.10532137766186216, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1161 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1163.25, "completions/mean_terminated_length": 1140.800048828125, "completions/min_length": 760.0, "completions/min_terminated_length": 760.0, "epoch": 0.23244648929785958, "frac_reward_zero_std": 0.0, "grad_norm": 3.555029809478265, "kl": 0.0130462646484375, "learning_rate": 9.52909953734211e-07, "loss": -0.0273, "num_tokens": 50443057.0, "reward": 0.0, "reward_std": 1.0423115491867065, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.20747905884940018, "rewards/wordcountpos_reward/raw_geo/std": 0.18202950707904117, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1060223596263578, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1162 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1236.1875, "completions/mean_terminated_length": 1218.60009765625, "completions/min_length": 988.0, "completions/min_terminated_length": 988.0, "epoch": 0.23264652930586116, "frac_reward_zero_std": 0.0, "grad_norm": 2.80659615019871, "kl": 0.0106048583984375, "learning_rate": 9.527699128858779e-07, "loss": -0.0257, "num_tokens": 50488956.0, "reward": -7.450580596923828e-09, "reward_std": 1.0094435214996338, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.11969988334729055, "rewards/wordcountpos_reward/raw_geo/std": 0.24580626314050372, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.09803627446568497, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1163 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1330.0, "completions/max_terminated_length": 1330.0, "completions/mean_length": 1004.75, "completions/mean_terminated_length": 1004.75, "completions/min_length": 481.0, "completions/min_terminated_length": 481.0, "epoch": 0.23284656931386277, "frac_reward_zero_std": 0.0, "grad_norm": 3.5443095289446336, "kl": 0.00855255126953125, "learning_rate": 9.526296756451065e-07, "loss": -0.0046, "num_tokens": 50520216.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0298347473144531, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.013478362850192306, "rewards/wordcountpos_reward/raw_geo/std": 0.03204082669449738, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07084150279686703, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1164 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 973.0, "completions/max_terminated_length": 973.0, "completions/mean_length": 826.875, "completions/mean_terminated_length": 826.875, "completions/min_length": 717.0, "completions/min_terminated_length": 717.0, "epoch": 0.23304660932186438, "frac_reward_zero_std": 0.0, "grad_norm": 3.070213035890712, "kl": 0.00722503662109375, "learning_rate": 9.524892420802769e-07, "loss": -0.026, "num_tokens": 50568742.0, "reward": 0.0, "reward_std": 0.8031182289123535, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2236438509358696, "rewards/wordcountpos_reward/raw_geo/std": 0.2675904732837485, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237134, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1165 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1257.875, "completions/mean_terminated_length": 1112.5999755859375, "completions/min_length": 909.0, "completions/min_terminated_length": 909.0, "epoch": 0.23324664932986597, "frac_reward_zero_std": 0.0, "grad_norm": 3.4068947855956675, "kl": 0.010833740234375, "learning_rate": 9.523486122598652e-07, "loss": -0.0502, "num_tokens": 50621972.0, "reward": -5.960464477539063e-08, "reward_std": 0.633068323135376, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05564615635771998, "rewards/wordcountpos_reward/raw_geo/std": 0.04240078724855802, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08766518798921945, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1166 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1404.0, "completions/max_terminated_length": 1404.0, "completions/mean_length": 1202.5625, "completions/mean_terminated_length": 1202.5625, "completions/min_length": 952.0, "completions/min_terminated_length": 952.0, "epoch": 0.23344668933786758, "frac_reward_zero_std": 0.0, "grad_norm": 2.8533288749958, "kl": 0.014068603515625, "learning_rate": 9.522077862524432e-07, "loss": -0.0125, "num_tokens": 50674229.0, "reward": 0.0, "reward_std": 0.9095343351364136, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07193658663866266, "rewards/wordcountpos_reward/raw_geo/std": 0.18047118540213775, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746353, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1167 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1237.0, "completions/max_terminated_length": 1237.0, "completions/mean_length": 1085.1875, "completions/mean_terminated_length": 1085.1875, "completions/min_length": 951.0, "completions/min_terminated_length": 951.0, "epoch": 0.23364672934586916, "frac_reward_zero_std": 0.0, "grad_norm": 3.5875487496981, "kl": 0.012939453125, "learning_rate": 9.520667641266781e-07, "loss": 0.0056, "num_tokens": 50719248.0, "reward": 0.0, "reward_std": 0.6857028007507324, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03702286804320375, "rewards/wordcountpos_reward/raw_geo/std": 0.138005601771996, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10318986456114841, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1168 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1493.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1257.8125, "completions/mean_terminated_length": 1257.8125, "completions/min_length": 938.0, "completions/min_terminated_length": 938.0, "epoch": 0.23384676935387078, "frac_reward_zero_std": 0.0, "grad_norm": 2.6415271546142294, "kl": 0.00934600830078125, "learning_rate": 9.519255459513332e-07, "loss": -0.0625, "num_tokens": 50772357.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9801534414291382, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08769029612081755, "rewards/wordcountpos_reward/raw_geo/std": 0.07120901126223715, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08681611046941137, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1169 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1330.0, "completions/max_terminated_length": 1330.0, "completions/mean_length": 1127.75, "completions/mean_terminated_length": 1127.75, "completions/min_length": 934.0, "completions/min_terminated_length": 934.0, "epoch": 0.2340468093618724, "frac_reward_zero_std": 0.0, "grad_norm": 3.276188114267022, "kl": 0.0131378173828125, "learning_rate": 9.517841317952668e-07, "loss": 0.0353, "num_tokens": 50815889.0, "reward": 0.0, "reward_std": 0.9212627410888672, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.20728460579494976, "rewards/wordcountpos_reward/raw_geo/std": 0.07644931995242212, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965646, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1170 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1229.0, "completions/max_terminated_length": 1229.0, "completions/mean_length": 973.9375, "completions/mean_terminated_length": 973.9375, "completions/min_length": 669.0, "completions/min_terminated_length": 669.0, "epoch": 0.23424684936987397, "frac_reward_zero_std": 0.0, "grad_norm": 3.742404127644982, "kl": 0.0117950439453125, "learning_rate": 9.516425217274333e-07, "loss": -0.0337, "num_tokens": 50844920.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7345133423805237, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.024381230816104372, "rewards/wordcountpos_reward/raw_geo/std": 0.10279194730127855, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1171 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1457.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1126.0625, "completions/mean_terminated_length": 1126.0625, "completions/min_length": 871.0, "completions/min_terminated_length": 871.0, "epoch": 0.23444688937787558, "frac_reward_zero_std": 0.0, "grad_norm": 2.3929706319840025, "kl": 0.00710296630859375, "learning_rate": 9.515007158168826e-07, "loss": 0.0094, "num_tokens": 50885457.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9731065034866333, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11804107636006356, "rewards/wordcountpos_reward/raw_geo/std": 0.09032435372091181, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.016666666666666663, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1172 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1235.3125, "completions/mean_terminated_length": 1147.0833740234375, "completions/min_length": 897.0, "completions/min_terminated_length": 897.0, "epoch": 0.23464692938587717, "frac_reward_zero_std": 0.0, "grad_norm": 2.934457807058159, "kl": 0.00884246826171875, "learning_rate": 9.513587141327596e-07, "loss": -0.0366, "num_tokens": 50931166.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7465544939041138, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.008823395865892241, "rewards/wordcountpos_reward/raw_geo/std": 0.15549696475952096, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.13492110177323527, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1173 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1196.0, "completions/max_terminated_length": 1196.0, "completions/mean_length": 940.625, "completions/mean_terminated_length": 940.625, "completions/min_length": 767.0, "completions/min_terminated_length": 767.0, "epoch": 0.23484696939387878, "frac_reward_zero_std": 0.0, "grad_norm": 4.047040221635227, "kl": 0.012542724609375, "learning_rate": 9.512165167443049e-07, "loss": -0.0133, "num_tokens": 50971176.0, "reward": 0.0, "reward_std": 0.8164693713188171, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.31286761631768606, "rewards/wordcountpos_reward/raw_geo/std": 0.23903127868433727, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.046943622609505783, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1174 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1207.875, "completions/mean_terminated_length": 1188.4000244140625, "completions/min_length": 875.0, "completions/min_terminated_length": 875.0, "epoch": 0.23504700940188036, "frac_reward_zero_std": 0.0, "grad_norm": 2.9908549695390736, "kl": 0.0099945068359375, "learning_rate": 9.510741237208549e-07, "loss": -0.035, "num_tokens": 51017710.0, "reward": 0.0, "reward_std": 0.9569621682167053, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0778112573721702, "rewards/wordcountpos_reward/raw_geo/std": 0.1041567795106529, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1175 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1372.0, "completions/mean_length": 1125.3125, "completions/mean_terminated_length": 1100.3333740234375, "completions/min_length": 844.0, "completions/min_terminated_length": 844.0, "epoch": 0.23524704940988198, "frac_reward_zero_std": 0.0, "grad_norm": 3.2818632108914927, "kl": 0.0149688720703125, "learning_rate": 9.509315351318409e-07, "loss": -0.0388, "num_tokens": 51054315.0, "reward": 2.9802322387695312e-08, "reward_std": 0.3323240876197815, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0043583424699854025, "rewards/wordcountpos_reward/raw_geo/std": 0.283483378983603, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1176 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1320.0, "completions/max_terminated_length": 1320.0, "completions/mean_length": 1049.75, "completions/mean_terminated_length": 1049.75, "completions/min_length": 786.0, "completions/min_terminated_length": 786.0, "epoch": 0.2354470894178836, "frac_reward_zero_std": 0.0, "grad_norm": 3.970046876097469, "kl": 0.0124969482421875, "learning_rate": 9.507887510467898e-07, "loss": -0.0157, "num_tokens": 51091903.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8134541511535645, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10165428380277537, "rewards/wordcountpos_reward/raw_geo/std": 0.134847942665467, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11279282877125756, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1177 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1231.0, "completions/max_terminated_length": 1231.0, "completions/mean_length": 1023.9375, "completions/mean_terminated_length": 1023.9375, "completions/min_length": 847.0, "completions/min_terminated_length": 847.0, "epoch": 0.23564712942588517, "frac_reward_zero_std": 0.0, "grad_norm": 3.1221687934548075, "kl": 0.01165771484375, "learning_rate": 9.506457715353236e-07, "loss": 0.0035, "num_tokens": 51131302.0, "reward": 0.0, "reward_std": 0.8757616281509399, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10334395655738081, "rewards/wordcountpos_reward/raw_geo/std": 0.1182151653097622, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1178 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1443.0, "completions/max_terminated_length": 1443.0, "completions/mean_length": 1245.8125, "completions/mean_terminated_length": 1245.8125, "completions/min_length": 1095.0, "completions/min_terminated_length": 1095.0, "epoch": 0.23584716943388678, "frac_reward_zero_std": 0.0, "grad_norm": 2.301489027494201, "kl": 0.006927490234375, "learning_rate": 9.505025966671601e-07, "loss": 0.0011, "num_tokens": 51178699.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8741211891174316, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.00703365811828602, "rewards/wordcountpos_reward/raw_geo/std": 0.08853917913110107, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1179 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1199.0, "completions/max_terminated_length": 1199.0, "completions/mean_length": 1001.375, "completions/mean_terminated_length": 1001.375, "completions/min_length": 729.0, "completions/min_terminated_length": 729.0, "epoch": 0.23604720944188837, "frac_reward_zero_std": 0.0, "grad_norm": 3.45187718623664, "kl": 0.010833740234375, "learning_rate": 9.503592265121117e-07, "loss": -0.075, "num_tokens": 51209609.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9848106503486633, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.037713592306805835, "rewards/wordcountpos_reward/raw_geo/std": 0.07878511564334237, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1164283279771532, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1180 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1467.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 1100.125, "completions/mean_terminated_length": 1100.125, "completions/min_length": 850.0, "completions/min_terminated_length": 850.0, "epoch": 0.23624724944988998, "frac_reward_zero_std": 0.0, "grad_norm": 3.632077568255943, "kl": 0.011260986328125, "learning_rate": 9.502156611400866e-07, "loss": 0.0433, "num_tokens": 51260811.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0279598236083984, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09201749942055977, "rewards/wordcountpos_reward/raw_geo/std": 0.12349186566069431, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7999999999999999, "rewards/wordcountpos_reward/raw_rule/std": 0.1264911064067352, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1181 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1416.0, "completions/max_terminated_length": 1416.0, "completions/mean_length": 1077.125, "completions/mean_terminated_length": 1077.125, "completions/min_length": 893.0, "completions/min_terminated_length": 893.0, "epoch": 0.2364472894578916, "frac_reward_zero_std": 0.0, "grad_norm": 3.5967108465297017, "kl": 0.011383056640625, "learning_rate": 9.500719006210877e-07, "loss": 0.0161, "num_tokens": 51304605.0, "reward": 7.450580596923828e-09, "reward_std": 1.0358760356903076, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.04038930341256521, "rewards/wordcountpos_reward/raw_geo/std": 0.0815503655116324, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0909822937597079, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1182 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1399.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 1130.375, "completions/mean_terminated_length": 1130.375, "completions/min_length": 885.0, "completions/min_terminated_length": 885.0, "epoch": 0.23664732946589317, "frac_reward_zero_std": 0.0, "grad_norm": 2.578118126851255, "kl": 0.0072784423828125, "learning_rate": 9.499279450252134e-07, "loss": -0.0177, "num_tokens": 51354531.0, "reward": 0.0, "reward_std": 1.0480678081512451, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.050917432313345945, "rewards/wordcountpos_reward/raw_geo/std": 0.09279703232527604, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1183 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1385.0, "completions/mean_length": 1222.8125, "completions/mean_terminated_length": 1158.84619140625, "completions/min_length": 860.0, "completions/min_terminated_length": 860.0, "epoch": 0.2368473694738948, "frac_reward_zero_std": 0.0, "grad_norm": 3.3309845637507802, "kl": 0.011810302734375, "learning_rate": 9.49783794422657e-07, "loss": 0.0026, "num_tokens": 51401072.0, "reward": 0.0, "reward_std": 0.8732857704162598, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.37882699098577566, "rewards/wordcountpos_reward/raw_geo/std": 0.2815182668443314, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1184 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1379.0, "completions/mean_length": 1158.625, "completions/mean_terminated_length": 953.7999877929688, "completions/min_length": 682.0, "completions/min_terminated_length": 682.0, "epoch": 0.23704740948189637, "frac_reward_zero_std": 0.0, "grad_norm": 4.920258931475021, "kl": 0.026824951171875, "learning_rate": 9.496394488837071e-07, "loss": -0.0223, "num_tokens": 51455730.0, "reward": 0.0, "reward_std": 0.8452078104019165, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.15884302512650458, "rewards/wordcountpos_reward/raw_geo/std": 0.19093395358271534, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.12816366850994054, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1185 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1280.0, "completions/max_terminated_length": 1280.0, "completions/mean_length": 1108.0625, "completions/mean_terminated_length": 1108.0625, "completions/min_length": 966.0, "completions/min_terminated_length": 966.0, "epoch": 0.23724744948989798, "frac_reward_zero_std": 0.0, "grad_norm": 3.0700632274382325, "kl": 0.010589599609375, "learning_rate": 9.494949084787472e-07, "loss": -0.0124, "num_tokens": 51503443.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9316726326942444, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.00045393816161834596, "rewards/wordcountpos_reward/raw_geo/std": 0.0679674895431341, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668902, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1186 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1396.0, "completions/max_terminated_length": 1396.0, "completions/mean_length": 1093.8125, "completions/mean_terminated_length": 1093.8125, "completions/min_length": 851.0, "completions/min_terminated_length": 851.0, "epoch": 0.23744748949789957, "frac_reward_zero_std": 0.0, "grad_norm": 3.3904447841287713, "kl": 0.00940704345703125, "learning_rate": 9.493501732782559e-07, "loss": 0.0042, "num_tokens": 51545072.0, "reward": 0.0, "reward_std": 0.7127798199653625, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0044994186072670915, "rewards/wordcountpos_reward/raw_geo/std": 0.14264825119462698, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746354, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1187 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1221.0625, "completions/mean_terminated_length": 1181.21435546875, "completions/min_length": 934.0, "completions/min_terminated_length": 934.0, "epoch": 0.23764752950590118, "frac_reward_zero_std": 0.0, "grad_norm": 3.580893822264323, "kl": 0.0134429931640625, "learning_rate": 9.492052433528065e-07, "loss": 0.0136, "num_tokens": 51587025.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7911562919616699, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07228127403334517, "rewards/wordcountpos_reward/raw_geo/std": 0.09628089891620452, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237131, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1188 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 1261.5625, "completions/mean_terminated_length": 1118.5, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.2378475695139028, "frac_reward_zero_std": 0.0, "grad_norm": 3.0087998310251627, "kl": 0.0115814208984375, "learning_rate": 9.490601187730679e-07, "loss": 0.0073, "num_tokens": 51637242.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7797181010246277, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02010349009650849, "rewards/wordcountpos_reward/raw_geo/std": 0.1083281152268591, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11155467020454342, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1189 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1383.0, "completions/mean_length": 1350.1875, "completions/mean_terminated_length": 1282.0909423828125, "completions/min_length": 1122.0, "completions/min_terminated_length": 1122.0, "epoch": 0.23804760952190437, "frac_reward_zero_std": 0.0, "grad_norm": 2.822008200619165, "kl": 0.0103759765625, "learning_rate": 9.489147996098031e-07, "loss": -0.0121, "num_tokens": 51683349.0, "reward": 0.0, "reward_std": 0.7279618382453918, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03702802984335253, "rewards/wordcountpos_reward/raw_geo/std": 0.04703194141491271, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.09878896324620104, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1190 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1218.0, "completions/max_terminated_length": 1218.0, "completions/mean_length": 993.0625, "completions/mean_terminated_length": 993.0625, "completions/min_length": 729.0, "completions/min_terminated_length": 729.0, "epoch": 0.23824764952990599, "frac_reward_zero_std": 0.0, "grad_norm": 3.701882399841415, "kl": 0.013031005859375, "learning_rate": 9.487692859338709e-07, "loss": 0.0112, "num_tokens": 51722030.0, "reward": 0.0, "reward_std": 0.9010850191116333, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.009806227419362545, "rewards/wordcountpos_reward/raw_geo/std": 0.1905747634511497, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1191 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1390.0, "completions/max_terminated_length": 1390.0, "completions/mean_length": 1117.25, "completions/mean_terminated_length": 1117.25, "completions/min_length": 944.0, "completions/min_terminated_length": 944.0, "epoch": 0.23844768953790757, "frac_reward_zero_std": 0.0, "grad_norm": 2.810505389442069, "kl": 0.00878143310546875, "learning_rate": 9.486235778162238e-07, "loss": -0.0168, "num_tokens": 51763978.0, "reward": 0.0, "reward_std": 0.5175235271453857, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14803947345642846, "rewards/wordcountpos_reward/raw_geo/std": 0.07354937170211703, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0893391374565564, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1192 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1360.0, "completions/max_terminated_length": 1360.0, "completions/mean_length": 1100.25, "completions/mean_terminated_length": 1100.25, "completions/min_length": 891.0, "completions/min_terminated_length": 891.0, "epoch": 0.23864772954590918, "frac_reward_zero_std": 0.0, "grad_norm": 1.4596249986709346, "kl": 0.0035524368286132812, "learning_rate": 9.484776753279101e-07, "loss": 0.0254, "num_tokens": 51805126.0, "reward": 0.0, "reward_std": 0.8161255717277527, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.01029224185612476, "rewards/wordcountpos_reward/raw_geo/std": 0.09661796965647951, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1193 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1310.0, "completions/mean_length": 1188.3125, "completions/mean_terminated_length": 1046.6363525390625, "completions/min_length": 874.0, "completions/min_terminated_length": 874.0, "epoch": 0.2388477695539108, "frac_reward_zero_std": 0.0, "grad_norm": 2.576092245152571, "kl": 0.0103759765625, "learning_rate": 9.483315785400726e-07, "loss": 0.0282, "num_tokens": 51842611.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0089211463928223, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03627399429555535, "rewards/wordcountpos_reward/raw_geo/std": 0.03640598896562315, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1194 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1354.0, "completions/mean_length": 1213.8125, "completions/mean_terminated_length": 1147.769287109375, "completions/min_length": 978.0, "completions/min_terminated_length": 978.0, "epoch": 0.23904780956191238, "frac_reward_zero_std": 0.0, "grad_norm": 3.034171019076665, "kl": 0.0101165771484375, "learning_rate": 9.481852875239485e-07, "loss": -0.0149, "num_tokens": 51887184.0, "reward": 0.0, "reward_std": 1.0333211421966553, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.12280088125068324, "rewards/wordcountpos_reward/raw_geo/std": 0.10592501670000959, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.016666666666666663, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1195 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1198.0, "completions/max_terminated_length": 1198.0, "completions/mean_length": 990.75, "completions/mean_terminated_length": 990.75, "completions/min_length": 812.0, "completions/min_terminated_length": 812.0, "epoch": 0.239247849569914, "frac_reward_zero_std": 0.0, "grad_norm": 3.9208670482912544, "kl": 0.0118408203125, "learning_rate": 9.480388023508702e-07, "loss": -0.0063, "num_tokens": 51930092.0, "reward": 0.0, "reward_std": 0.8676304221153259, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.18921344064974438, "rewards/wordcountpos_reward/raw_geo/std": 0.06726169666722093, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1196 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1227.625, "completions/mean_terminated_length": 1164.769287109375, "completions/min_length": 910.0, "completions/min_terminated_length": 910.0, "epoch": 0.23944788957791557, "frac_reward_zero_std": 0.0, "grad_norm": 3.306511428173286, "kl": 0.0122222900390625, "learning_rate": 9.478921230922643e-07, "loss": 0.0163, "num_tokens": 51982774.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9336946606636047, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.012036807832469078, "rewards/wordcountpos_reward/raw_geo/std": 0.06759350200397629, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12345339501504503, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1197 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1482.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1033.25, "completions/mean_terminated_length": 1033.25, "completions/min_length": 737.0, "completions/min_terminated_length": 737.0, "epoch": 0.23964792958591719, "frac_reward_zero_std": 0.0, "grad_norm": 3.050025602290657, "kl": 0.0113525390625, "learning_rate": 9.477452498196526e-07, "loss": -0.0074, "num_tokens": 52023714.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0611441135406494, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08425284284808922, "rewards/wordcountpos_reward/raw_geo/std": 0.06517950996801065, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237131, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1198 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1345.0, "completions/max_terminated_length": 1345.0, "completions/mean_length": 1055.25, "completions/mean_terminated_length": 1055.25, "completions/min_length": 803.0, "completions/min_terminated_length": 803.0, "epoch": 0.2398479695939188, "frac_reward_zero_std": 0.0, "grad_norm": 3.715366466434405, "kl": 0.0133819580078125, "learning_rate": 9.475981826046507e-07, "loss": -0.0157, "num_tokens": 52067366.0, "reward": 0.0, "reward_std": 0.7970370054244995, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05611734152814263, "rewards/wordcountpos_reward/raw_geo/std": 0.055541232660735886, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746354, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1199 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1205.0, "completions/max_terminated_length": 1205.0, "completions/mean_length": 952.9375, "completions/mean_terminated_length": 952.9375, "completions/min_length": 780.0, "completions/min_terminated_length": 780.0, "epoch": 0.24004800960192038, "frac_reward_zero_std": 0.0, "grad_norm": 3.8726900672960065, "kl": 0.01165771484375, "learning_rate": 9.474509215189696e-07, "loss": -0.0013, "num_tokens": 52097989.0, "reward": 0.0, "reward_std": 1.034703016281128, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1372414006268383, "rewards/wordcountpos_reward/raw_geo/std": 0.09141674453585732, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1200 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1415.0, "completions/mean_length": 1182.1875, "completions/mean_terminated_length": 1108.84619140625, "completions/min_length": 827.0, "completions/min_terminated_length": 827.0, "epoch": 0.240248049609922, "frac_reward_zero_std": 0.0, "grad_norm": 3.1137361802696395, "kl": 0.01104736328125, "learning_rate": 9.473034666344144e-07, "loss": 0.0152, "num_tokens": 52143624.0, "reward": 0.0, "reward_std": 0.8156384229660034, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04372307530559236, "rewards/wordcountpos_reward/raw_geo/std": 0.06022969349498634, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08850612031567837, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1201 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1332.0, "completions/max_terminated_length": 1332.0, "completions/mean_length": 949.875, "completions/mean_terminated_length": 949.875, "completions/min_length": 831.0, "completions/min_terminated_length": 831.0, "epoch": 0.24044808961792358, "frac_reward_zero_std": 0.0, "grad_norm": 2.7111420652674334, "kl": 0.00537872314453125, "learning_rate": 9.471558180228846e-07, "loss": -0.0416, "num_tokens": 52183870.0, "reward": 0.0, "reward_std": 1.040045142173767, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11768846747825505, "rewards/wordcountpos_reward/raw_geo/std": 0.09113578164705738, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.1600347184554374, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1202 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 1307.375, "completions/mean_terminated_length": 1191.800048828125, "completions/min_length": 1062.0, "completions/min_terminated_length": 1062.0, "epoch": 0.2406481296259252, "frac_reward_zero_std": 0.0, "grad_norm": 2.636500778830953, "kl": 0.009918212890625, "learning_rate": 9.470079757563746e-07, "loss": -0.0298, "num_tokens": 52239284.0, "reward": 0.0, "reward_std": 0.988853394985199, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1167864628785271, "rewards/wordcountpos_reward/raw_geo/std": 0.15083617038675007, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09098229375970789, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1203 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1395.0, "completions/max_terminated_length": 1395.0, "completions/mean_length": 1108.625, "completions/mean_terminated_length": 1108.625, "completions/min_length": 921.0, "completions/min_terminated_length": 921.0, "epoch": 0.24084816963392677, "frac_reward_zero_std": 0.0, "grad_norm": 3.2883015875013277, "kl": 0.0121917724609375, "learning_rate": 9.468599399069729e-07, "loss": -0.0293, "num_tokens": 52283758.0, "reward": 1.862645149230957e-08, "reward_std": 1.0627440214157104, "rewards/wordcountpos_reward/mean": 1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.004401873162290123, "rewards/wordcountpos_reward/raw_geo/std": 0.09510812136194687, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668904, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1204 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1366.0, "completions/max_terminated_length": 1366.0, "completions/mean_length": 1130.0, "completions/mean_terminated_length": 1130.0, "completions/min_length": 1054.0, "completions/min_terminated_length": 1054.0, "epoch": 0.24104820964192838, "frac_reward_zero_std": 0.0, "grad_norm": 2.2424861077247162, "kl": 0.00669097900390625, "learning_rate": 9.467117105468623e-07, "loss": -0.0009, "num_tokens": 52327742.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8811639547348022, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1314306789294673, "rewards/wordcountpos_reward/raw_geo/std": 0.13167323837984155, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1205 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1390.0, "completions/max_terminated_length": 1390.0, "completions/mean_length": 1001.375, "completions/mean_terminated_length": 1001.375, "completions/min_length": 735.0, "completions/min_terminated_length": 735.0, "epoch": 0.24124824964993, "frac_reward_zero_std": 0.0, "grad_norm": 3.9897326750244595, "kl": 0.013275146484375, "learning_rate": 9.465632877483203e-07, "loss": 0.0176, "num_tokens": 52364948.0, "reward": 0.0, "reward_std": 0.6958979964256287, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0005988940108211079, "rewards/wordcountpos_reward/raw_geo/std": 0.06471472546620861, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.10318986456114841, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1206 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1270.0625, "completions/mean_terminated_length": 1237.21435546875, "completions/min_length": 1060.0, "completions/min_terminated_length": 1060.0, "epoch": 0.24144828965793158, "frac_reward_zero_std": 0.0, "grad_norm": 1.5081031630971902, "kl": 0.00339508056640625, "learning_rate": 9.464146715837185e-07, "loss": 0.0148, "num_tokens": 52421301.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9385417699813843, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.18336531727274233, "rewards/wordcountpos_reward/raw_geo/std": 0.06303006376165128, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452246, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1207 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1279.0, "completions/max_terminated_length": 1279.0, "completions/mean_length": 1080.1875, "completions/mean_terminated_length": 1080.1875, "completions/min_length": 782.0, "completions/min_terminated_length": 782.0, "epoch": 0.2416483296659332, "frac_reward_zero_std": 0.0, "grad_norm": 3.2180471472785657, "kl": 0.01318359375, "learning_rate": 9.462658621255226e-07, "loss": -0.0149, "num_tokens": 52463816.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7174466848373413, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09307877163419827, "rewards/wordcountpos_reward/raw_geo/std": 0.08339463691108169, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746357, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1208 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1236.125, "completions/mean_terminated_length": 1218.533447265625, "completions/min_length": 883.0, "completions/min_terminated_length": 883.0, "epoch": 0.24184836967393478, "frac_reward_zero_std": 0.0, "grad_norm": 2.7771094569910564, "kl": 0.009918212890625, "learning_rate": 9.461168594462931e-07, "loss": -0.0384, "num_tokens": 52499402.0, "reward": 0.0, "reward_std": 0.4614540636539459, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.025831890907897573, "rewards/wordcountpos_reward/raw_geo/std": 0.187955319239083, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042255, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1209 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1392.0, "completions/mean_length": 1236.1875, "completions/mean_terminated_length": 1175.3077392578125, "completions/min_length": 1072.0, "completions/min_terminated_length": 1072.0, "epoch": 0.2420484096819364, "frac_reward_zero_std": 0.0, "grad_norm": 2.29070496026933, "kl": 0.006862640380859375, "learning_rate": 9.459676636186839e-07, "loss": -0.024, "num_tokens": 52546757.0, "reward": 0.0, "reward_std": 0.7864881157875061, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0476956622955646, "rewards/wordcountpos_reward/raw_geo/std": 0.07896374044735588, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238704, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1210 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1089.75, "completions/mean_terminated_length": 1031.1429443359375, "completions/min_length": 760.0, "completions/min_terminated_length": 760.0, "epoch": 0.242248449689938, "frac_reward_zero_std": 0.0, "grad_norm": 2.393462248816949, "kl": 0.009674072265625, "learning_rate": 9.458182747154441e-07, "loss": 0.0065, "num_tokens": 52587857.0, "reward": 0.0, "reward_std": 0.5506587028503418, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04336147948570137, "rewards/wordcountpos_reward/raw_geo/std": 0.07856243535963454, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09108400680852977, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1211 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1382.0, "completions/mean_length": 1221.6875, "completions/mean_terminated_length": 1157.4615478515625, "completions/min_length": 986.0, "completions/min_terminated_length": 986.0, "epoch": 0.24244848969793958, "frac_reward_zero_std": 0.0, "grad_norm": 3.5084249508853333, "kl": 0.01727294921875, "learning_rate": 9.456686928094162e-07, "loss": -0.0174, "num_tokens": 52639788.0, "reward": 0.0, "reward_std": 0.4595772922039032, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.053300381502168905, "rewards/wordcountpos_reward/raw_geo/std": 0.06024540174495668, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.10174405069512348, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1212 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 1202.5, "completions/mean_terminated_length": 1182.666748046875, "completions/min_length": 867.0, "completions/min_terminated_length": 867.0, "epoch": 0.2426485297059412, "frac_reward_zero_std": 0.0, "grad_norm": 2.841871261585183, "kl": 0.00888824462890625, "learning_rate": 9.455189179735369e-07, "loss": 0.0094, "num_tokens": 52680164.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4850025773048401, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07535187466167527, "rewards/wordcountpos_reward/raw_geo/std": 0.08034663872131993, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.14375905768565217, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1213 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1301.8125, "completions/mean_terminated_length": 1256.0770263671875, "completions/min_length": 999.0, "completions/min_terminated_length": 999.0, "epoch": 0.24284856971394278, "frac_reward_zero_std": 0.0, "grad_norm": 3.1168367910611146, "kl": 0.01312255859375, "learning_rate": 9.453689502808372e-07, "loss": -0.0337, "num_tokens": 52732921.0, "reward": 0.0, "reward_std": 0.5940068364143372, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03526778600763611, "rewards/wordcountpos_reward/raw_geo/std": 0.03642441276404843, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1029203215725281, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1214 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1386.0, "completions/max_terminated_length": 1386.0, "completions/mean_length": 1153.75, "completions/mean_terminated_length": 1153.75, "completions/min_length": 926.0, "completions/min_terminated_length": 926.0, "epoch": 0.2430486097219444, "frac_reward_zero_std": 0.0, "grad_norm": 3.3543488809062207, "kl": 0.0125579833984375, "learning_rate": 9.452187898044421e-07, "loss": 0.0066, "num_tokens": 52779901.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0402662754058838, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0902334592268817, "rewards/wordcountpos_reward/raw_geo/std": 0.06315647861149118, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10327955589886446, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1215 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1195.0, "completions/max_terminated_length": 1195.0, "completions/mean_length": 856.25, "completions/mean_terminated_length": 856.25, "completions/min_length": 510.0, "completions/min_terminated_length": 510.0, "epoch": 0.24324864972994598, "frac_reward_zero_std": 0.0, "grad_norm": 3.8965223276453784, "kl": 0.0151824951171875, "learning_rate": 9.450684366175703e-07, "loss": 0.0307, "num_tokens": 52811137.0, "reward": 0.0, "reward_std": 1.0612512826919556, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.26006408819767146, "rewards/wordcountpos_reward/raw_geo/std": 0.2016617337884896, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820636, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1216 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1369.0, "completions/max_terminated_length": 1369.0, "completions/mean_length": 1164.375, "completions/mean_terminated_length": 1164.375, "completions/min_length": 970.0, "completions/min_terminated_length": 970.0, "epoch": 0.2434486897379476, "frac_reward_zero_std": 0.0, "grad_norm": 3.0730703365214422, "kl": 0.0086212158203125, "learning_rate": 9.449178907935349e-07, "loss": 0.0052, "num_tokens": 52853999.0, "reward": 0.0, "reward_std": 0.770872175693512, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05712805244243435, "rewards/wordcountpos_reward/raw_geo/std": 0.0971223517825316, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.06666666666666667, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1217 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1213.0, "completions/max_terminated_length": 1213.0, "completions/mean_length": 947.625, "completions/mean_terminated_length": 947.625, "completions/min_length": 727.0, "completions/min_terminated_length": 727.0, "epoch": 0.2436487297459492, "frac_reward_zero_std": 0.0, "grad_norm": 2.0875478678864225, "kl": 0.004611968994140625, "learning_rate": 9.447671524057427e-07, "loss": 0.0207, "num_tokens": 52884049.0, "reward": 0.0, "reward_std": 0.9738976955413818, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02275011329578451, "rewards/wordcountpos_reward/raw_geo/std": 0.06713969959170867, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1218 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1277.0, "completions/mean_length": 1213.125, "completions/mean_terminated_length": 1172.1429443359375, "completions/min_length": 1061.0, "completions/min_terminated_length": 1061.0, "epoch": 0.24384876975395078, "frac_reward_zero_std": 0.0, "grad_norm": 3.310734851548052, "kl": 0.0120086669921875, "learning_rate": 9.446162215276942e-07, "loss": -0.0046, "num_tokens": 52930739.0, "reward": 0.0, "reward_std": 1.033539056777954, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1725281437757172, "rewards/wordcountpos_reward/raw_geo/std": 0.0964625911720144, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05374838498865701, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1219 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1415.0, "completions/mean_length": 1215.0625, "completions/mean_terminated_length": 1196.0667724609375, "completions/min_length": 916.0, "completions/min_terminated_length": 916.0, "epoch": 0.2440488097619524, "frac_reward_zero_std": 0.0, "grad_norm": 3.484867953204045, "kl": 0.008636474609375, "learning_rate": 9.444650982329844e-07, "loss": -0.0149, "num_tokens": 52977540.0, "reward": 0.0, "reward_std": 0.3614540696144104, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03156820267612672, "rewards/wordcountpos_reward/raw_geo/std": 0.18337960772275527, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1220 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 1120.25, "completions/mean_terminated_length": 1094.933349609375, "completions/min_length": 686.0, "completions/min_terminated_length": 686.0, "epoch": 0.24424884976995398, "frac_reward_zero_std": 0.0, "grad_norm": 2.8357662007537283, "kl": 0.00901031494140625, "learning_rate": 9.443137825953013e-07, "loss": -0.0038, "num_tokens": 53016744.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6886307001113892, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08992545380416017, "rewards/wordcountpos_reward/raw_geo/std": 0.07444250252634478, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.10809803506625448, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1221 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1454.0, "completions/mean_length": 1211.125, "completions/mean_terminated_length": 1169.857177734375, "completions/min_length": 853.0, "completions/min_terminated_length": 853.0, "epoch": 0.2444488897779556, "frac_reward_zero_std": 0.0, "grad_norm": 3.5657132310649136, "kl": 0.012237548828125, "learning_rate": 9.441622746884275e-07, "loss": 0.002, "num_tokens": 53063770.0, "reward": 1.1175870895385742e-08, "reward_std": 1.0602158308029175, "rewards/wordcountpos_reward/mean": 1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.009409237166674606, "rewards/wordcountpos_reward/raw_geo/std": 0.05274953097403563, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11013459778666118, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1222 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1294.0, "completions/mean_length": 1338.5625, "completions/mean_terminated_length": 1131.0, "completions/min_length": 977.0, "completions/min_terminated_length": 977.0, "epoch": 0.2446489297859572, "frac_reward_zero_std": 0.0, "grad_norm": 2.7074057172479327, "kl": 0.00815582275390625, "learning_rate": 9.440105745862385e-07, "loss": -0.0038, "num_tokens": 53113579.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7996894121170044, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07257869077918999, "rewards/wordcountpos_reward/raw_geo/std": 0.06466109994840634, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14580555290954889, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1223 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1425.0, "completions/max_terminated_length": 1425.0, "completions/mean_length": 1226.0, "completions/mean_terminated_length": 1226.0, "completions/min_length": 1092.0, "completions/min_terminated_length": 1092.0, "epoch": 0.2448489697939588, "frac_reward_zero_std": 0.0, "grad_norm": 3.004768808105074, "kl": 0.01123046875, "learning_rate": 9.438586823627042e-07, "loss": -0.0177, "num_tokens": 53156339.0, "reward": -7.450580596923828e-09, "reward_std": 0.9870691299438477, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.07950870546022343, "rewards/wordcountpos_reward/raw_geo/std": 0.0747406899854144, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1167460047694551, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1224 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1177.4375, "completions/mean_terminated_length": 1131.357177734375, "completions/min_length": 792.0, "completions/min_terminated_length": 792.0, "epoch": 0.2450490098019604, "frac_reward_zero_std": 0.0, "grad_norm": 3.1225918469890233, "kl": 0.0121307373046875, "learning_rate": 9.43706598091888e-07, "loss": -0.0594, "num_tokens": 53209250.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8248011469841003, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06540694784462482, "rewards/wordcountpos_reward/raw_geo/std": 0.19245578448302406, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1225 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1200.875, "completions/mean_terminated_length": 1180.933349609375, "completions/min_length": 1005.0, "completions/min_terminated_length": 1005.0, "epoch": 0.24524904980996198, "frac_reward_zero_std": 0.0, "grad_norm": 3.3884814515719093, "kl": 0.01165771484375, "learning_rate": 9.435543218479467e-07, "loss": 0.033, "num_tokens": 53257056.0, "reward": 0.0, "reward_std": 0.8607234954833984, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.22442087104005376, "rewards/wordcountpos_reward/raw_geo/std": 0.13746074123743787, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1548595540529595, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1226 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1436.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1104.625, "completions/mean_terminated_length": 1104.625, "completions/min_length": 898.0, "completions/min_terminated_length": 898.0, "epoch": 0.2454490898179636, "frac_reward_zero_std": 0.0, "grad_norm": 3.0764641261500683, "kl": 0.0107421875, "learning_rate": 9.43401853705131e-07, "loss": -0.0119, "num_tokens": 53309194.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9115550518035889, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10273402680658004, "rewards/wordcountpos_reward/raw_geo/std": 0.061602003513957164, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1227 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1171.0, "completions/max_terminated_length": 1171.0, "completions/mean_length": 810.0625, "completions/mean_terminated_length": 810.0625, "completions/min_length": 618.0, "completions/min_terminated_length": 618.0, "epoch": 0.2456491298259652, "frac_reward_zero_std": 0.0, "grad_norm": 4.082409206758194, "kl": 0.0122222900390625, "learning_rate": 9.432491937377851e-07, "loss": -0.0203, "num_tokens": 53346859.0, "reward": -1.1175870895385742e-08, "reward_std": 1.0469852685928345, "rewards/wordcountpos_reward/mean": -1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03824039220995795, "rewards/wordcountpos_reward/raw_geo/std": 0.09267537234211883, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.14981470036162822, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1228 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1248.875, "completions/mean_terminated_length": 1232.1334228515625, "completions/min_length": 955.0, "completions/min_terminated_length": 955.0, "epoch": 0.2458491698339668, "frac_reward_zero_std": 0.0, "grad_norm": 3.1504969618980527, "kl": 0.0100860595703125, "learning_rate": 9.430963420203465e-07, "loss": 0.0065, "num_tokens": 53395777.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0379801988601685, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1040317113317454, "rewards/wordcountpos_reward/raw_geo/std": 0.047444949142271924, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0825518916489187, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1229 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1131.0, "completions/mean_terminated_length": 1045.84619140625, "completions/min_length": 810.0, "completions/min_terminated_length": 810.0, "epoch": 0.2460492098419684, "frac_reward_zero_std": 0.0, "grad_norm": 3.105931406873175, "kl": 0.01165771484375, "learning_rate": 9.429432986273465e-07, "loss": 0.026, "num_tokens": 53438393.0, "reward": 0.0, "reward_std": 0.6744869947433472, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05555753008665379, "rewards/wordcountpos_reward/raw_geo/std": 0.09988069993565164, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.11080513425729775, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1230 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1338.0, "completions/max_terminated_length": 1338.0, "completions/mean_length": 1205.0, "completions/mean_terminated_length": 1205.0, "completions/min_length": 962.0, "completions/min_terminated_length": 962.0, "epoch": 0.24624924984997, "frac_reward_zero_std": 0.0, "grad_norm": 2.7768386273106844, "kl": 0.0096893310546875, "learning_rate": 9.427900636334098e-07, "loss": -0.0085, "num_tokens": 53482937.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0528334379196167, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.19000993407122252, "rewards/wordcountpos_reward/raw_geo/std": 0.07636180337848633, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1231 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1169.0, "completions/max_terminated_length": 1169.0, "completions/mean_length": 921.375, "completions/mean_terminated_length": 921.375, "completions/min_length": 643.0, "completions/min_terminated_length": 643.0, "epoch": 0.2464492898579716, "frac_reward_zero_std": 0.0, "grad_norm": 3.955997351042825, "kl": 0.0114593505859375, "learning_rate": 9.426366371132546e-07, "loss": -0.0156, "num_tokens": 53512519.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9775012731552124, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.043462585574213515, "rewards/wordcountpos_reward/raw_geo/std": 0.05402398330396254, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1232 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1449.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 1189.5, "completions/mean_terminated_length": 1189.5, "completions/min_length": 966.0, "completions/min_terminated_length": 966.0, "epoch": 0.24664932986597318, "frac_reward_zero_std": 0.0, "grad_norm": 2.6943663326354215, "kl": 0.0107574462890625, "learning_rate": 9.42483019141692e-07, "loss": -0.0201, "num_tokens": 53555799.0, "reward": 2.9802322387695312e-08, "reward_std": 0.673210859298706, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02774593795186238, "rewards/wordcountpos_reward/raw_geo/std": 0.04204195327686426, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668902, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1233 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1325.0, "completions/max_terminated_length": 1325.0, "completions/mean_length": 1096.5, "completions/mean_terminated_length": 1096.5, "completions/min_length": 918.0, "completions/min_terminated_length": 918.0, "epoch": 0.2468493698739748, "frac_reward_zero_std": 0.0, "grad_norm": 3.634829535697307, "kl": 0.013519287109375, "learning_rate": 9.423292097936272e-07, "loss": 0.027, "num_tokens": 53598943.0, "reward": 0.0, "reward_std": 1.0042479038238525, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07921791030444014, "rewards/wordcountpos_reward/raw_geo/std": 0.13866086258010044, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.11603000888978232, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1234 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1305.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 1048.875, "completions/mean_terminated_length": 1048.875, "completions/min_length": 764.0, "completions/min_terminated_length": 764.0, "epoch": 0.2470494098819764, "frac_reward_zero_std": 0.0, "grad_norm": 3.490694987190087, "kl": 0.0132904052734375, "learning_rate": 9.421752091440581e-07, "loss": -0.0143, "num_tokens": 53635789.0, "reward": 0.0, "reward_std": 0.673473596572876, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2077029319305134, "rewards/wordcountpos_reward/raw_geo/std": 0.09350673255412578, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1235 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1372.0, "completions/mean_length": 1267.25, "completions/mean_terminated_length": 1234.0, "completions/min_length": 1059.0, "completions/min_terminated_length": 1059.0, "epoch": 0.247249449889978, "frac_reward_zero_std": 0.0, "grad_norm": 2.815308130377926, "kl": 0.01129150390625, "learning_rate": 9.420210172680762e-07, "loss": -0.0279, "num_tokens": 53687441.0, "reward": 4.470348358154297e-08, "reward_std": 0.9420638084411621, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1221136587499842, "rewards/wordcountpos_reward/raw_geo/std": 0.2998261790636444, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901158, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1236 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1481.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1282.375, "completions/mean_terminated_length": 1282.375, "completions/min_length": 1061.0, "completions/min_terminated_length": 1061.0, "epoch": 0.2474494898979796, "frac_reward_zero_std": 0.0, "grad_norm": 3.0969937632608437, "kl": 0.0116119384765625, "learning_rate": 9.418666342408662e-07, "loss": -0.0095, "num_tokens": 53730239.0, "reward": 0.0, "reward_std": 0.6483302116394043, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07912510687849839, "rewards/wordcountpos_reward/raw_geo/std": 0.08930312343390825, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10749676997731401, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1237 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1460.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1170.375, "completions/mean_terminated_length": 1170.375, "completions/min_length": 913.0, "completions/min_terminated_length": 913.0, "epoch": 0.24764952990598119, "frac_reward_zero_std": 0.0, "grad_norm": 3.2990938862200676, "kl": 0.0114593505859375, "learning_rate": 9.41712060137706e-07, "loss": 0.031, "num_tokens": 53775149.0, "reward": 0.0, "reward_std": 0.9025614261627197, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14336716527511156, "rewards/wordcountpos_reward/raw_geo/std": 0.06975431090624587, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081408, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1238 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1488.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1264.125, "completions/mean_terminated_length": 1264.125, "completions/min_length": 994.0, "completions/min_terminated_length": 994.0, "epoch": 0.2478495699139828, "frac_reward_zero_std": 0.0, "grad_norm": 3.0643814018326276, "kl": 0.012725830078125, "learning_rate": 9.415572950339664e-07, "loss": -0.0269, "num_tokens": 53813991.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8022619485855103, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08465348038005661, "rewards/wordcountpos_reward/raw_geo/std": 0.09597399731325108, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0806225774829855, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1239 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1205.0, "completions/max_terminated_length": 1205.0, "completions/mean_length": 897.8125, "completions/mean_terminated_length": 897.8125, "completions/min_length": 733.0, "completions/min_terminated_length": 733.0, "epoch": 0.2480496099219844, "frac_reward_zero_std": 0.0, "grad_norm": 3.1233814911266022, "kl": 0.00865936279296875, "learning_rate": 9.414023390051118e-07, "loss": -0.0004, "num_tokens": 53843940.0, "reward": 0.0, "reward_std": 0.819797158241272, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.014740290475578583, "rewards/wordcountpos_reward/raw_geo/std": 0.0658470598782537, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026004, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1240 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1274.625, "completions/mean_terminated_length": 1222.615478515625, "completions/min_length": 924.0, "completions/min_terminated_length": 924.0, "epoch": 0.248249649929986, "frac_reward_zero_std": 0.0, "grad_norm": 3.075918954158901, "kl": 0.01251220703125, "learning_rate": 9.412471921266994e-07, "loss": -0.0582, "num_tokens": 53897470.0, "reward": 0.0, "reward_std": 0.9509785175323486, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.23388790191161093, "rewards/wordcountpos_reward/raw_geo/std": 0.09003484326266731, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10246950765959599, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1241 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1200.0, "completions/max_terminated_length": 1200.0, "completions/mean_length": 897.1875, "completions/mean_terminated_length": 897.1875, "completions/min_length": 690.0, "completions/min_terminated_length": 690.0, "epoch": 0.2484496899379876, "frac_reward_zero_std": 0.0, "grad_norm": 3.680408307644297, "kl": 0.0113983154296875, "learning_rate": 9.410918544743793e-07, "loss": -0.0321, "num_tokens": 53943569.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9757611751556396, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02647252936003171, "rewards/wordcountpos_reward/raw_geo/std": 0.16936879227561216, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.10327955589886446, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1242 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1332.0, "completions/max_terminated_length": 1332.0, "completions/mean_length": 1167.0, "completions/mean_terminated_length": 1167.0, "completions/min_length": 928.0, "completions/min_terminated_length": 928.0, "epoch": 0.2486497299459892, "frac_reward_zero_std": 0.0, "grad_norm": 3.0143625446443933, "kl": 0.00836181640625, "learning_rate": 9.409363261238952e-07, "loss": -0.0057, "num_tokens": 53985929.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5423867106437683, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.2100799131987011, "rewards/wordcountpos_reward/raw_geo/std": 0.19522053766035405, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746354, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1243 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1331.0, "completions/mean_length": 1312.0625, "completions/mean_terminated_length": 1199.300048828125, "completions/min_length": 1071.0, "completions/min_terminated_length": 1071.0, "epoch": 0.2488497699539908, "frac_reward_zero_std": 0.0, "grad_norm": 2.865796921956238, "kl": 0.010650634765625, "learning_rate": 9.407806071510833e-07, "loss": 0.0036, "num_tokens": 54029746.0, "reward": 0.0, "reward_std": 0.9989716410636902, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10754467538891152, "rewards/wordcountpos_reward/raw_geo/std": 0.1220195948567986, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1244 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 962.0, "completions/max_terminated_length": 962.0, "completions/mean_length": 721.5625, "completions/mean_terminated_length": 721.5625, "completions/min_length": 507.0, "completions/min_terminated_length": 507.0, "epoch": 0.24904980996199239, "frac_reward_zero_std": 0.0, "grad_norm": 3.9716540563864653, "kl": 0.00891876220703125, "learning_rate": 9.406246976318727e-07, "loss": -0.003, "num_tokens": 54055875.0, "reward": 0.0, "reward_std": 0.7075194120407104, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07191045737442983, "rewards/wordcountpos_reward/raw_geo/std": 0.0334827689059806, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.0758897836290186, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1245 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1461.0, "completions/mean_length": 1198.5, "completions/mean_terminated_length": 1178.4000244140625, "completions/min_length": 898.0, "completions/min_terminated_length": 898.0, "epoch": 0.249249849969994, "frac_reward_zero_std": 0.0, "grad_norm": 3.057722477564494, "kl": 0.010772705078125, "learning_rate": 9.40468597642286e-07, "loss": -0.0164, "num_tokens": 54096051.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7778348922729492, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.006909602977690901, "rewards/wordcountpos_reward/raw_geo/std": 0.09271125732127858, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09699179041242309, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1246 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1313.4375, "completions/mean_terminated_length": 1286.7857666015625, "completions/min_length": 1107.0, "completions/min_terminated_length": 1107.0, "epoch": 0.2494498899779956, "frac_reward_zero_std": 0.0, "grad_norm": 3.0355939075031535, "kl": 0.00888824462890625, "learning_rate": 9.403123072584378e-07, "loss": -0.0222, "num_tokens": 54141034.0, "reward": 2.9802322387695312e-08, "reward_std": 0.46149325370788574, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.029357419913127118, "rewards/wordcountpos_reward/raw_geo/std": 0.19366208734464058, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1247 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1480.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1152.4375, "completions/mean_terminated_length": 1152.4375, "completions/min_length": 984.0, "completions/min_terminated_length": 984.0, "epoch": 0.2496499299859972, "frac_reward_zero_std": 0.0, "grad_norm": 2.9580821520095806, "kl": 0.0133209228515625, "learning_rate": 9.401558265565363e-07, "loss": 0.0215, "num_tokens": 54190985.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6795248985290527, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05420108240828713, "rewards/wordcountpos_reward/raw_geo/std": 0.046221657466690776, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11417984514369005, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1248 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1371.0, "completions/mean_length": 1236.3125, "completions/mean_terminated_length": 1116.45458984375, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.2498499699939988, "frac_reward_zero_std": 0.0, "grad_norm": 3.084881634824025, "kl": 0.01300048828125, "learning_rate": 9.399991556128821e-07, "loss": 0.0054, "num_tokens": 54234950.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0154902935028076, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09068974039494918, "rewards/wordcountpos_reward/raw_geo/std": 0.30661587622967174, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1249 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1387.0, "completions/mean_length": 1181.625, "completions/mean_terminated_length": 1160.4000244140625, "completions/min_length": 870.0, "completions/min_terminated_length": 870.0, "epoch": 0.2500500100020004, "frac_reward_zero_std": 0.0, "grad_norm": 3.321451875280831, "kl": 0.01287841796875, "learning_rate": 9.398422945038687e-07, "loss": -0.0108, "num_tokens": 54274904.0, "reward": -7.450580596923828e-09, "reward_std": 0.9767227172851562, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.0804224274690149, "rewards/wordcountpos_reward/raw_geo/std": 0.12264978339153693, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10945995377982527, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1250 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1432.0, "completions/mean_length": 1192.9375, "completions/mean_terminated_length": 1172.4666748046875, "completions/min_length": 935.0, "completions/min_terminated_length": 935.0, "epoch": 0.250250050010002, "frac_reward_zero_std": 0.0, "grad_norm": 3.4138594976102894, "kl": 0.01123046875, "learning_rate": 9.396852433059822e-07, "loss": -0.0358, "num_tokens": 54321735.0, "reward": 0.0, "reward_std": 0.6348475217819214, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.041239905887221616, "rewards/wordcountpos_reward/raw_geo/std": 0.12711543450847512, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081408, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1251 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1241.0, "completions/max_terminated_length": 1241.0, "completions/mean_length": 1109.125, "completions/mean_terminated_length": 1109.125, "completions/min_length": 886.0, "completions/min_terminated_length": 886.0, "epoch": 0.2504500900180036, "frac_reward_zero_std": 0.0, "grad_norm": 3.178998855405846, "kl": 0.010345458984375, "learning_rate": 9.395280020958017e-07, "loss": 0.0193, "num_tokens": 54362041.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6717987060546875, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0893681435224798, "rewards/wordcountpos_reward/raw_geo/std": 0.0686433669024916, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1252 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1404.0, "completions/max_terminated_length": 1404.0, "completions/mean_length": 1131.75, "completions/mean_terminated_length": 1131.75, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.2506501300260052, "frac_reward_zero_std": 0.0, "grad_norm": 2.5990984201054514, "kl": 0.008197784423828125, "learning_rate": 9.393705709499983e-07, "loss": -0.0162, "num_tokens": 54397573.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5909625887870789, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12603111910443537, "rewards/wordcountpos_reward/raw_geo/std": 0.2202199493009877, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668902, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1253 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1216.6875, "completions/mean_terminated_length": 1197.800048828125, "completions/min_length": 723.0, "completions/min_terminated_length": 723.0, "epoch": 0.2508501700340068, "frac_reward_zero_std": 0.0, "grad_norm": 2.770105963651306, "kl": 0.009979248046875, "learning_rate": 9.392129499453365e-07, "loss": -0.0338, "num_tokens": 54442976.0, "reward": 0.0, "reward_std": 0.4439162611961365, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.16088690330533229, "rewards/wordcountpos_reward/raw_geo/std": 0.2763708098292569, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1254 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1077.0, "completions/max_terminated_length": 1077.0, "completions/mean_length": 901.9375, "completions/mean_terminated_length": 901.9375, "completions/min_length": 676.0, "completions/min_terminated_length": 676.0, "epoch": 0.2510502100420084, "frac_reward_zero_std": 0.0, "grad_norm": 2.8910077582467326, "kl": 0.00782012939453125, "learning_rate": 9.390551391586729e-07, "loss": -0.0004, "num_tokens": 54476887.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0163949728012085, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10093721940259831, "rewards/wordcountpos_reward/raw_geo/std": 0.13762791550120512, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1255 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1235.0, "completions/mean_length": 1080.3125, "completions/mean_terminated_length": 1052.3333740234375, "completions/min_length": 911.0, "completions/min_terminated_length": 911.0, "epoch": 0.25125025005001, "frac_reward_zero_std": 0.0, "grad_norm": 2.133116863804827, "kl": 0.00440216064453125, "learning_rate": 9.388971386669569e-07, "loss": 0.0268, "num_tokens": 54509892.0, "reward": 7.450580596923828e-09, "reward_std": 0.9897512793540955, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.04225042507981598, "rewards/wordcountpos_reward/raw_geo/std": 0.11726899070892662, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14782371884055637, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1256 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1234.5625, "completions/mean_terminated_length": 1173.3077392578125, "completions/min_length": 918.0, "completions/min_terminated_length": 918.0, "epoch": 0.2514502900580116, "frac_reward_zero_std": 0.0, "grad_norm": 3.2561124637036296, "kl": 0.014434814453125, "learning_rate": 9.387389485472301e-07, "loss": 0.0355, "num_tokens": 54548445.0, "reward": -7.450580596923828e-09, "reward_std": 1.0002635717391968, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.013848969385092313, "rewards/wordcountpos_reward/raw_geo/std": 0.025816229223001773, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1257 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1338.0, "completions/max_terminated_length": 1338.0, "completions/mean_length": 1060.3125, "completions/mean_terminated_length": 1060.3125, "completions/min_length": 807.0, "completions/min_terminated_length": 807.0, "epoch": 0.25165033006601323, "frac_reward_zero_std": 0.0, "grad_norm": 3.0919865414413734, "kl": 0.01078033447265625, "learning_rate": 9.385805688766268e-07, "loss": 0.008, "num_tokens": 54586290.0, "reward": 2.9802322387695312e-08, "reward_std": 1.030531644821167, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11895778131688654, "rewards/wordcountpos_reward/raw_geo/std": 0.04901466419178711, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1258 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1199.375, "completions/mean_terminated_length": 1156.4285888671875, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.2518503700740148, "frac_reward_zero_std": 0.0, "grad_norm": 3.505503211167695, "kl": 0.0120086669921875, "learning_rate": 9.384219997323734e-07, "loss": 0.0006, "num_tokens": 54631328.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9332997798919678, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.3848945762046329, "rewards/wordcountpos_reward/raw_geo/std": 0.1249950853313475, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7625, "rewards/wordcountpos_reward/raw_rule/std": 0.13326387079497304, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1259 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1212.625, "completions/mean_terminated_length": 1146.3077392578125, "completions/min_length": 889.0, "completions/min_terminated_length": 889.0, "epoch": 0.2520504100820164, "frac_reward_zero_std": 0.0, "grad_norm": 3.1757813903450502, "kl": 0.0117034912109375, "learning_rate": 9.382632411917896e-07, "loss": 0.0202, "num_tokens": 54674058.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9522824287414551, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04022095800934172, "rewards/wordcountpos_reward/raw_geo/std": 0.0655810379231539, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9833333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.029814239699997188, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1260 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 1148.0625, "completions/mean_terminated_length": 1124.60009765625, "completions/min_length": 934.0, "completions/min_terminated_length": 934.0, "epoch": 0.252250450090018, "frac_reward_zero_std": 0.0, "grad_norm": 3.6974507527693534, "kl": 0.0130462646484375, "learning_rate": 9.38104293332286e-07, "loss": 0.0099, "num_tokens": 54724667.0, "reward": -7.450580596923828e-09, "reward_std": 1.0409611463546753, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.05184588466799917, "rewards/wordcountpos_reward/raw_geo/std": 0.09853057643512145, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.6541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.17250872227009062, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1261 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1459.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1130.1875, "completions/mean_terminated_length": 1130.1875, "completions/min_length": 835.0, "completions/min_terminated_length": 835.0, "epoch": 0.2524504900980196, "frac_reward_zero_std": 0.0, "grad_norm": 3.3250305011590573, "kl": 0.0126495361328125, "learning_rate": 9.379451562313665e-07, "loss": -0.0045, "num_tokens": 54752950.0, "reward": 7.450580596923828e-09, "reward_std": 1.0683720111846924, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.010528151264465987, "rewards/wordcountpos_reward/raw_geo/std": 0.02327544213199588, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298359, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1262 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1396.0, "completions/mean_length": 1173.3125, "completions/mean_terminated_length": 1126.6429443359375, "completions/min_length": 754.0, "completions/min_terminated_length": 754.0, "epoch": 0.25265053010602123, "frac_reward_zero_std": 0.0, "grad_norm": 3.348001591553057, "kl": 0.0131072998046875, "learning_rate": 9.377858299666274e-07, "loss": -0.0698, "num_tokens": 54790131.0, "reward": 4.470348358154297e-08, "reward_std": 1.0145583152770996, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.17056655754872932, "rewards/wordcountpos_reward/raw_geo/std": 0.1295930519159792, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.12164002752505566, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1263 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1189.9375, "completions/mean_terminated_length": 1118.3846435546875, "completions/min_length": 785.0, "completions/min_terminated_length": 785.0, "epoch": 0.2528505701140228, "frac_reward_zero_std": 0.0, "grad_norm": 3.1579396158729773, "kl": 0.011810302734375, "learning_rate": 9.376263146157567e-07, "loss": -0.0406, "num_tokens": 54840154.0, "reward": 0.0, "reward_std": 0.7796655893325806, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.01128076966471883, "rewards/wordcountpos_reward/raw_geo/std": 0.09652555944155027, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1264 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1124.75, "completions/mean_terminated_length": 1099.7333984375, "completions/min_length": 813.0, "completions/min_terminated_length": 813.0, "epoch": 0.2530506101220244, "frac_reward_zero_std": 0.0, "grad_norm": 2.8229567483755087, "kl": 0.00846099853515625, "learning_rate": 9.374666102565349e-07, "loss": 0.0113, "num_tokens": 54885150.0, "reward": -7.450580596923828e-09, "reward_std": 1.0440900325775146, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.16922787602805842, "rewards/wordcountpos_reward/raw_geo/std": 0.0929617691840423, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11603000888978234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1265 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1371.0, "completions/max_terminated_length": 1371.0, "completions/mean_length": 1119.125, "completions/mean_terminated_length": 1119.125, "completions/min_length": 683.0, "completions/min_terminated_length": 683.0, "epoch": 0.253250650130026, "frac_reward_zero_std": 0.0, "grad_norm": 2.6367575137259913, "kl": 0.00952911376953125, "learning_rate": 9.373067169668342e-07, "loss": -0.0402, "num_tokens": 54939768.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7578144073486328, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06613304395781526, "rewards/wordcountpos_reward/raw_geo/std": 0.09076340207322646, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1266 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1215.625, "completions/mean_terminated_length": 1196.666748046875, "completions/min_length": 623.0, "completions/min_terminated_length": 623.0, "epoch": 0.2534506901380276, "frac_reward_zero_std": 0.0, "grad_norm": 3.2372695477011266, "kl": 0.0131683349609375, "learning_rate": 9.3714663482462e-07, "loss": -0.0329, "num_tokens": 54985642.0, "reward": 0.0, "reward_std": 0.7184152603149414, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.025812510818699015, "rewards/wordcountpos_reward/raw_geo/std": 0.10152114274486329, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.12405196043952266, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1267 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1354.0, "completions/max_terminated_length": 1354.0, "completions/mean_length": 1129.0625, "completions/mean_terminated_length": 1129.0625, "completions/min_length": 949.0, "completions/min_terminated_length": 949.0, "epoch": 0.2536507301460292, "frac_reward_zero_std": 0.0, "grad_norm": 3.443997557440038, "kl": 0.0140838623046875, "learning_rate": 9.369863639079483e-07, "loss": -0.0305, "num_tokens": 55035747.0, "reward": 0.0, "reward_std": 0.6500293016433716, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.053422663878111516, "rewards/wordcountpos_reward/raw_geo/std": 0.04111691762533374, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11385500851066223, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1268 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1300.625, "completions/mean_terminated_length": 1234.166748046875, "completions/min_length": 1062.0, "completions/min_terminated_length": 1062.0, "epoch": 0.2538507701540308, "frac_reward_zero_std": 0.0, "grad_norm": 2.6639089399628704, "kl": 0.00926971435546875, "learning_rate": 9.368259042949684e-07, "loss": 0.0396, "num_tokens": 55091045.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0621726512908936, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.16927031563030392, "rewards/wordcountpos_reward/raw_geo/std": 0.1656906032063202, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0969917904124231, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1269 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1345.0, "completions/mean_length": 1065.3125, "completions/mean_terminated_length": 1003.21435546875, "completions/min_length": 713.0, "completions/min_terminated_length": 713.0, "epoch": 0.2540508101620324, "frac_reward_zero_std": 0.0, "grad_norm": 3.446181509655221, "kl": 0.0171356201171875, "learning_rate": 9.366652560639213e-07, "loss": 0.0588, "num_tokens": 55135194.0, "reward": 0.0, "reward_std": 1.0683015584945679, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.006379943924650185, "rewards/wordcountpos_reward/raw_geo/std": 0.05344627696656618, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.10470416879457554, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1270 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1301.0, "completions/max_terminated_length": 1301.0, "completions/mean_length": 984.6875, "completions/mean_terminated_length": 984.6875, "completions/min_length": 832.0, "completions/min_terminated_length": 832.0, "epoch": 0.254250850170034, "frac_reward_zero_std": 0.0, "grad_norm": 3.1148038314636026, "kl": 0.0119781494140625, "learning_rate": 9.36504419293139e-07, "loss": 0.0002, "num_tokens": 55177877.0, "reward": 0.0, "reward_std": 0.3591625690460205, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.13650479135143023, "rewards/wordcountpos_reward/raw_geo/std": 0.1886810896480471, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891873, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1271 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1144.0, "completions/max_terminated_length": 1144.0, "completions/mean_length": 973.6875, "completions/mean_terminated_length": 973.6875, "completions/min_length": 873.0, "completions/min_terminated_length": 873.0, "epoch": 0.2544508901780356, "frac_reward_zero_std": 0.0, "grad_norm": 1.8649460848496238, "kl": 0.004665374755859375, "learning_rate": 9.363433940610473e-07, "loss": -0.0021, "num_tokens": 55212016.0, "reward": 0.0, "reward_std": 0.6665674448013306, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10728997578930068, "rewards/wordcountpos_reward/raw_geo/std": 0.06827657733252843, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.75, "rewards/wordcountpos_reward/raw_rule/std": 0.12049281521534189, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1272 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1263.0, "completions/mean_length": 997.9375, "completions/mean_terminated_length": 964.4667358398438, "completions/min_length": 716.0, "completions/min_terminated_length": 716.0, "epoch": 0.2546509301860372, "frac_reward_zero_std": 0.0, "grad_norm": 3.9371494230639654, "kl": 0.0152130126953125, "learning_rate": 9.36182180446162e-07, "loss": 0.0211, "num_tokens": 55256783.0, "reward": -3.725290298461914e-09, "reward_std": 0.9784665107727051, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.16698309507261566, "rewards/wordcountpos_reward/raw_geo/std": 0.3973044352034272, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09339283817414602, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1273 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1315.625, "completions/mean_terminated_length": 1273.0770263671875, "completions/min_length": 969.0, "completions/min_terminated_length": 969.0, "epoch": 0.2548509701940388, "frac_reward_zero_std": 0.0, "grad_norm": 2.907954872908463, "kl": 0.00988006591796875, "learning_rate": 9.360207785270919e-07, "loss": 0.0119, "num_tokens": 55301665.0, "reward": -2.2351741790771484e-08, "reward_std": 0.947008490562439, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03704052276339866, "rewards/wordcountpos_reward/raw_geo/std": 0.0511904361538753, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943223, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1274 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1221.875, "completions/mean_terminated_length": 1203.3333740234375, "completions/min_length": 1010.0, "completions/min_terminated_length": 1010.0, "epoch": 0.2550510102020404, "frac_reward_zero_std": 0.0, "grad_norm": 3.262873969831839, "kl": 0.0124969482421875, "learning_rate": 9.358591883825374e-07, "loss": 0.0128, "num_tokens": 55353775.0, "reward": 0.0, "reward_std": 0.48852062225341797, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02199998374618549, "rewards/wordcountpos_reward/raw_geo/std": 0.10642002252772947, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1275 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1360.0, "completions/max_terminated_length": 1360.0, "completions/mean_length": 1032.0, "completions/mean_terminated_length": 1032.0, "completions/min_length": 761.0, "completions/min_terminated_length": 761.0, "epoch": 0.255251050210042, "frac_reward_zero_std": 0.0, "grad_norm": 2.5441785081223056, "kl": 0.0072021484375, "learning_rate": 9.356974100912905e-07, "loss": -0.0012, "num_tokens": 55386567.0, "reward": 0.0, "reward_std": 0.9048218727111816, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.18271576355998648, "rewards/wordcountpos_reward/raw_geo/std": 0.07811095232491601, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195306, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1276 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1170.0, "completions/max_terminated_length": 1170.0, "completions/mean_length": 1001.375, "completions/mean_terminated_length": 1001.375, "completions/min_length": 811.0, "completions/min_terminated_length": 811.0, "epoch": 0.25545109021804363, "frac_reward_zero_std": 0.0, "grad_norm": 2.892584458556108, "kl": 0.010223388671875, "learning_rate": 9.355354437322349e-07, "loss": -0.0218, "num_tokens": 55427677.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8625585436820984, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.028637234879569543, "rewards/wordcountpos_reward/raw_geo/std": 0.06760539234396637, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8250000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1277 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1325.0, "completions/mean_length": 963.6875, "completions/mean_terminated_length": 927.9334106445312, "completions/min_length": 468.0, "completions/min_terminated_length": 468.0, "epoch": 0.2556511302260452, "frac_reward_zero_std": 0.0, "grad_norm": 3.7800643707523567, "kl": 0.00937652587890625, "learning_rate": 9.353732893843463e-07, "loss": -0.0182, "num_tokens": 55454688.0, "reward": 1.4901161193847656e-08, "reward_std": 0.903618335723877, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.028224982358281237, "rewards/wordcountpos_reward/raw_geo/std": 0.05138158383457142, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10318986456114841, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1278 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1105.625, "completions/mean_terminated_length": 1049.2857666015625, "completions/min_length": 708.0, "completions/min_terminated_length": 708.0, "epoch": 0.2558511702340468, "frac_reward_zero_std": 0.0, "grad_norm": 3.0079094875909136, "kl": 0.01226806640625, "learning_rate": 9.352109471266921e-07, "loss": -0.0367, "num_tokens": 55499418.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0564343929290771, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08008234643155482, "rewards/wordcountpos_reward/raw_geo/std": 0.06916243548698373, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.05947299418254506, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1279 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1287.9375, "completions/mean_terminated_length": 1239.0, "completions/min_length": 951.0, "completions/min_terminated_length": 951.0, "epoch": 0.2560512102420484, "frac_reward_zero_std": 0.0, "grad_norm": 2.6124486637998205, "kl": 0.00928497314453125, "learning_rate": 9.350484170384305e-07, "loss": -0.0216, "num_tokens": 55552825.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9763938784599304, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06970593028905295, "rewards/wordcountpos_reward/raw_geo/std": 0.27792124404991814, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133131, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1280 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 1264.75, "completions/mean_terminated_length": 1210.4615478515625, "completions/min_length": 968.0, "completions/min_terminated_length": 968.0, "epoch": 0.25625125025005, "frac_reward_zero_std": 0.0, "grad_norm": 2.9215080353326526, "kl": 0.012176513671875, "learning_rate": 9.348856991988124e-07, "loss": -0.0126, "num_tokens": 55607445.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8163424134254456, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06512166428289373, "rewards/wordcountpos_reward/raw_geo/std": 0.03965902068909061, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.07698003589195009, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1281 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1189.9375, "completions/mean_terminated_length": 1086.5833740234375, "completions/min_length": 807.0, "completions/min_terminated_length": 807.0, "epoch": 0.25645129025805163, "frac_reward_zero_std": 0.0, "grad_norm": 2.738373363053566, "kl": 0.007617950439453125, "learning_rate": 9.347227936871798e-07, "loss": 0.0458, "num_tokens": 55649940.0, "reward": 0.0, "reward_std": 1.0240424871444702, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07930292713786982, "rewards/wordcountpos_reward/raw_geo/std": 0.08234009464922543, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823631, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1282 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 1209.5625, "completions/mean_terminated_length": 1190.2000732421875, "completions/min_length": 1001.0, "completions/min_terminated_length": 1001.0, "epoch": 0.2566513302660532, "frac_reward_zero_std": 0.0, "grad_norm": 3.156896390176148, "kl": 0.0163421630859375, "learning_rate": 9.345597005829659e-07, "loss": -0.0253, "num_tokens": 55689333.0, "reward": 0.0, "reward_std": 0.5715999603271484, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.02092505244427041, "rewards/wordcountpos_reward/raw_geo/std": 0.09103752137188358, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101763, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1283 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1341.0, "completions/max_terminated_length": 1341.0, "completions/mean_length": 1074.9375, "completions/mean_terminated_length": 1074.9375, "completions/min_length": 819.0, "completions/min_terminated_length": 819.0, "epoch": 0.2568513702740548, "frac_reward_zero_std": 0.0, "grad_norm": 3.4638608716053567, "kl": 0.0129852294921875, "learning_rate": 9.343964199656958e-07, "loss": -0.0228, "num_tokens": 55725700.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0137016773223877, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.29295312213273805, "rewards/wordcountpos_reward/raw_geo/std": 0.17071984708252969, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1284 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1330.0, "completions/mean_length": 1211.5625, "completions/mean_terminated_length": 1170.357177734375, "completions/min_length": 996.0, "completions/min_terminated_length": 996.0, "epoch": 0.2570514102820564, "frac_reward_zero_std": 0.0, "grad_norm": 3.4762024093779567, "kl": 0.0132598876953125, "learning_rate": 9.342329519149857e-07, "loss": -0.0003, "num_tokens": 55768733.0, "reward": 0.0, "reward_std": 0.9034953117370605, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0421967356798582, "rewards/wordcountpos_reward/raw_geo/std": 0.0849121131335797, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116194, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1285 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1208.0, "completions/max_terminated_length": 1208.0, "completions/mean_length": 965.0625, "completions/mean_terminated_length": 965.0625, "completions/min_length": 754.0, "completions/min_terminated_length": 754.0, "epoch": 0.257251450290058, "frac_reward_zero_std": 0.0, "grad_norm": 4.1527745530547, "kl": 0.014801025390625, "learning_rate": 9.340692965105436e-07, "loss": -0.0444, "num_tokens": 55817182.0, "reward": 0.0, "reward_std": 1.0609381198883057, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06974323276844036, "rewards/wordcountpos_reward/raw_geo/std": 0.14156279044522646, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.6958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11666666666666668, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1286 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1435.0, "completions/mean_length": 1320.0, "completions/mean_terminated_length": 1278.4615478515625, "completions/min_length": 1150.0, "completions/min_terminated_length": 1150.0, "epoch": 0.25745149029805964, "frac_reward_zero_std": 0.0, "grad_norm": 2.752750218479042, "kl": 0.0117340087890625, "learning_rate": 9.339054538321684e-07, "loss": -0.0154, "num_tokens": 55867950.0, "reward": -2.9802322387695312e-08, "reward_std": 1.05967378616333, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.17496370841224154, "rewards/wordcountpos_reward/raw_geo/std": 0.1790953166289047, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590962, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1287 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1167.0, "completions/max_terminated_length": 1167.0, "completions/mean_length": 957.5, "completions/mean_terminated_length": 957.5, "completions/min_length": 725.0, "completions/min_terminated_length": 725.0, "epoch": 0.2576515303060612, "frac_reward_zero_std": 0.0, "grad_norm": 3.914814068971208, "kl": 0.0126953125, "learning_rate": 9.337414239597508e-07, "loss": -0.041, "num_tokens": 55903118.0, "reward": 0.0, "reward_std": 1.0278408527374268, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.043575248178272186, "rewards/wordcountpos_reward/raw_geo/std": 0.04494286531979787, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1500617156989701, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1288 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1246.0, "completions/max_terminated_length": 1246.0, "completions/mean_length": 1079.625, "completions/mean_terminated_length": 1079.625, "completions/min_length": 951.0, "completions/min_terminated_length": 951.0, "epoch": 0.2578515703140628, "frac_reward_zero_std": 0.0, "grad_norm": 3.502875907660986, "kl": 0.0133056640625, "learning_rate": 9.335772069732721e-07, "loss": 0.0121, "num_tokens": 55949144.0, "reward": 5.960464477539063e-08, "reward_std": 0.5807216167449951, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.016232348059682405, "rewards/wordcountpos_reward/raw_geo/std": 0.057718988541358776, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.131021626713557, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1289 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1342.0, "completions/max_terminated_length": 1342.0, "completions/mean_length": 1090.125, "completions/mean_terminated_length": 1090.125, "completions/min_length": 870.0, "completions/min_terminated_length": 870.0, "epoch": 0.2580516103220644, "frac_reward_zero_std": 0.0, "grad_norm": 2.9485236288450274, "kl": 0.0113372802734375, "learning_rate": 9.334128029528056e-07, "loss": -0.0206, "num_tokens": 55991850.0, "reward": 0.0, "reward_std": 0.52561354637146, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14546362731117893, "rewards/wordcountpos_reward/raw_geo/std": 0.13004232923839992, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1290 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1233.0, "completions/max_terminated_length": 1233.0, "completions/mean_length": 833.5, "completions/mean_terminated_length": 833.5, "completions/min_length": 548.0, "completions/min_terminated_length": 548.0, "epoch": 0.25825165033006603, "frac_reward_zero_std": 0.0, "grad_norm": 4.1589337518459955, "kl": 0.011627197265625, "learning_rate": 9.332482119785154e-07, "loss": -0.0537, "num_tokens": 56034122.0, "reward": 0.0, "reward_std": 1.0124348402023315, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04517783157465571, "rewards/wordcountpos_reward/raw_geo/std": 0.04922203207266278, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.14168300559373406, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1291 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1450.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1100.5625, "completions/mean_terminated_length": 1100.5625, "completions/min_length": 839.0, "completions/min_terminated_length": 839.0, "epoch": 0.25845169033806764, "frac_reward_zero_std": 0.0, "grad_norm": 3.7382282331356254, "kl": 0.01348876953125, "learning_rate": 9.330834341306568e-07, "loss": 0.0025, "num_tokens": 56069435.0, "reward": 2.9802322387695312e-08, "reward_std": 0.786846935749054, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08983485375460394, "rewards/wordcountpos_reward/raw_geo/std": 0.06404521590731933, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11979921473804347, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1292 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1199.8125, "completions/mean_terminated_length": 1130.5384521484375, "completions/min_length": 972.0, "completions/min_terminated_length": 972.0, "epoch": 0.2586517303460692, "frac_reward_zero_std": 0.0, "grad_norm": 3.6756969750041906, "kl": 0.014495849609375, "learning_rate": 9.329184694895761e-07, "loss": -0.0248, "num_tokens": 56110576.0, "reward": 0.0, "reward_std": 0.8876994848251343, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14079246728090464, "rewards/wordcountpos_reward/raw_geo/std": 0.309775475969557, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1293 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1476.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1096.6875, "completions/mean_terminated_length": 1096.6875, "completions/min_length": 857.0, "completions/min_terminated_length": 857.0, "epoch": 0.2588517703540708, "frac_reward_zero_std": 0.0, "grad_norm": 3.5586776080094893, "kl": 0.0144805908203125, "learning_rate": 9.327533181357108e-07, "loss": -0.0613, "num_tokens": 56158651.0, "reward": 0.0, "reward_std": 0.7723835706710815, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04638565570125904, "rewards/wordcountpos_reward/raw_geo/std": 0.08166517632064407, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258099, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1294 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1342.125, "completions/mean_terminated_length": 1305.6923828125, "completions/min_length": 1111.0, "completions/min_terminated_length": 1111.0, "epoch": 0.2590518103620724, "frac_reward_zero_std": 0.0, "grad_norm": 3.314303317882215, "kl": 0.0146331787109375, "learning_rate": 9.325879801495896e-07, "loss": -0.0206, "num_tokens": 56206045.0, "reward": -1.1175870895385742e-08, "reward_std": 1.0447652339935303, "rewards/wordcountpos_reward/mean": -1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.163557598438544, "rewards/wordcountpos_reward/raw_geo/std": 0.27087149321320614, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.046943622609505783, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1295 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1246.0, "completions/max_terminated_length": 1246.0, "completions/mean_length": 1050.0, "completions/mean_terminated_length": 1050.0, "completions/min_length": 799.0, "completions/min_terminated_length": 799.0, "epoch": 0.25925185037007403, "frac_reward_zero_std": 0.0, "grad_norm": 3.6004080229042112, "kl": 0.009552001953125, "learning_rate": 9.32422455611832e-07, "loss": 0.0002, "num_tokens": 56240501.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9028578996658325, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0861874301459659, "rewards/wordcountpos_reward/raw_geo/std": 0.17727228389438807, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09418264367902597, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1296 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1439.0, "completions/max_terminated_length": 1439.0, "completions/mean_length": 1217.625, "completions/mean_terminated_length": 1217.625, "completions/min_length": 872.0, "completions/min_terminated_length": 872.0, "epoch": 0.2594518903780756, "frac_reward_zero_std": 0.0, "grad_norm": 3.1142774520483307, "kl": 0.0130462646484375, "learning_rate": 9.322567446031485e-07, "loss": 0.0169, "num_tokens": 56293007.0, "reward": -1.1175870895385742e-08, "reward_std": 1.0351372957229614, "rewards/wordcountpos_reward/mean": -1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.00587760601972199, "rewards/wordcountpos_reward/raw_geo/std": 0.13707811725817387, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590962, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1297 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1151.0, "completions/max_terminated_length": 1151.0, "completions/mean_length": 959.0625, "completions/mean_terminated_length": 959.0625, "completions/min_length": 694.0, "completions/min_terminated_length": 694.0, "epoch": 0.2596519303860772, "frac_reward_zero_std": 0.0, "grad_norm": 2.8855079747390597, "kl": 0.00557708740234375, "learning_rate": 9.320908472043405e-07, "loss": -0.0256, "num_tokens": 56332312.0, "reward": 0.0, "reward_std": 0.9650878310203552, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2374036854434155, "rewards/wordcountpos_reward/raw_geo/std": 0.3076723839392811, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1460593486680443, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1298 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1247.75, "completions/mean_terminated_length": 1096.4000244140625, "completions/min_length": 931.0, "completions/min_terminated_length": 931.0, "epoch": 0.2598519703940788, "frac_reward_zero_std": 0.0, "grad_norm": 3.5448430410040217, "kl": 0.01348876953125, "learning_rate": 9.319247634963005e-07, "loss": 0.0239, "num_tokens": 56377452.0, "reward": 0.0, "reward_std": 0.9623478055000305, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.008811470563813087, "rewards/wordcountpos_reward/raw_geo/std": 0.07854741734794526, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09803627446568496, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1299 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1414.0, "completions/mean_length": 1367.125, "completions/mean_terminated_length": 1287.4000244140625, "completions/min_length": 1106.0, "completions/min_terminated_length": 1106.0, "epoch": 0.2600520104020804, "frac_reward_zero_std": 0.0, "grad_norm": 2.6420638267204843, "kl": 0.00991058349609375, "learning_rate": 9.317584935600112e-07, "loss": 0.0115, "num_tokens": 56431318.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5112171173095703, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05187853016644232, "rewards/wordcountpos_reward/raw_geo/std": 0.12294637238428911, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10532137766186216, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1300 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1269.0, "completions/max_terminated_length": 1269.0, "completions/mean_length": 1033.0, "completions/mean_terminated_length": 1033.0, "completions/min_length": 854.0, "completions/min_terminated_length": 854.0, "epoch": 0.26025205041008204, "frac_reward_zero_std": 0.0, "grad_norm": 3.5760471199024932, "kl": 0.0125579833984375, "learning_rate": 9.315920374765473e-07, "loss": -0.0289, "num_tokens": 56473102.0, "reward": -2.2351741790771484e-08, "reward_std": 1.068831205368042, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.41433268403627094, "rewards/wordcountpos_reward/raw_geo/std": 0.12221847942477106, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1301 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1384.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 1162.4375, "completions/mean_terminated_length": 1162.4375, "completions/min_length": 981.0, "completions/min_terminated_length": 981.0, "epoch": 0.2604520904180836, "frac_reward_zero_std": 0.0, "grad_norm": 2.8413355835479677, "kl": 0.0210113525390625, "learning_rate": 9.314253953270729e-07, "loss": 0.0161, "num_tokens": 56515709.0, "reward": 0.0, "reward_std": 1.0106377601623535, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.007029772900578802, "rewards/wordcountpos_reward/raw_geo/std": 0.054686941639807043, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1302 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1358.0, "completions/max_terminated_length": 1358.0, "completions/mean_length": 1194.6875, "completions/mean_terminated_length": 1194.6875, "completions/min_length": 1121.0, "completions/min_terminated_length": 1121.0, "epoch": 0.2606521304260852, "frac_reward_zero_std": 0.0, "grad_norm": 2.6049537937413323, "kl": 0.00963592529296875, "learning_rate": 9.312585671928438e-07, "loss": 0.001, "num_tokens": 56560456.0, "reward": -3.725290298461914e-09, "reward_std": 1.0658988952636719, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.09387440084174416, "rewards/wordcountpos_reward/raw_geo/std": 0.1573949780043434, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.10886621079036347, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1303 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1354.0, "completions/mean_length": 1051.1875, "completions/mean_terminated_length": 1021.2667236328125, "completions/min_length": 908.0, "completions/min_terminated_length": 908.0, "epoch": 0.2608521704340868, "frac_reward_zero_std": 0.0, "grad_norm": 3.2230227145957655, "kl": 0.0103607177734375, "learning_rate": 9.31091553155206e-07, "loss": -0.0021, "num_tokens": 56603891.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7782348990440369, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07878562149932157, "rewards/wordcountpos_reward/raw_geo/std": 0.08156958135051963, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1304 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 1500.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 1500.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 1500.0, "completions/min_terminated_length": 0.0, "epoch": 0.26105221044208843, "frac_reward_zero_std": 0.0, "grad_norm": 2.1813394648575954, "kl": 0.0089111328125, "learning_rate": 9.309243532955965e-07, "loss": 0.0004, "num_tokens": 56662019.0, "reward": 0.0, "reward_std": 1.019121527671814, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0003862166239334292, "rewards/wordcountpos_reward/raw_geo/std": 0.0381657552702585, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1305 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1351.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 1116.375, "completions/mean_terminated_length": 1116.375, "completions/min_length": 926.0, "completions/min_terminated_length": 926.0, "epoch": 0.26125225045009004, "frac_reward_zero_std": 0.0, "grad_norm": 3.0748479598448295, "kl": 0.009796142578125, "learning_rate": 9.307569676955427e-07, "loss": -0.0145, "num_tokens": 56701257.0, "reward": 0.0, "reward_std": 0.4968336820602417, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08799337952196712, "rewards/wordcountpos_reward/raw_geo/std": 0.20776554098218755, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1306 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1327.5, "completions/mean_terminated_length": 1302.857177734375, "completions/min_length": 1148.0, "completions/min_terminated_length": 1148.0, "epoch": 0.2614522904580916, "frac_reward_zero_std": 0.0, "grad_norm": 2.721536714300266, "kl": 0.0088348388671875, "learning_rate": 9.305893964366622e-07, "loss": -0.0256, "num_tokens": 56758745.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9516972899436951, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.18678117192364793, "rewards/wordcountpos_reward/raw_geo/std": 0.22755365163896482, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11385500851066223, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1307 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1373.6875, "completions/mean_terminated_length": 1331.5833740234375, "completions/min_length": 1018.0, "completions/min_terminated_length": 1018.0, "epoch": 0.2616523304660932, "frac_reward_zero_std": 0.0, "grad_norm": 3.335858940834732, "kl": 0.0152587890625, "learning_rate": 9.30421639600664e-07, "loss": 0.003, "num_tokens": 56809836.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0039998292922974, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10080624442977545, "rewards/wordcountpos_reward/raw_geo/std": 0.10859932385832648, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1308 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1356.0, "completions/max_terminated_length": 1356.0, "completions/mean_length": 1124.3125, "completions/mean_terminated_length": 1124.3125, "completions/min_length": 947.0, "completions/min_terminated_length": 947.0, "epoch": 0.2618523704740948, "frac_reward_zero_std": 0.0, "grad_norm": 2.9559535032270343, "kl": 0.0161285400390625, "learning_rate": 9.302536972693468e-07, "loss": 0.0395, "num_tokens": 56844529.0, "reward": 0.0, "reward_std": 0.5205225944519043, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04030075384568577, "rewards/wordcountpos_reward/raw_geo/std": 0.05920299075581017, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.06666666666666665, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1309 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1364.0, "completions/max_terminated_length": 1364.0, "completions/mean_length": 1119.3125, "completions/mean_terminated_length": 1119.3125, "completions/min_length": 841.0, "completions/min_terminated_length": 841.0, "epoch": 0.26205241048209643, "frac_reward_zero_std": 0.0, "grad_norm": 2.241231224971887, "kl": 0.0090789794921875, "learning_rate": 9.300855695246001e-07, "loss": 0.0157, "num_tokens": 56884694.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6421548128128052, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11339379914287477, "rewards/wordcountpos_reward/raw_geo/std": 0.10363888855277821, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725111, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1310 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1392.0, "completions/max_terminated_length": 1392.0, "completions/mean_length": 1182.5, "completions/mean_terminated_length": 1182.5, "completions/min_length": 975.0, "completions/min_terminated_length": 975.0, "epoch": 0.26225245049009804, "frac_reward_zero_std": 0.0, "grad_norm": 2.926127619303371, "kl": 0.0115509033203125, "learning_rate": 9.299172564484037e-07, "loss": -0.0109, "num_tokens": 56929126.0, "reward": -7.450580596923828e-09, "reward_std": 1.065841794013977, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.026402997660014693, "rewards/wordcountpos_reward/raw_geo/std": 0.0982617540146608, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.026874192494328493, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1311 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1255.0, "completions/mean_length": 1044.75, "completions/mean_terminated_length": 1014.4000244140625, "completions/min_length": 857.0, "completions/min_terminated_length": 857.0, "epoch": 0.2624524904980996, "frac_reward_zero_std": 0.0, "grad_norm": 3.665182666637484, "kl": 0.0137481689453125, "learning_rate": 9.297487581228278e-07, "loss": -0.0256, "num_tokens": 56961930.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6444878578186035, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0035201484503752907, "rewards/wordcountpos_reward/raw_geo/std": 0.03998971541012088, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1312 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1228.0, "completions/max_terminated_length": 1228.0, "completions/mean_length": 1003.5625, "completions/mean_terminated_length": 1003.5625, "completions/min_length": 776.0, "completions/min_terminated_length": 776.0, "epoch": 0.2626525305061012, "frac_reward_zero_std": 0.0, "grad_norm": 3.117479913061063, "kl": 0.00811767578125, "learning_rate": 9.295800746300333e-07, "loss": -0.0002, "num_tokens": 57008827.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0623592138290405, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.21138889000868058, "rewards/wordcountpos_reward/raw_geo/std": 0.06605080761790647, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13662601021279466, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1313 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1254.0, "completions/max_terminated_length": 1254.0, "completions/mean_length": 1018.4375, "completions/mean_terminated_length": 1018.4375, "completions/min_length": 868.0, "completions/min_terminated_length": 868.0, "epoch": 0.2628525705141028, "frac_reward_zero_std": 0.0, "grad_norm": 3.111308755693173, "kl": 0.014068603515625, "learning_rate": 9.294112060522707e-07, "loss": -0.0057, "num_tokens": 57058722.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8369705677032471, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07324355186536505, "rewards/wordcountpos_reward/raw_geo/std": 0.1534260980394482, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1500617156989701, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1314 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1327.0, "completions/max_terminated_length": 1327.0, "completions/mean_length": 1118.6875, "completions/mean_terminated_length": 1118.6875, "completions/min_length": 875.0, "completions/min_terminated_length": 875.0, "epoch": 0.26305261052210444, "frac_reward_zero_std": 0.0, "grad_norm": 3.165806866067674, "kl": 0.0119171142578125, "learning_rate": 9.29242152471881e-07, "loss": 0.0238, "num_tokens": 57110381.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7537950277328491, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12126831470287965, "rewards/wordcountpos_reward/raw_geo/std": 0.06961113409480561, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.15491933384829668, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1315 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1249.0, "completions/max_terminated_length": 1249.0, "completions/mean_length": 1119.625, "completions/mean_terminated_length": 1119.625, "completions/min_length": 754.0, "completions/min_terminated_length": 754.0, "epoch": 0.26325265053010605, "frac_reward_zero_std": 0.0, "grad_norm": 3.3213003949512054, "kl": 0.01226806640625, "learning_rate": 9.290729139712959e-07, "loss": -0.0109, "num_tokens": 57152983.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9175257682800293, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06356923035458562, "rewards/wordcountpos_reward/raw_geo/std": 0.11717775277599078, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1316 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1383.0, "completions/max_terminated_length": 1383.0, "completions/mean_length": 1083.6875, "completions/mean_terminated_length": 1083.6875, "completions/min_length": 699.0, "completions/min_terminated_length": 699.0, "epoch": 0.2634526905381076, "frac_reward_zero_std": 0.0, "grad_norm": 3.493175334128261, "kl": 0.012451171875, "learning_rate": 9.289034906330364e-07, "loss": -0.0132, "num_tokens": 57202906.0, "reward": 0.0, "reward_std": 0.8033919334411621, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.19616846080657924, "rewards/wordcountpos_reward/raw_geo/std": 0.12880272523051836, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1192569587999888, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1317 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1363.0, "completions/mean_length": 1143.9375, "completions/mean_terminated_length": 1120.2000732421875, "completions/min_length": 854.0, "completions/min_terminated_length": 854.0, "epoch": 0.2636527305461092, "frac_reward_zero_std": 0.0, "grad_norm": 3.368674241011562, "kl": 0.0130615234375, "learning_rate": 9.287338825397144e-07, "loss": -0.025, "num_tokens": 57246649.0, "reward": 0.0, "reward_std": 0.8065879940986633, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0912165502212347, "rewards/wordcountpos_reward/raw_geo/std": 0.13101515209777695, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1318 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1110.3125, "completions/mean_terminated_length": 1020.3846435546875, "completions/min_length": 751.0, "completions/min_terminated_length": 751.0, "epoch": 0.2638527705541108, "frac_reward_zero_std": 0.0, "grad_norm": 2.8502154225176315, "kl": 0.0113372802734375, "learning_rate": 9.285640897740315e-07, "loss": -0.0408, "num_tokens": 57294470.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8818243741989136, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07301610557198836, "rewards/wordcountpos_reward/raw_geo/std": 0.08821878668366548, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10878112581387148, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1319 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1306.0, "completions/max_terminated_length": 1306.0, "completions/mean_length": 1185.6875, "completions/mean_terminated_length": 1185.6875, "completions/min_length": 983.0, "completions/min_terminated_length": 983.0, "epoch": 0.26405281056211244, "frac_reward_zero_std": 0.0, "grad_norm": 2.830628450412457, "kl": 0.00850677490234375, "learning_rate": 9.283941124187794e-07, "loss": 0.0051, "num_tokens": 57332625.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0440890789031982, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03537423104814936, "rewards/wordcountpos_reward/raw_geo/std": 0.07573538213556574, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1320 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1432.0, "completions/max_terminated_length": 1432.0, "completions/mean_length": 1121.875, "completions/mean_terminated_length": 1121.875, "completions/min_length": 850.0, "completions/min_terminated_length": 850.0, "epoch": 0.264252850570114, "frac_reward_zero_std": 0.0, "grad_norm": 3.1565792310277754, "kl": 0.01190185546875, "learning_rate": 9.282239505568398e-07, "loss": -0.0457, "num_tokens": 57377071.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9014105796813965, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.015475371323433004, "rewards/wordcountpos_reward/raw_geo/std": 0.10963715732439772, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.13305526559931294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1321 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1286.0, "completions/max_terminated_length": 1286.0, "completions/mean_length": 1059.875, "completions/mean_terminated_length": 1059.875, "completions/min_length": 903.0, "completions/min_terminated_length": 903.0, "epoch": 0.2644528905781156, "frac_reward_zero_std": 0.0, "grad_norm": 3.4623503045748625, "kl": 0.012725830078125, "learning_rate": 9.280536042711843e-07, "loss": 0.0019, "num_tokens": 57426893.0, "reward": 0.0, "reward_std": 0.7773397564888, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06660652874005955, "rewards/wordcountpos_reward/raw_geo/std": 0.06535892696742167, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1322 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 1308.0625, "completions/mean_terminated_length": 1280.6429443359375, "completions/min_length": 1074.0, "completions/min_terminated_length": 1074.0, "epoch": 0.2646529305861172, "frac_reward_zero_std": 0.0, "grad_norm": 2.5581026411988477, "kl": 0.01007080078125, "learning_rate": 9.278830736448749e-07, "loss": 0.003, "num_tokens": 57465710.0, "reward": 0.0, "reward_std": 1.0626261234283447, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.13668668904954506, "rewards/wordcountpos_reward/raw_geo/std": 0.10590962318200114, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087679, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1323 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1298.0, "completions/max_terminated_length": 1298.0, "completions/mean_length": 1010.125, "completions/mean_terminated_length": 1010.125, "completions/min_length": 826.0, "completions/min_terminated_length": 826.0, "epoch": 0.26485297059411883, "frac_reward_zero_std": 0.0, "grad_norm": 3.178599080251012, "kl": 0.0114288330078125, "learning_rate": 9.277123587610627e-07, "loss": -0.017, "num_tokens": 57504896.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0451385974884033, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01110605229219865, "rewards/wordcountpos_reward/raw_geo/std": 0.16628909135656478, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1324 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1279.0, "completions/max_terminated_length": 1279.0, "completions/mean_length": 986.9375, "completions/mean_terminated_length": 986.9375, "completions/min_length": 661.0, "completions/min_terminated_length": 661.0, "epoch": 0.26505301060212044, "frac_reward_zero_std": 0.0, "grad_norm": 3.2899546761259764, "kl": 0.0110931396484375, "learning_rate": 9.275414597029892e-07, "loss": -0.0421, "num_tokens": 57543143.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0194789171218872, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08199274817558691, "rewards/wordcountpos_reward/raw_geo/std": 0.11202208502788084, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.0894427190999916, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1325 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1230.3125, "completions/mean_terminated_length": 1212.3333740234375, "completions/min_length": 956.0, "completions/min_terminated_length": 956.0, "epoch": 0.265253050610122, "frac_reward_zero_std": 0.0, "grad_norm": 3.1999373137212315, "kl": 0.013214111328125, "learning_rate": 9.273703765539856e-07, "loss": 0.008, "num_tokens": 57598172.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0607068538665771, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06759752511962717, "rewards/wordcountpos_reward/raw_geo/std": 0.32600691585950425, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.10000000000000002, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1326 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1335.875, "completions/mean_terminated_length": 1298.0, "completions/min_length": 1056.0, "completions/min_terminated_length": 1056.0, "epoch": 0.2654530906181236, "frac_reward_zero_std": 0.0, "grad_norm": 2.7561038392801196, "kl": 0.010406494140625, "learning_rate": 9.271991093974729e-07, "loss": 0.0455, "num_tokens": 57643634.0, "reward": 0.0, "reward_std": 0.9067040085792542, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.011244473880272929, "rewards/wordcountpos_reward/raw_geo/std": 0.14834116845580742, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08944271909999159, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1327 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1231.3125, "completions/mean_terminated_length": 1192.9285888671875, "completions/min_length": 979.0, "completions/min_terminated_length": 979.0, "epoch": 0.2656531306261252, "frac_reward_zero_std": 0.0, "grad_norm": 3.2127250472615114, "kl": 0.0118560791015625, "learning_rate": 9.270276583169615e-07, "loss": -0.0101, "num_tokens": 57679415.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7477010488510132, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02571372064069735, "rewards/wordcountpos_reward/raw_geo/std": 0.06179244424519644, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09259629622222519, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1328 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1275.625, "completions/mean_terminated_length": 1260.666748046875, "completions/min_length": 924.0, "completions/min_terminated_length": 924.0, "epoch": 0.26585317063412683, "frac_reward_zero_std": 0.0, "grad_norm": 2.9060665760288633, "kl": 0.0151519775390625, "learning_rate": 9.26856023396052e-07, "loss": 0.0219, "num_tokens": 57724153.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7904667854309082, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.006325948389958885, "rewards/wordcountpos_reward/raw_geo/std": 0.07352738937643481, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1329 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1264.0, "completions/mean_length": 971.75, "completions/mean_terminated_length": 936.5333862304688, "completions/min_length": 693.0, "completions/min_terminated_length": 693.0, "epoch": 0.26605321064212845, "frac_reward_zero_std": 0.0, "grad_norm": 4.293784926721248, "kl": 0.0136260986328125, "learning_rate": 9.266842047184341e-07, "loss": -0.0151, "num_tokens": 57774301.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0023233890533447, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08929856891248433, "rewards/wordcountpos_reward/raw_geo/std": 0.10912798576740892, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.14194417264596723, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1330 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1133.1875, "completions/mean_terminated_length": 1108.7333984375, "completions/min_length": 726.0, "completions/min_terminated_length": 726.0, "epoch": 0.26625325065013, "frac_reward_zero_std": 0.0, "grad_norm": 2.6117504817377273, "kl": 0.0096435546875, "learning_rate": 9.265122023678876e-07, "loss": -0.0095, "num_tokens": 57825328.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9647880792617798, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0431066818198256, "rewards/wordcountpos_reward/raw_geo/std": 0.038825996973331764, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.070841502796867, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1331 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1255.0, "completions/mean_length": 1076.625, "completions/mean_terminated_length": 1048.4000244140625, "completions/min_length": 875.0, "completions/min_terminated_length": 875.0, "epoch": 0.2664532906581316, "frac_reward_zero_std": 0.0, "grad_norm": 3.3233035719936965, "kl": 0.0125732421875, "learning_rate": 9.263400164282813e-07, "loss": -0.0711, "num_tokens": 57866082.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9972634315490723, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10954330783243124, "rewards/wordcountpos_reward/raw_geo/std": 0.12660006848122876, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07290277645477446, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1332 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 1063.75, "completions/mean_terminated_length": 918.3333740234375, "completions/min_length": 663.0, "completions/min_terminated_length": 663.0, "epoch": 0.2666533306661332, "frac_reward_zero_std": 0.0, "grad_norm": 2.8658710772958367, "kl": 0.012237548828125, "learning_rate": 9.261676469835742e-07, "loss": -0.0622, "num_tokens": 57909158.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7784160375595093, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09446571757206894, "rewards/wordcountpos_reward/raw_geo/std": 0.0724714191814173, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09583937179043481, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1333 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 1040.8125, "completions/mean_terminated_length": 1010.2000732421875, "completions/min_length": 622.0, "completions/min_terminated_length": 622.0, "epoch": 0.26685337067413484, "frac_reward_zero_std": 0.0, "grad_norm": 3.849908116579512, "kl": 0.01544189453125, "learning_rate": 9.259950941178143e-07, "loss": -0.0529, "num_tokens": 57944483.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8825619220733643, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.038455335685986705, "rewards/wordcountpos_reward/raw_geo/std": 0.07967617689491613, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.046943622609505783, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1334 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1488.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1191.75, "completions/mean_terminated_length": 1191.75, "completions/min_length": 910.0, "completions/min_terminated_length": 910.0, "epoch": 0.26705341068213645, "frac_reward_zero_std": 0.0, "grad_norm": 3.294961518687421, "kl": 0.0128021240234375, "learning_rate": 9.258223579151391e-07, "loss": -0.0127, "num_tokens": 57978503.0, "reward": 7.450580596923828e-09, "reward_std": 1.0289199352264404, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.1318588002451407, "rewards/wordcountpos_reward/raw_geo/std": 0.040993331363048655, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0807373427759331, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1335 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1389.0, "completions/max_terminated_length": 1389.0, "completions/mean_length": 1096.4375, "completions/mean_terminated_length": 1096.4375, "completions/min_length": 957.0, "completions/min_terminated_length": 957.0, "epoch": 0.267253450690138, "frac_reward_zero_std": 0.0, "grad_norm": 3.482373763042207, "kl": 0.011566162109375, "learning_rate": 9.256494384597757e-07, "loss": 0.0141, "num_tokens": 58018846.0, "reward": 0.0, "reward_std": 0.8990585207939148, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04837066820683459, "rewards/wordcountpos_reward/raw_geo/std": 0.16541771169728306, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1336 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1284.0, "completions/max_terminated_length": 1284.0, "completions/mean_length": 1043.4375, "completions/mean_terminated_length": 1043.4375, "completions/min_length": 827.0, "completions/min_terminated_length": 827.0, "epoch": 0.2674534906981396, "frac_reward_zero_std": 0.0, "grad_norm": 3.613469038751485, "kl": 0.015960693359375, "learning_rate": 9.254763358360404e-07, "loss": 0.0034, "num_tokens": 58058677.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8007277846336365, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.4691028912080138, "rewards/wordcountpos_reward/raw_geo/std": 0.3709570160688002, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10036968702787748, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1337 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1416.0, "completions/max_terminated_length": 1416.0, "completions/mean_length": 1013.625, "completions/mean_terminated_length": 1013.625, "completions/min_length": 694.0, "completions/min_terminated_length": 694.0, "epoch": 0.26765353070614123, "frac_reward_zero_std": 0.0, "grad_norm": 3.2969465098105486, "kl": 0.011199951171875, "learning_rate": 9.253030501283385e-07, "loss": -0.0032, "num_tokens": 58109831.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8624023199081421, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.21244774534812277, "rewards/wordcountpos_reward/raw_geo/std": 0.0825516805567163, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.1343709624716425, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1338 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1397.0, "completions/mean_length": 1257.25, "completions/mean_terminated_length": 1111.5999755859375, "completions/min_length": 788.0, "completions/min_terminated_length": 788.0, "epoch": 0.26785357071414284, "frac_reward_zero_std": 0.0, "grad_norm": 2.140477136099414, "kl": 0.00653076171875, "learning_rate": 9.251295814211653e-07, "loss": 0.0038, "num_tokens": 58149803.0, "reward": 7.450580596923828e-09, "reward_std": 1.0512056350708008, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.026270034125531232, "rewards/wordcountpos_reward/raw_geo/std": 0.06230033372380366, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1339 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1142.0, "completions/max_terminated_length": 1142.0, "completions/mean_length": 990.6875, "completions/mean_terminated_length": 990.6875, "completions/min_length": 839.0, "completions/min_terminated_length": 839.0, "epoch": 0.26805361072214445, "frac_reward_zero_std": 0.0, "grad_norm": 3.852051986465934, "kl": 0.0138397216796875, "learning_rate": 9.249559297991048e-07, "loss": -0.0026, "num_tokens": 58180854.0, "reward": -1.4901161193847656e-08, "reward_std": 1.024838924407959, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.013040899338463155, "rewards/wordcountpos_reward/raw_geo/std": 0.09594836052791329, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1340 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1434.0, "completions/max_terminated_length": 1434.0, "completions/mean_length": 1068.1875, "completions/mean_terminated_length": 1068.1875, "completions/min_length": 743.0, "completions/min_terminated_length": 743.0, "epoch": 0.268253650730146, "frac_reward_zero_std": 0.0, "grad_norm": 3.44857219439527, "kl": 0.0167694091796875, "learning_rate": 9.247820953468303e-07, "loss": -0.0598, "num_tokens": 58229393.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9985100030899048, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07919737214670426, "rewards/wordcountpos_reward/raw_geo/std": 0.06912740452471008, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0806225774829855, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1341 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1349.25, "completions/mean_terminated_length": 1314.4615478515625, "completions/min_length": 1006.0, "completions/min_terminated_length": 1006.0, "epoch": 0.2684536907381476, "frac_reward_zero_std": 0.0, "grad_norm": 2.9074942292928037, "kl": 0.012451171875, "learning_rate": 9.24608078149104e-07, "loss": 0.0001, "num_tokens": 58276677.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8224983215332031, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12249196806413662, "rewards/wordcountpos_reward/raw_geo/std": 0.0753645342761518, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.05443310539518172, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1342 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1343.0, "completions/max_terminated_length": 1343.0, "completions/mean_length": 1074.625, "completions/mean_terminated_length": 1074.625, "completions/min_length": 899.0, "completions/min_terminated_length": 899.0, "epoch": 0.26865373074614923, "frac_reward_zero_std": 0.0, "grad_norm": 3.230205102457441, "kl": 0.0110931396484375, "learning_rate": 9.244338782907779e-07, "loss": -0.0204, "num_tokens": 58314759.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8409535884857178, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.010195062058330174, "rewards/wordcountpos_reward/raw_geo/std": 0.04283344997887795, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.19907192074632132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1343 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1227.6875, "completions/mean_terminated_length": 1188.7857666015625, "completions/min_length": 876.0, "completions/min_terminated_length": 876.0, "epoch": 0.26885377075415084, "frac_reward_zero_std": 0.0, "grad_norm": 3.164203935473939, "kl": 0.01324462890625, "learning_rate": 9.242594958567927e-07, "loss": -0.0148, "num_tokens": 58351210.0, "reward": 0.0, "reward_std": 0.9589077234268188, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08234442081184556, "rewards/wordcountpos_reward/raw_geo/std": 0.0987841680522422, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1344 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1258.0, "completions/mean_terminated_length": 1241.86669921875, "completions/min_length": 1106.0, "completions/min_terminated_length": 1106.0, "epoch": 0.26905381076215246, "frac_reward_zero_std": 0.0, "grad_norm": 3.176546533261156, "kl": 0.01373291015625, "learning_rate": 9.240849309321775e-07, "loss": -0.0073, "num_tokens": 58398050.0, "reward": 1.862645149230957e-08, "reward_std": 1.0659713745117188, "rewards/wordcountpos_reward/mean": 1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.026625377001508035, "rewards/wordcountpos_reward/raw_geo/std": 0.2688349212892977, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11979921473804349, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1345 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1313.0, "completions/max_terminated_length": 1313.0, "completions/mean_length": 1068.0, "completions/mean_terminated_length": 1068.0, "completions/min_length": 922.0, "completions/min_terminated_length": 922.0, "epoch": 0.269253850770154, "frac_reward_zero_std": 0.0, "grad_norm": 3.2654399088518344, "kl": 0.0128021240234375, "learning_rate": 9.239101836020514e-07, "loss": -0.0048, "num_tokens": 58447954.0, "reward": 0.0, "reward_std": 0.7926424741744995, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05405716232633811, "rewards/wordcountpos_reward/raw_geo/std": 0.0538442902263567, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1346 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1334.0, "completions/max_terminated_length": 1334.0, "completions/mean_length": 968.0, "completions/mean_terminated_length": 968.0, "completions/min_length": 709.0, "completions/min_terminated_length": 709.0, "epoch": 0.2694538907781556, "frac_reward_zero_std": 0.0, "grad_norm": 2.5701510035149515, "kl": 0.00904083251953125, "learning_rate": 9.237352539516218e-07, "loss": -0.003, "num_tokens": 58477826.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9795045852661133, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01667172677709021, "rewards/wordcountpos_reward/raw_geo/std": 0.03454947781911094, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1347 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1013.0, "completions/max_terminated_length": 1013.0, "completions/mean_length": 877.375, "completions/mean_terminated_length": 877.375, "completions/min_length": 744.0, "completions/min_terminated_length": 744.0, "epoch": 0.26965393078615724, "frac_reward_zero_std": 0.0, "grad_norm": 2.9214818861509846, "kl": 0.00888824462890625, "learning_rate": 9.235601420661854e-07, "loss": -0.0142, "num_tokens": 58521200.0, "reward": -2.9802322387695312e-08, "reward_std": 0.910345196723938, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.19097399036646404, "rewards/wordcountpos_reward/raw_geo/std": 0.18371007194953604, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1348 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1364.0, "completions/mean_length": 1108.125, "completions/mean_terminated_length": 1017.6923217773438, "completions/min_length": 729.0, "completions/min_terminated_length": 729.0, "epoch": 0.26985397079415885, "frac_reward_zero_std": 0.0, "grad_norm": 3.702804609565373, "kl": 0.0123748779296875, "learning_rate": 9.233848480311276e-07, "loss": -0.0103, "num_tokens": 58564370.0, "reward": 0.0, "reward_std": 0.9629231691360474, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.018004762443022855, "rewards/wordcountpos_reward/raw_geo/std": 0.04140659806051792, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.1134476547592341, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1349 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1251.125, "completions/mean_terminated_length": 1234.533447265625, "completions/min_length": 1014.0, "completions/min_terminated_length": 1014.0, "epoch": 0.2700540108021604, "frac_reward_zero_std": 0.0, "grad_norm": 3.314697641112342, "kl": 0.011932373046875, "learning_rate": 9.232093719319222e-07, "loss": -0.0472, "num_tokens": 58612924.0, "reward": 2.9802322387695312e-08, "reward_std": 0.45265740156173706, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.20688401678725907, "rewards/wordcountpos_reward/raw_geo/std": 0.19138629295852885, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1350 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1283.0, "completions/max_terminated_length": 1283.0, "completions/mean_length": 1014.1875, "completions/mean_terminated_length": 1014.1875, "completions/min_length": 886.0, "completions/min_terminated_length": 886.0, "epoch": 0.270254050810162, "frac_reward_zero_std": 0.0, "grad_norm": 2.6738455445679485, "kl": 0.0095367431640625, "learning_rate": 9.230337138541324e-07, "loss": -0.018, "num_tokens": 58650943.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7811430096626282, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.004392687295942687, "rewards/wordcountpos_reward/raw_geo/std": 0.056791360021955645, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09016445879408158, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1351 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1187.0, "completions/mean_terminated_length": 1166.1334228515625, "completions/min_length": 869.0, "completions/min_terminated_length": 869.0, "epoch": 0.27045409081816363, "frac_reward_zero_std": 0.0, "grad_norm": 3.1413484026259946, "kl": 0.0172119140625, "learning_rate": 9.228578738834097e-07, "loss": -0.0257, "num_tokens": 58694615.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9431371092796326, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05245220304259816, "rewards/wordcountpos_reward/raw_geo/std": 0.10026222694921318, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9833333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.029814239699997188, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1352 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1142.625, "completions/mean_terminated_length": 1118.800048828125, "completions/min_length": 771.0, "completions/min_terminated_length": 771.0, "epoch": 0.27065413082616524, "frac_reward_zero_std": 0.0, "grad_norm": 6.181918848272692, "kl": 0.0277862548828125, "learning_rate": 9.226818521054946e-07, "loss": 0.0056, "num_tokens": 58736977.0, "reward": 0.0, "reward_std": 0.7209534645080566, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14046826286678493, "rewards/wordcountpos_reward/raw_geo/std": 0.08713244162950681, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.062063289083417524, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1353 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1474.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1149.0, "completions/mean_terminated_length": 1149.0, "completions/min_length": 976.0, "completions/min_terminated_length": 976.0, "epoch": 0.27085417083416685, "frac_reward_zero_std": 0.0, "grad_norm": 2.6225405473235086, "kl": 0.0102996826171875, "learning_rate": 9.225056486062162e-07, "loss": 0.0074, "num_tokens": 58783361.0, "reward": 0.0, "reward_std": 1.0312507152557373, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11598048332012811, "rewards/wordcountpos_reward/raw_geo/std": 0.08426221901713499, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07888106377466156, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1354 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1346.9375, "completions/mean_terminated_length": 1311.615478515625, "completions/min_length": 1080.0, "completions/min_terminated_length": 1080.0, "epoch": 0.2710542108421684, "frac_reward_zero_std": 0.0, "grad_norm": 3.054766502370073, "kl": 0.01318359375, "learning_rate": 9.22329263471492e-07, "loss": -0.0419, "num_tokens": 58824928.0, "reward": 1.4901161193847656e-08, "reward_std": 1.065586805343628, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1565419472804918, "rewards/wordcountpos_reward/raw_geo/std": 0.2769985640408725, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823629, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1355 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1465.0, "completions/mean_length": 1314.6875, "completions/mean_terminated_length": 1271.923095703125, "completions/min_length": 999.0, "completions/min_terminated_length": 999.0, "epoch": 0.27125425085017, "frac_reward_zero_std": 0.0, "grad_norm": 2.372160875298143, "kl": 0.00640869140625, "learning_rate": 9.221526967873282e-07, "loss": 0.0365, "num_tokens": 58874595.0, "reward": 0.0, "reward_std": 0.7954102754592896, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07302610188194188, "rewards/wordcountpos_reward/raw_geo/std": 0.0504454627781234, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1356 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1149.0, "completions/max_terminated_length": 1149.0, "completions/mean_length": 835.875, "completions/mean_terminated_length": 835.875, "completions/min_length": 675.0, "completions/min_terminated_length": 675.0, "epoch": 0.27145429085817163, "frac_reward_zero_std": 0.0, "grad_norm": 4.535797584154669, "kl": 0.01132965087890625, "learning_rate": 9.219759486398195e-07, "loss": 0.0615, "num_tokens": 58915121.0, "reward": 2.9802322387695312e-08, "reward_std": 1.022127628326416, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03609640559805066, "rewards/wordcountpos_reward/raw_geo/std": 0.038934199834147, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.1519624710005487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1357 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1125.0, "completions/max_terminated_length": 1125.0, "completions/mean_length": 839.9375, "completions/mean_terminated_length": 839.9375, "completions/min_length": 576.0, "completions/min_terminated_length": 576.0, "epoch": 0.27165433086617324, "frac_reward_zero_std": 0.0, "grad_norm": 3.494026449834135, "kl": 0.00786590576171875, "learning_rate": 9.217990191151491e-07, "loss": 0.0187, "num_tokens": 58951608.0, "reward": 1.862645149230957e-08, "reward_std": 1.065683364868164, "rewards/wordcountpos_reward/mean": 1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.004431650772553532, "rewards/wordcountpos_reward/raw_geo/std": 0.08076126551883148, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11385500851066221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1358 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1358.0, "completions/mean_length": 1069.25, "completions/mean_terminated_length": 1040.533447265625, "completions/min_length": 749.0, "completions/min_terminated_length": 749.0, "epoch": 0.27185437087417486, "frac_reward_zero_std": 0.0, "grad_norm": 3.341375727278998, "kl": 0.012908935546875, "learning_rate": 9.216219082995888e-07, "loss": 0.0143, "num_tokens": 59001508.0, "reward": 0.0, "reward_std": 0.6653932332992554, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07971418875054004, "rewards/wordcountpos_reward/raw_geo/std": 0.13084018230139893, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1359 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1391.0, "completions/max_terminated_length": 1391.0, "completions/mean_length": 1164.3125, "completions/mean_terminated_length": 1164.3125, "completions/min_length": 986.0, "completions/min_terminated_length": 986.0, "epoch": 0.2720544108821764, "frac_reward_zero_std": 0.0, "grad_norm": 3.213983634875895, "kl": 0.0158538818359375, "learning_rate": 9.214446162794985e-07, "loss": 0.0115, "num_tokens": 59052089.0, "reward": 0.0, "reward_std": 0.6901043653488159, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07215062176201831, "rewards/wordcountpos_reward/raw_geo/std": 0.1702939987173843, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1360 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1418.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1146.625, "completions/mean_terminated_length": 1146.625, "completions/min_length": 937.0, "completions/min_terminated_length": 937.0, "epoch": 0.272254450890178, "frac_reward_zero_std": 0.0, "grad_norm": 3.0531281776730466, "kl": 0.0136871337890625, "learning_rate": 9.212671431413266e-07, "loss": 0.0085, "num_tokens": 59101363.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0148411989212036, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05979066423449196, "rewards/wordcountpos_reward/raw_geo/std": 0.05588237713060525, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452246, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1361 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1325.0, "completions/max_terminated_length": 1325.0, "completions/mean_length": 1104.6875, "completions/mean_terminated_length": 1104.6875, "completions/min_length": 786.0, "completions/min_terminated_length": 786.0, "epoch": 0.27245449089817964, "frac_reward_zero_std": 0.0, "grad_norm": 2.7486261436638686, "kl": 0.01239013671875, "learning_rate": 9.210894889716096e-07, "loss": -0.0325, "num_tokens": 59139486.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9145042896270752, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04023661018076847, "rewards/wordcountpos_reward/raw_geo/std": 0.1367240657793835, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07685966046898339, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1362 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1159.0, "completions/max_terminated_length": 1159.0, "completions/mean_length": 939.375, "completions/mean_terminated_length": 939.375, "completions/min_length": 502.0, "completions/min_terminated_length": 502.0, "epoch": 0.27265453090618125, "frac_reward_zero_std": 0.0, "grad_norm": 3.6228772180189983, "kl": 0.01214599609375, "learning_rate": 9.20911653856973e-07, "loss": -0.0875, "num_tokens": 59175116.0, "reward": 0.0, "reward_std": 0.7505817413330078, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.053260712633788766, "rewards/wordcountpos_reward/raw_geo/std": 0.07830346831739698, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11800816042090448, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1363 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1131.625, "completions/mean_terminated_length": 1107.0667724609375, "completions/min_length": 836.0, "completions/min_terminated_length": 836.0, "epoch": 0.27285457091418286, "frac_reward_zero_std": 0.0, "grad_norm": 3.4109351390702862, "kl": 0.011871337890625, "learning_rate": 9.207336378841296e-07, "loss": 0.0134, "num_tokens": 59209534.0, "reward": 0.0, "reward_std": 0.8190287351608276, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04012960712301714, "rewards/wordcountpos_reward/raw_geo/std": 0.07664959236179351, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1364 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1273.0, "completions/max_terminated_length": 1273.0, "completions/mean_length": 960.9375, "completions/mean_terminated_length": 960.9375, "completions/min_length": 777.0, "completions/min_terminated_length": 777.0, "epoch": 0.2730546109221844, "frac_reward_zero_std": 0.0, "grad_norm": 3.6805429393713434, "kl": 0.016021728515625, "learning_rate": 9.205554411398809e-07, "loss": -0.0275, "num_tokens": 59245741.0, "reward": 0.0, "reward_std": 1.008124589920044, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.005485846110903772, "rewards/wordcountpos_reward/raw_geo/std": 0.23073269196236268, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087681, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1365 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1390.0, "completions/mean_length": 1250.1875, "completions/mean_terminated_length": 1233.533447265625, "completions/min_length": 1022.0, "completions/min_terminated_length": 1022.0, "epoch": 0.273254650930186, "frac_reward_zero_std": 0.0, "grad_norm": 2.924440519961206, "kl": 0.011688232421875, "learning_rate": 9.203770637111164e-07, "loss": -0.0247, "num_tokens": 59297224.0, "reward": 0.0, "reward_std": 0.7118884325027466, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0474396312917142, "rewards/wordcountpos_reward/raw_geo/std": 0.10290443680017783, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.0877707451472511, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1366 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1441.0, "completions/mean_length": 1259.9375, "completions/mean_terminated_length": 1243.933349609375, "completions/min_length": 979.0, "completions/min_terminated_length": 979.0, "epoch": 0.27345469093818764, "frac_reward_zero_std": 0.0, "grad_norm": 3.0182803361247963, "kl": 0.0117950439453125, "learning_rate": 9.201985056848137e-07, "loss": 0.0079, "num_tokens": 59346439.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7288313508033752, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0688705113855613, "rewards/wordcountpos_reward/raw_geo/std": 0.14611475086579112, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1367 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1176.0625, "completions/mean_terminated_length": 1028.8182373046875, "completions/min_length": 693.0, "completions/min_terminated_length": 693.0, "epoch": 0.27365473094618925, "frac_reward_zero_std": 0.0, "grad_norm": 3.778286334307396, "kl": 0.0194244384765625, "learning_rate": 9.200197671480388e-07, "loss": -0.0006, "num_tokens": 59401520.0, "reward": 7.450580596923828e-09, "reward_std": 1.0400464534759521, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.00717439011179862, "rewards/wordcountpos_reward/raw_geo/std": 0.06822649921302706, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09259629622222519, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1368 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1257.0, "completions/mean_length": 1339.4375, "completions/mean_terminated_length": 1178.875, "completions/min_length": 1107.0, "completions/min_terminated_length": 1107.0, "epoch": 0.27385477095419086, "frac_reward_zero_std": 0.0, "grad_norm": 2.7184337092932904, "kl": 0.00885772705078125, "learning_rate": 9.198408481879451e-07, "loss": -0.005, "num_tokens": 59447263.0, "reward": 0.0, "reward_std": 0.4408210515975952, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.02206530877448587, "rewards/wordcountpos_reward/raw_geo/std": 0.34701958478135586, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11894598836509011, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1369 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1316.0, "completions/max_terminated_length": 1316.0, "completions/mean_length": 975.3125, "completions/mean_terminated_length": 975.3125, "completions/min_length": 741.0, "completions/min_terminated_length": 741.0, "epoch": 0.2740548109621924, "frac_reward_zero_std": 0.0, "grad_norm": 3.68807508959835, "kl": 0.01873779296875, "learning_rate": 9.196617488917744e-07, "loss": -0.0839, "num_tokens": 59498204.0, "reward": 0.0, "reward_std": 0.98270583152771, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.15365591842312276, "rewards/wordcountpos_reward/raw_geo/std": 0.22938719329650462, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1029203215725281, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1370 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1335.0, "completions/max_terminated_length": 1335.0, "completions/mean_length": 1146.9375, "completions/mean_terminated_length": 1146.9375, "completions/min_length": 965.0, "completions/min_terminated_length": 965.0, "epoch": 0.27425485097019403, "frac_reward_zero_std": 0.0, "grad_norm": 2.763464346946391, "kl": 0.00998687744140625, "learning_rate": 9.194824693468565e-07, "loss": 0.0185, "num_tokens": 59543899.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9326350688934326, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.012038603836024654, "rewards/wordcountpos_reward/raw_geo/std": 0.10087102114186265, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036262, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1371 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 1220.0, "completions/mean_terminated_length": 1201.3333740234375, "completions/min_length": 987.0, "completions/min_terminated_length": 987.0, "epoch": 0.27445489097819564, "frac_reward_zero_std": 0.0, "grad_norm": 3.5615131516961642, "kl": 0.013153076171875, "learning_rate": 9.193030096406086e-07, "loss": -0.0379, "num_tokens": 59592379.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0213422775268555, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.018487710210411637, "rewards/wordcountpos_reward/raw_geo/std": 0.09214097374801154, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.07588978362901858, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1372 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1289.0, "completions/max_terminated_length": 1289.0, "completions/mean_length": 891.3125, "completions/mean_terminated_length": 891.3125, "completions/min_length": 602.0, "completions/min_terminated_length": 602.0, "epoch": 0.27465493098619725, "frac_reward_zero_std": 0.0, "grad_norm": 3.7929046672160664, "kl": 0.0175323486328125, "learning_rate": 9.191233698605362e-07, "loss": -0.0781, "num_tokens": 59642856.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8414506316184998, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14643251135397625, "rewards/wordcountpos_reward/raw_geo/std": 0.07938352603492103, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12988598989256067, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1373 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1410.0, "completions/max_terminated_length": 1410.0, "completions/mean_length": 1132.0625, "completions/mean_terminated_length": 1132.0625, "completions/min_length": 974.0, "completions/min_terminated_length": 974.0, "epoch": 0.27485497099419887, "frac_reward_zero_std": 0.0, "grad_norm": 2.8491561938692787, "kl": 0.00765228271484375, "learning_rate": 9.189435500942325e-07, "loss": 0.0062, "num_tokens": 59682665.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9678256511688232, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.022124722463125385, "rewards/wordcountpos_reward/raw_geo/std": 0.22223568260669183, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08850612031567834, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1374 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1411.3125, "completions/mean_terminated_length": 1342.3333740234375, "completions/min_length": 1020.0, "completions/min_terminated_length": 1020.0, "epoch": 0.2750550110022004, "frac_reward_zero_std": 0.0, "grad_norm": 3.1115698700810617, "kl": 0.0123748779296875, "learning_rate": 9.187635504293786e-07, "loss": -0.0057, "num_tokens": 59738502.0, "reward": 0.0, "reward_std": 0.7496911883354187, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.055865920176616526, "rewards/wordcountpos_reward/raw_geo/std": 0.059380351825215186, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746354, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1375 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1257.0, "completions/max_terminated_length": 1257.0, "completions/mean_length": 1044.0, "completions/mean_terminated_length": 1044.0, "completions/min_length": 826.0, "completions/min_terminated_length": 826.0, "epoch": 0.27525505101020203, "frac_reward_zero_std": 0.0, "grad_norm": 3.4574081519606032, "kl": 0.0126953125, "learning_rate": 9.185833709537428e-07, "loss": -0.0167, "num_tokens": 59773478.0, "reward": 0.0, "reward_std": 0.9807004928588867, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.035253871587400905, "rewards/wordcountpos_reward/raw_geo/std": 0.11689507061072131, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1376 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1269.0, "completions/mean_length": 1128.875, "completions/mean_terminated_length": 1104.1334228515625, "completions/min_length": 907.0, "completions/min_terminated_length": 907.0, "epoch": 0.27545509101820365, "frac_reward_zero_std": 0.0, "grad_norm": 3.730138739156282, "kl": 0.013519287109375, "learning_rate": 9.184030117551817e-07, "loss": -0.0004, "num_tokens": 59815428.0, "reward": 0.0, "reward_std": 0.6007387638092041, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.12532743233571975, "rewards/wordcountpos_reward/raw_geo/std": 0.13515303656722236, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.14395215254459456, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1377 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1283.0, "completions/max_terminated_length": 1283.0, "completions/mean_length": 1131.9375, "completions/mean_terminated_length": 1131.9375, "completions/min_length": 947.0, "completions/min_terminated_length": 947.0, "epoch": 0.27565513102620526, "frac_reward_zero_std": 0.0, "grad_norm": 3.2719960529629306, "kl": 0.012908935546875, "learning_rate": 9.182224729216392e-07, "loss": -0.0071, "num_tokens": 59858811.0, "reward": 0.0, "reward_std": 0.7951167821884155, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2106708806558299, "rewards/wordcountpos_reward/raw_geo/std": 0.11682420488350234, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1378 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1394.0, "completions/max_terminated_length": 1394.0, "completions/mean_length": 1106.6875, "completions/mean_terminated_length": 1106.6875, "completions/min_length": 982.0, "completions/min_terminated_length": 982.0, "epoch": 0.2758551710342068, "frac_reward_zero_std": 0.0, "grad_norm": 2.753226886309937, "kl": 0.00955963134765625, "learning_rate": 9.18041754541147e-07, "loss": -0.0088, "num_tokens": 59902230.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0246782302856445, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1506037484712467, "rewards/wordcountpos_reward/raw_geo/std": 0.10874218240308127, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1379 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1486.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1143.25, "completions/mean_terminated_length": 1143.25, "completions/min_length": 812.0, "completions/min_terminated_length": 812.0, "epoch": 0.2760552110422084, "frac_reward_zero_std": 0.0, "grad_norm": 2.8745428739439634, "kl": 0.0088043212890625, "learning_rate": 9.178608567018239e-07, "loss": -0.0137, "num_tokens": 59957482.0, "reward": 0.0, "reward_std": 0.556842565536499, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06917283100637084, "rewards/wordcountpos_reward/raw_geo/std": 0.11221379997586392, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10000000000000002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1380 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 1219.125, "completions/mean_terminated_length": 1200.4000244140625, "completions/min_length": 1011.0, "completions/min_terminated_length": 1011.0, "epoch": 0.27625525105021004, "frac_reward_zero_std": 0.0, "grad_norm": 2.2436239484436147, "kl": 0.0069580078125, "learning_rate": 9.176797794918766e-07, "loss": -0.0166, "num_tokens": 60002164.0, "reward": 0.0, "reward_std": 0.36811119318008423, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.017040913194512456, "rewards/wordcountpos_reward/raw_geo/std": 0.06916997067753554, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.11013459778666117, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1381 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1237.125, "completions/mean_terminated_length": 1219.60009765625, "completions/min_length": 970.0, "completions/min_terminated_length": 970.0, "epoch": 0.27645529105821165, "frac_reward_zero_std": 0.0, "grad_norm": 3.001512025782972, "kl": 0.00937652587890625, "learning_rate": 9.174985229995993e-07, "loss": 0.0295, "num_tokens": 60046182.0, "reward": 5.960464477539063e-08, "reward_std": 0.46628624200820923, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06045775510129246, "rewards/wordcountpos_reward/raw_geo/std": 0.09927655831750105, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09269623828717428, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1382 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1432.0, "completions/mean_length": 1155.3125, "completions/mean_terminated_length": 1132.3333740234375, "completions/min_length": 841.0, "completions/min_terminated_length": 841.0, "epoch": 0.27665533106621326, "frac_reward_zero_std": 0.0, "grad_norm": 3.0843710147433128, "kl": 0.011383056640625, "learning_rate": 9.173170873133733e-07, "loss": -0.0021, "num_tokens": 60077075.0, "reward": 7.450580596923828e-09, "reward_std": 1.0667418241500854, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.04325215230585805, "rewards/wordcountpos_reward/raw_geo/std": 0.10393324990665573, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1383 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1389.0, "completions/max_terminated_length": 1389.0, "completions/mean_length": 982.3125, "completions/mean_terminated_length": 982.3125, "completions/min_length": 714.0, "completions/min_terminated_length": 714.0, "epoch": 0.2768553710742148, "frac_reward_zero_std": 0.0, "grad_norm": 3.9166711158287266, "kl": 0.0164794921875, "learning_rate": 9.171354725216677e-07, "loss": -0.0151, "num_tokens": 60113672.0, "reward": 0.0, "reward_std": 0.8619920015335083, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09801672317854913, "rewards/wordcountpos_reward/raw_geo/std": 0.09151933487160709, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09699179041242309, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1303.0, "completions/max_terminated_length": 1303.0, "completions/mean_length": 1168.8125, "completions/mean_terminated_length": 1168.8125, "completions/min_length": 940.0, "completions/min_terminated_length": 940.0, "epoch": 0.27705541108221643, "frac_reward_zero_std": 0.0, "grad_norm": 2.7847432214754884, "kl": 0.01123046875, "learning_rate": 9.169536787130384e-07, "loss": -0.0279, "num_tokens": 60147797.0, "reward": -4.470348358154297e-08, "reward_std": 1.0301002264022827, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2890869895835476, "rewards/wordcountpos_reward/raw_geo/std": 0.13441483814687813, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05947299418254506, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1385 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1353.0, "completions/max_terminated_length": 1353.0, "completions/mean_length": 1056.0, "completions/mean_terminated_length": 1056.0, "completions/min_length": 892.0, "completions/min_terminated_length": 892.0, "epoch": 0.27725545109021804, "frac_reward_zero_std": 0.0, "grad_norm": 3.3176775283118762, "kl": 0.016632080078125, "learning_rate": 9.167717059761291e-07, "loss": -0.0636, "num_tokens": 60189253.0, "reward": 0.0, "reward_std": 0.7266931533813477, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.16031608474647557, "rewards/wordcountpos_reward/raw_geo/std": 0.19554262779120318, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1386 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1367.0, "completions/max_terminated_length": 1367.0, "completions/mean_length": 1120.6875, "completions/mean_terminated_length": 1120.6875, "completions/min_length": 996.0, "completions/min_terminated_length": 996.0, "epoch": 0.27745549109821965, "frac_reward_zero_std": 0.0, "grad_norm": 2.920281906862375, "kl": 0.00777435302734375, "learning_rate": 9.165895543996706e-07, "loss": 0.0105, "num_tokens": 60221416.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0156707763671875, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.054389071187768555, "rewards/wordcountpos_reward/raw_geo/std": 0.11223096068870163, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590962, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1387 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1272.0, "completions/mean_length": 981.4375, "completions/mean_terminated_length": 946.86669921875, "completions/min_length": 726.0, "completions/min_terminated_length": 726.0, "epoch": 0.27765553110622126, "frac_reward_zero_std": 0.0, "grad_norm": 3.416121583200704, "kl": 0.010345458984375, "learning_rate": 9.164072240724808e-07, "loss": 0.0229, "num_tokens": 60253767.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0603771209716797, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.008125927638311006, "rewards/wordcountpos_reward/raw_geo/std": 0.07373990583865948, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.1102186379345533, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1388 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1472.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1139.375, "completions/mean_terminated_length": 1139.375, "completions/min_length": 900.0, "completions/min_terminated_length": 900.0, "epoch": 0.2778555711142228, "frac_reward_zero_std": 0.0, "grad_norm": 2.8533955068373826, "kl": 0.00876617431640625, "learning_rate": 9.162247150834647e-07, "loss": 0.0, "num_tokens": 60305109.0, "reward": 0.0, "reward_std": 0.6584036350250244, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.060784437580004666, "rewards/wordcountpos_reward/raw_geo/std": 0.08400836683962326, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.026874192494328493, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1389 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1415.0, "completions/mean_length": 1245.4375, "completions/mean_terminated_length": 1209.071533203125, "completions/min_length": 1054.0, "completions/min_terminated_length": 1054.0, "epoch": 0.27805561112222443, "frac_reward_zero_std": 0.0, "grad_norm": 3.1302470566562377, "kl": 0.0109405517578125, "learning_rate": 9.160420275216143e-07, "loss": 0.0321, "num_tokens": 60341028.0, "reward": -1.862645149230957e-08, "reward_std": 1.0534486770629883, "rewards/wordcountpos_reward/mean": -1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.027637515891455113, "rewards/wordcountpos_reward/raw_geo/std": 0.0659749422925075, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1390 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1432.0, "completions/mean_length": 1372.375, "completions/mean_terminated_length": 1329.8333740234375, "completions/min_length": 1104.0, "completions/min_terminated_length": 1104.0, "epoch": 0.27825565113022604, "frac_reward_zero_std": 0.0, "grad_norm": 2.6027572542391426, "kl": 0.00890350341796875, "learning_rate": 9.158591614760093e-07, "loss": -0.0086, "num_tokens": 60388514.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9040713310241699, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.17927903611793783, "rewards/wordcountpos_reward/raw_geo/std": 0.1986670529308137, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.11080513425729775, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1391 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1429.0, "completions/max_terminated_length": 1429.0, "completions/mean_length": 1023.1875, "completions/mean_terminated_length": 1023.1875, "completions/min_length": 831.0, "completions/min_terminated_length": 831.0, "epoch": 0.27845569113822766, "frac_reward_zero_std": 0.0, "grad_norm": 3.6358193470144036, "kl": 0.0148468017578125, "learning_rate": 9.156761170358157e-07, "loss": -0.0116, "num_tokens": 60420573.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0098563432693481, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.001829963679511255, "rewards/wordcountpos_reward/raw_geo/std": 0.0781881984245527, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575907, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1392 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1122.3125, "completions/mean_terminated_length": 1097.1334228515625, "completions/min_length": 792.0, "completions/min_terminated_length": 792.0, "epoch": 0.27865573114622927, "frac_reward_zero_std": 0.0, "grad_norm": 2.4969738299342152, "kl": 0.00811004638671875, "learning_rate": 9.154928942902871e-07, "loss": 0.0218, "num_tokens": 60477450.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8635658025741577, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.16187138590245986, "rewards/wordcountpos_reward/raw_geo/std": 0.17641266496894842, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.1458055529095489, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1393 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1205.0, "completions/max_terminated_length": 1205.0, "completions/mean_length": 1114.75, "completions/mean_terminated_length": 1114.75, "completions/min_length": 851.0, "completions/min_terminated_length": 851.0, "epoch": 0.2788557711542308, "frac_reward_zero_std": 0.0, "grad_norm": 2.6937617133765004, "kl": 0.005733489990234375, "learning_rate": 9.153094933287635e-07, "loss": -0.0058, "num_tokens": 60514174.0, "reward": 0.0, "reward_std": 0.7024930119514465, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.025331060471821844, "rewards/wordcountpos_reward/raw_geo/std": 0.0690205858861114, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13763881881375054, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1394 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1322.0625, "completions/mean_terminated_length": 1310.2000732421875, "completions/min_length": 991.0, "completions/min_terminated_length": 991.0, "epoch": 0.27905581116223244, "frac_reward_zero_std": 0.0, "grad_norm": 2.4181775509293018, "kl": 0.009765625, "learning_rate": 9.15125914240672e-07, "loss": -0.0291, "num_tokens": 60564591.0, "reward": 4.470348358154297e-08, "reward_std": 0.9179534912109375, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03233699560197412, "rewards/wordcountpos_reward/raw_geo/std": 0.08916381968352377, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.12292725943057184, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1395 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1405.0, "completions/max_terminated_length": 1405.0, "completions/mean_length": 1072.375, "completions/mean_terminated_length": 1072.375, "completions/min_length": 856.0, "completions/min_terminated_length": 856.0, "epoch": 0.27925585117023405, "frac_reward_zero_std": 0.0, "grad_norm": 3.471388624322902, "kl": 0.010955810546875, "learning_rate": 9.149421571155269e-07, "loss": -0.0255, "num_tokens": 60610949.0, "reward": 0.0, "reward_std": 0.8790923357009888, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0758267658493836, "rewards/wordcountpos_reward/raw_geo/std": 0.24199525282344028, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1396 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1370.0, "completions/mean_terminated_length": 1310.9091796875, "completions/min_length": 1169.0, "completions/min_terminated_length": 1169.0, "epoch": 0.27945589117823566, "frac_reward_zero_std": 0.0, "grad_norm": 2.8391010472620946, "kl": 0.0278472900390625, "learning_rate": 9.14758222042929e-07, "loss": -0.0236, "num_tokens": 60668997.0, "reward": 0.0, "reward_std": 1.0483356714248657, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.012110993128130854, "rewards/wordcountpos_reward/raw_geo/std": 0.11317337540671665, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452246, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1397 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1246.375, "completions/mean_terminated_length": 1229.4666748046875, "completions/min_length": 975.0, "completions/min_terminated_length": 975.0, "epoch": 0.27965593118623727, "frac_reward_zero_std": 0.0, "grad_norm": 2.859472757284942, "kl": 0.013946533203125, "learning_rate": 9.145741091125653e-07, "loss": 0.0013, "num_tokens": 60717571.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9298990964889526, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13493727839137018, "rewards/wordcountpos_reward/raw_geo/std": 0.1848709559123489, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1398 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1105.0, "completions/mean_length": 1224.8125, "completions/mean_terminated_length": 949.625, "completions/min_length": 810.0, "completions/min_terminated_length": 810.0, "epoch": 0.27985597119423883, "frac_reward_zero_std": 0.0, "grad_norm": 2.758751563743274, "kl": 0.0103912353515625, "learning_rate": 9.143898184142107e-07, "loss": 0.0057, "num_tokens": 60761488.0, "reward": 0.0, "reward_std": 0.9024456739425659, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05697900253738435, "rewards/wordcountpos_reward/raw_geo/std": 0.16501723273535449, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042255, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1399 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1459.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1089.25, "completions/mean_terminated_length": 1089.25, "completions/min_length": 949.0, "completions/min_terminated_length": 949.0, "epoch": 0.28005601120224044, "frac_reward_zero_std": 0.0, "grad_norm": 3.340532550121953, "kl": 0.01507568359375, "learning_rate": 9.142053500377258e-07, "loss": 0.0431, "num_tokens": 60805076.0, "reward": -2.2351741790771484e-08, "reward_std": 1.052138328552246, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03481408316901882, "rewards/wordcountpos_reward/raw_geo/std": 0.1715353872668126, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10292032157252812, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1400 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1399.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 1135.875, "completions/mean_terminated_length": 1135.875, "completions/min_length": 955.0, "completions/min_terminated_length": 955.0, "epoch": 0.28025605121024205, "frac_reward_zero_std": 0.0, "grad_norm": 3.5706284087083304, "kl": 0.018280029296875, "learning_rate": 9.140207040730587e-07, "loss": -0.0007, "num_tokens": 60856314.0, "reward": 2.9802322387695312e-08, "reward_std": 1.026914358139038, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08055660424274584, "rewards/wordcountpos_reward/raw_geo/std": 0.13011315507970053, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.09699179041242309, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1401 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1422.0, "completions/mean_length": 1225.375, "completions/mean_terminated_length": 1207.0667724609375, "completions/min_length": 942.0, "completions/min_terminated_length": 942.0, "epoch": 0.28045609121824366, "frac_reward_zero_std": 0.0, "grad_norm": 2.8888657307053447, "kl": 0.00897979736328125, "learning_rate": 9.138358806102432e-07, "loss": -0.0221, "num_tokens": 60896328.0, "reward": -1.4901161193847656e-08, "reward_std": 1.039982795715332, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.047106577752333754, "rewards/wordcountpos_reward/raw_geo/std": 0.07876730222779964, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07888106377466156, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1402 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1430.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1147.8125, "completions/mean_terminated_length": 1147.8125, "completions/min_length": 1000.0, "completions/min_terminated_length": 1000.0, "epoch": 0.2806561312262453, "frac_reward_zero_std": 0.0, "grad_norm": 3.28062145839446, "kl": 0.011688232421875, "learning_rate": 9.136508797393999e-07, "loss": -0.0279, "num_tokens": 60942405.0, "reward": 0.0, "reward_std": 0.3488418459892273, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.021329339379944856, "rewards/wordcountpos_reward/raw_geo/std": 0.15904756894326477, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1403 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1261.0, "completions/max_terminated_length": 1261.0, "completions/mean_length": 1070.3125, "completions/mean_terminated_length": 1070.3125, "completions/min_length": 869.0, "completions/min_terminated_length": 869.0, "epoch": 0.28085617123424683, "frac_reward_zero_std": 0.0, "grad_norm": 3.5226019399508246, "kl": 0.0145721435546875, "learning_rate": 9.134657015507367e-07, "loss": 0.0012, "num_tokens": 60993634.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9390586614608765, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.041611643642463406, "rewards/wordcountpos_reward/raw_geo/std": 0.10000222250740834, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891871, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1404 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1480.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1213.0625, "completions/mean_terminated_length": 1213.0625, "completions/min_length": 952.0, "completions/min_terminated_length": 952.0, "epoch": 0.28105621124224844, "frac_reward_zero_std": 0.0, "grad_norm": 3.394117089295289, "kl": 0.0121002197265625, "learning_rate": 9.132803461345468e-07, "loss": -0.0273, "num_tokens": 61033331.0, "reward": 1.4901161193847656e-08, "reward_std": 1.044857144355774, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.16409514266286096, "rewards/wordcountpos_reward/raw_geo/std": 0.09849294878670774, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11147163731607217, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1405 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1438.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1247.1875, "completions/mean_terminated_length": 1247.1875, "completions/min_length": 1027.0, "completions/min_terminated_length": 1027.0, "epoch": 0.28125625125025006, "frac_reward_zero_std": 0.0, "grad_norm": 3.1509064024289497, "kl": 0.011505126953125, "learning_rate": 9.130948135812105e-07, "loss": -0.0369, "num_tokens": 61080814.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0137759447097778, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10340731870290201, "rewards/wordcountpos_reward/raw_geo/std": 0.22499057309146034, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1406 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1335.0, "completions/max_terminated_length": 1335.0, "completions/mean_length": 1103.8125, "completions/mean_terminated_length": 1103.8125, "completions/min_length": 879.0, "completions/min_terminated_length": 879.0, "epoch": 0.28145629125825167, "frac_reward_zero_std": 0.0, "grad_norm": 3.198765305430847, "kl": 0.011749267578125, "learning_rate": 9.129091039811944e-07, "loss": -0.046, "num_tokens": 61118059.0, "reward": -2.9802322387695312e-08, "reward_std": 0.961811363697052, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.28597241753409464, "rewards/wordcountpos_reward/raw_geo/std": 0.11224233033028823, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258099, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1407 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1039.0, "completions/max_terminated_length": 1039.0, "completions/mean_length": 902.375, "completions/mean_terminated_length": 902.375, "completions/min_length": 690.0, "completions/min_terminated_length": 690.0, "epoch": 0.2816563312662532, "frac_reward_zero_std": 0.0, "grad_norm": 2.407459661893286, "kl": 0.00640869140625, "learning_rate": 9.127232174250511e-07, "loss": -0.0175, "num_tokens": 61152185.0, "reward": 7.450580596923828e-09, "reward_std": 1.009409785270691, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.07016394094188246, "rewards/wordcountpos_reward/raw_geo/std": 0.06222900023738038, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575908, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1408 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1330.0, "completions/max_terminated_length": 1330.0, "completions/mean_length": 1080.0, "completions/mean_terminated_length": 1080.0, "completions/min_length": 933.0, "completions/min_terminated_length": 933.0, "epoch": 0.28185637127425484, "frac_reward_zero_std": 0.0, "grad_norm": 3.5100415793474813, "kl": 0.012908935546875, "learning_rate": 9.125371540034202e-07, "loss": 0.0034, "num_tokens": 61188313.0, "reward": 0.0, "reward_std": 0.7352811694145203, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12839352769421766, "rewards/wordcountpos_reward/raw_geo/std": 0.11672063972840939, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09259629622222519, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1409 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1463.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1060.8125, "completions/mean_terminated_length": 1060.8125, "completions/min_length": 774.0, "completions/min_terminated_length": 774.0, "epoch": 0.28205641128225645, "frac_reward_zero_std": 0.0, "grad_norm": 2.953119358459673, "kl": 0.0134735107421875, "learning_rate": 9.123509138070265e-07, "loss": -0.0695, "num_tokens": 61221454.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0047800540924072, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07318731766562347, "rewards/wordcountpos_reward/raw_geo/std": 0.08146496729947218, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1410 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1300.0, "completions/max_terminated_length": 1300.0, "completions/mean_length": 1115.9375, "completions/mean_terminated_length": 1115.9375, "completions/min_length": 985.0, "completions/min_terminated_length": 985.0, "epoch": 0.28225645129025806, "frac_reward_zero_std": 0.0, "grad_norm": 3.093505310480765, "kl": 0.01263427734375, "learning_rate": 9.121644969266819e-07, "loss": -0.0149, "num_tokens": 61263653.0, "reward": 0.0, "reward_std": 0.7194733619689941, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.19166954442962117, "rewards/wordcountpos_reward/raw_geo/std": 0.23097975328680015, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1411 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1389.0, "completions/mean_length": 1186.1875, "completions/mean_terminated_length": 1165.2667236328125, "completions/min_length": 843.0, "completions/min_terminated_length": 843.0, "epoch": 0.28245649129825967, "frac_reward_zero_std": 0.0, "grad_norm": 3.276809477300148, "kl": 0.0137786865234375, "learning_rate": 9.11977903453284e-07, "loss": -0.0738, "num_tokens": 61318640.0, "reward": 7.450580596923828e-09, "reward_std": 1.048647403717041, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.07428405352559575, "rewards/wordcountpos_reward/raw_geo/std": 0.1190818858808534, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.09496588081262934, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1412 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1251.5, "completions/mean_terminated_length": 1234.933349609375, "completions/min_length": 995.0, "completions/min_terminated_length": 995.0, "epoch": 0.2826565313062612, "frac_reward_zero_std": 0.0, "grad_norm": 2.458683276654638, "kl": 0.007568359375, "learning_rate": 9.117911334778167e-07, "loss": 0.0384, "num_tokens": 61372528.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7630959749221802, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06604938458179417, "rewards/wordcountpos_reward/raw_geo/std": 0.20768778526773846, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11666666666666668, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1413 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1459.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1173.5, "completions/mean_terminated_length": 1173.5, "completions/min_length": 979.0, "completions/min_terminated_length": 979.0, "epoch": 0.28285657131426284, "frac_reward_zero_std": 0.0, "grad_norm": 3.6553934176518825, "kl": 0.013824462890625, "learning_rate": 9.116041870913498e-07, "loss": -0.0088, "num_tokens": 61412368.0, "reward": 0.0, "reward_std": 1.0296192169189453, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.15830651054739794, "rewards/wordcountpos_reward/raw_geo/std": 0.3277690323617169, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10174405069512348, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1414 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1337.0, "completions/max_terminated_length": 1337.0, "completions/mean_length": 1164.4375, "completions/mean_terminated_length": 1164.4375, "completions/min_length": 928.0, "completions/min_terminated_length": 928.0, "epoch": 0.28305661132226445, "frac_reward_zero_std": 0.0, "grad_norm": 3.0205958375516304, "kl": 0.0125885009765625, "learning_rate": 9.114170643850393e-07, "loss": -0.0114, "num_tokens": 61462791.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8752886056900024, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06802981831711734, "rewards/wordcountpos_reward/raw_geo/std": 0.052331723360420176, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402216, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1415 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1413.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1219.4375, "completions/mean_terminated_length": 1219.4375, "completions/min_length": 1030.0, "completions/min_terminated_length": 1030.0, "epoch": 0.28325665133026606, "frac_reward_zero_std": 0.0, "grad_norm": 3.34903630805863, "kl": 0.01397705078125, "learning_rate": 9.11229765450127e-07, "loss": 0.0141, "num_tokens": 61505686.0, "reward": 0.0, "reward_std": 0.7645179033279419, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.26829285807831027, "rewards/wordcountpos_reward/raw_geo/std": 0.102550054786711, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11147163731607215, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1416 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1376.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 1144.25, "completions/mean_terminated_length": 1144.25, "completions/min_length": 997.0, "completions/min_terminated_length": 997.0, "epoch": 0.2834566913382677, "frac_reward_zero_std": 0.0, "grad_norm": 3.108151669114976, "kl": 0.011505126953125, "learning_rate": 9.110422903779408e-07, "loss": -0.0565, "num_tokens": 61547962.0, "reward": 0.0, "reward_std": 0.8635854721069336, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10239790319860115, "rewards/wordcountpos_reward/raw_geo/std": 0.1684096718898323, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1417 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1493.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1061.625, "completions/mean_terminated_length": 1061.625, "completions/min_length": 813.0, "completions/min_terminated_length": 813.0, "epoch": 0.28365673134626923, "frac_reward_zero_std": 0.0, "grad_norm": 2.9902954024825954, "kl": 0.01169586181640625, "learning_rate": 9.108546392598945e-07, "loss": 0.0166, "num_tokens": 61585140.0, "reward": 0.0, "reward_std": 0.9319888353347778, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0005817982719527747, "rewards/wordcountpos_reward/raw_geo/std": 0.0785972315239089, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.102469507659596, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1418 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1473.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1111.5625, "completions/mean_terminated_length": 1111.5625, "completions/min_length": 811.0, "completions/min_terminated_length": 811.0, "epoch": 0.28385677135427084, "frac_reward_zero_std": 0.0, "grad_norm": 3.0608009052326564, "kl": 0.008331298828125, "learning_rate": 9.106668121874873e-07, "loss": 0.0125, "num_tokens": 61621029.0, "reward": 0.0, "reward_std": 0.766880452632904, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12466408091652723, "rewards/wordcountpos_reward/raw_geo/std": 0.11572453519138466, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09418264367902597, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1419 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1361.6875, "completions/mean_terminated_length": 1183.857177734375, "completions/min_length": 690.0, "completions/min_terminated_length": 690.0, "epoch": 0.28405681136227245, "frac_reward_zero_std": 0.0, "grad_norm": 2.6030415566219904, "kl": 0.0117340087890625, "learning_rate": 9.104788092523048e-07, "loss": -0.0387, "num_tokens": 61677536.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9692643284797668, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03903487772093722, "rewards/wordcountpos_reward/raw_geo/std": 0.1753418148802447, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.15910630036178586, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1420 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1225.0625, "completions/mean_terminated_length": 1133.416748046875, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.28425685137027407, "frac_reward_zero_std": 0.0, "grad_norm": 2.6940880465172916, "kl": 0.0140380859375, "learning_rate": 9.10290630546018e-07, "loss": -0.1308, "num_tokens": 61721713.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9340627789497375, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06334090977059484, "rewards/wordcountpos_reward/raw_geo/std": 0.09202209244918899, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1746424919657298, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1421 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1246.0, "completions/max_terminated_length": 1246.0, "completions/mean_length": 948.4375, "completions/mean_terminated_length": 948.4375, "completions/min_length": 656.0, "completions/min_terminated_length": 656.0, "epoch": 0.2844568913782757, "frac_reward_zero_std": 0.0, "grad_norm": 4.0159451943825655, "kl": 0.015167236328125, "learning_rate": 9.101022761603838e-07, "loss": 0.0092, "num_tokens": 61752800.0, "reward": -4.470348358154297e-08, "reward_std": 0.9512420892715454, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0166592115907759, "rewards/wordcountpos_reward/raw_geo/std": 0.02306741491050495, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11344765475923412, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1422 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1330.0, "completions/mean_length": 1196.875, "completions/mean_terminated_length": 1176.666748046875, "completions/min_length": 908.0, "completions/min_terminated_length": 908.0, "epoch": 0.28465693138627723, "frac_reward_zero_std": 0.0, "grad_norm": 2.732968073261137, "kl": 0.0109100341796875, "learning_rate": 9.099137461872442e-07, "loss": 0.0079, "num_tokens": 61796486.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9555996656417847, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09531989835351454, "rewards/wordcountpos_reward/raw_geo/std": 0.14151302477176167, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101763, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1423 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1339.0, "completions/max_terminated_length": 1339.0, "completions/mean_length": 1027.625, "completions/mean_terminated_length": 1027.625, "completions/min_length": 815.0, "completions/min_terminated_length": 815.0, "epoch": 0.28485697139427885, "frac_reward_zero_std": 0.0, "grad_norm": 3.876026713182956, "kl": 0.015533447265625, "learning_rate": 9.09725040718528e-07, "loss": -0.0312, "num_tokens": 61840184.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9122369289398193, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06729442328936362, "rewards/wordcountpos_reward/raw_geo/std": 0.12909710838971472, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575908, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1424 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1268.875, "completions/mean_terminated_length": 1130.2000732421875, "completions/min_length": 914.0, "completions/min_terminated_length": 914.0, "epoch": 0.28505701140228046, "frac_reward_zero_std": 0.0, "grad_norm": 3.198453997245997, "kl": 0.010589599609375, "learning_rate": 9.095361598462483e-07, "loss": -0.021, "num_tokens": 61882254.0, "reward": 0.0, "reward_std": 0.9591430425643921, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.026527549385203953, "rewards/wordcountpos_reward/raw_geo/std": 0.11430009243375268, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.09953596037316065, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1425 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1404.5, "completions/mean_terminated_length": 1281.71435546875, "completions/min_length": 1075.0, "completions/min_terminated_length": 1075.0, "epoch": 0.28525705141028207, "frac_reward_zero_std": 0.0, "grad_norm": 3.2792028494388274, "kl": 0.0125732421875, "learning_rate": 9.093471036625046e-07, "loss": -0.0101, "num_tokens": 61935734.0, "reward": -7.450580596923828e-09, "reward_std": 1.0573853254318237, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.09118229364441431, "rewards/wordcountpos_reward/raw_geo/std": 0.09009431404291929, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.10741060020797315, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1426 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1172.0, "completions/mean_length": 1055.0625, "completions/mean_terminated_length": 1025.4000244140625, "completions/min_length": 845.0, "completions/min_terminated_length": 845.0, "epoch": 0.2854570914182837, "frac_reward_zero_std": 0.0, "grad_norm": 3.5455844711763955, "kl": 0.011474609375, "learning_rate": 9.091578722594811e-07, "loss": -0.0157, "num_tokens": 61966927.0, "reward": 1.862645149230957e-08, "reward_std": 1.0687472820281982, "rewards/wordcountpos_reward/mean": 1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04691214056755259, "rewards/wordcountpos_reward/raw_geo/std": 0.0661403137723392, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891873, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1427 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1269.0, "completions/max_terminated_length": 1269.0, "completions/mean_length": 1006.75, "completions/mean_terminated_length": 1006.75, "completions/min_length": 811.0, "completions/min_terminated_length": 811.0, "epoch": 0.28565713142628524, "frac_reward_zero_std": 0.0, "grad_norm": 2.5435659637659245, "kl": 0.0093231201171875, "learning_rate": 9.089684657294485e-07, "loss": -0.049, "num_tokens": 62003747.0, "reward": 0.0, "reward_std": 1.027086615562439, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.22486759902127695, "rewards/wordcountpos_reward/raw_geo/std": 0.25593635309590834, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.10390522747338703, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1428 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1317.0, "completions/max_terminated_length": 1317.0, "completions/mean_length": 1092.375, "completions/mean_terminated_length": 1092.375, "completions/min_length": 898.0, "completions/min_terminated_length": 898.0, "epoch": 0.28585717143428685, "frac_reward_zero_std": 0.0, "grad_norm": 3.4834189959099735, "kl": 0.0149993896484375, "learning_rate": 9.087788841647619e-07, "loss": -0.0134, "num_tokens": 62043457.0, "reward": 0.0, "reward_std": 0.9337885975837708, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.11889745881981413, "rewards/wordcountpos_reward/raw_geo/std": 0.16351219621503862, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575908, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1429 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1234.0, "completions/max_terminated_length": 1234.0, "completions/mean_length": 1016.5, "completions/mean_terminated_length": 1016.5, "completions/min_length": 838.0, "completions/min_terminated_length": 838.0, "epoch": 0.28605721144228846, "frac_reward_zero_std": 0.0, "grad_norm": 3.406045296365549, "kl": 0.0119781494140625, "learning_rate": 9.085891276578621e-07, "loss": 0.0054, "num_tokens": 62075681.0, "reward": 0.0, "reward_std": 0.6838955879211426, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0019982397269586527, "rewards/wordcountpos_reward/raw_geo/std": 0.060939103777697526, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.09108400680852977, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1430 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1370.0, "completions/mean_length": 1091.0625, "completions/mean_terminated_length": 1063.800048828125, "completions/min_length": 815.0, "completions/min_terminated_length": 815.0, "epoch": 0.2862572514502901, "frac_reward_zero_std": 0.0, "grad_norm": 2.882823054837441, "kl": 0.0104522705078125, "learning_rate": 9.083991963012753e-07, "loss": -0.0109, "num_tokens": 62126626.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9261794686317444, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.023166729783343547, "rewards/wordcountpos_reward/raw_geo/std": 0.1326399105086657, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0958393717904348, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1431 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1473.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1119.75, "completions/mean_terminated_length": 1119.75, "completions/min_length": 915.0, "completions/min_terminated_length": 915.0, "epoch": 0.2864572914582917, "frac_reward_zero_std": 0.0, "grad_norm": 3.0442541290074376, "kl": 0.0121612548828125, "learning_rate": 9.082090901876131e-07, "loss": 0.0108, "num_tokens": 62168030.0, "reward": 0.0, "reward_std": 0.5862631797790527, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1623374306106013, "rewards/wordcountpos_reward/raw_geo/std": 0.20840766676787967, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08933913745655643, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1432 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1294.0, "completions/max_terminated_length": 1294.0, "completions/mean_length": 1142.5, "completions/mean_terminated_length": 1142.5, "completions/min_length": 966.0, "completions/min_terminated_length": 966.0, "epoch": 0.28665733146629324, "frac_reward_zero_std": 0.0, "grad_norm": 3.2686045048162846, "kl": 0.0131683349609375, "learning_rate": 9.080188094095717e-07, "loss": 0.0164, "num_tokens": 62206358.0, "reward": 0.0, "reward_std": 0.9273918271064758, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.036940030843641815, "rewards/wordcountpos_reward/raw_geo/std": 0.1378179969519579, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1433 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1347.3125, "completions/mean_terminated_length": 1312.0770263671875, "completions/min_length": 1151.0, "completions/min_terminated_length": 1151.0, "epoch": 0.28685737147429485, "frac_reward_zero_std": 0.0, "grad_norm": 3.1060355334989995, "kl": 0.01318359375, "learning_rate": 9.078283540599333e-07, "loss": -0.0352, "num_tokens": 62249539.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9784984588623047, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0701166993773698, "rewards/wordcountpos_reward/raw_geo/std": 0.17176638318784243, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1434 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1410.0, "completions/mean_length": 1148.125, "completions/mean_terminated_length": 1124.666748046875, "completions/min_length": 896.0, "completions/min_terminated_length": 896.0, "epoch": 0.28705741148229647, "frac_reward_zero_std": 0.0, "grad_norm": 3.2827851230503495, "kl": 0.0108184814453125, "learning_rate": 9.076377242315645e-07, "loss": 0.023, "num_tokens": 62303189.0, "reward": -7.450580596923828e-09, "reward_std": 1.0397303104400635, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.013142796108544632, "rewards/wordcountpos_reward/raw_geo/std": 0.08275107863964964, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195306, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1435 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1322.0, "completions/mean_length": 1141.625, "completions/mean_terminated_length": 1022.1666870117188, "completions/min_length": 874.0, "completions/min_terminated_length": 874.0, "epoch": 0.2872574514902981, "frac_reward_zero_std": 0.0, "grad_norm": 2.9022260815901473, "kl": 0.01263427734375, "learning_rate": 9.074469200174174e-07, "loss": 0.001, "num_tokens": 62355927.0, "reward": 0.0, "reward_std": 0.6853197813034058, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.16060916623382235, "rewards/wordcountpos_reward/raw_geo/std": 0.05504937960144757, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10532137766186216, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1436 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1255.0, "completions/max_terminated_length": 1255.0, "completions/mean_length": 975.4375, "completions/mean_terminated_length": 975.4375, "completions/min_length": 678.0, "completions/min_terminated_length": 678.0, "epoch": 0.28745749149829963, "frac_reward_zero_std": 0.5, "grad_norm": 2.6366982491001685, "kl": 0.01513671875, "learning_rate": 9.07255941510529e-07, "loss": -0.0053, "num_tokens": 62386054.0, "reward": 2.2351741790771484e-08, "reward_std": 0.7467156052589417, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0047600792209254155, "rewards/wordcountpos_reward/raw_geo/std": 0.009419677579760049, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460884, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1437 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1329.0, "completions/max_terminated_length": 1329.0, "completions/mean_length": 1060.75, "completions/mean_terminated_length": 1060.75, "completions/min_length": 807.0, "completions/min_terminated_length": 807.0, "epoch": 0.28765753150630125, "frac_reward_zero_std": 0.0, "grad_norm": 3.982152914527174, "kl": 0.015228271484375, "learning_rate": 9.070647888040213e-07, "loss": -0.0038, "num_tokens": 62433210.0, "reward": -7.450580596923828e-09, "reward_std": 1.051116704940796, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.1402622738382543, "rewards/wordcountpos_reward/raw_geo/std": 0.05415710287503152, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.15371932093796678, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1438 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1431.0, "completions/max_terminated_length": 1431.0, "completions/mean_length": 1184.6875, "completions/mean_terminated_length": 1184.6875, "completions/min_length": 956.0, "completions/min_terminated_length": 956.0, "epoch": 0.28785757151430286, "frac_reward_zero_std": 0.0, "grad_norm": 3.1666054086887634, "kl": 0.0117340087890625, "learning_rate": 9.068734619911009e-07, "loss": 0.0059, "num_tokens": 62480445.0, "reward": 0.0, "reward_std": 0.9157466292381287, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.050639453352962945, "rewards/wordcountpos_reward/raw_geo/std": 0.04500294762279917, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948504, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1439 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1279.0, "completions/mean_length": 1136.125, "completions/mean_terminated_length": 1084.1429443359375, "completions/min_length": 831.0, "completions/min_terminated_length": 831.0, "epoch": 0.28805761152230447, "frac_reward_zero_std": 0.0, "grad_norm": 3.6127257019318777, "kl": 0.014617919921875, "learning_rate": 9.066819611650603e-07, "loss": -0.0178, "num_tokens": 62526151.0, "reward": 0.0, "reward_std": 0.9471538066864014, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04549529780207248, "rewards/wordcountpos_reward/raw_geo/std": 0.11488908788444303, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1440 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1455.0, "completions/max_terminated_length": 1455.0, "completions/mean_length": 1201.875, "completions/mean_terminated_length": 1201.875, "completions/min_length": 930.0, "completions/min_terminated_length": 930.0, "epoch": 0.2882576515303061, "frac_reward_zero_std": 0.0, "grad_norm": 2.36479215666573, "kl": 0.00827789306640625, "learning_rate": 9.064902864192755e-07, "loss": 0.0178, "num_tokens": 62565285.0, "reward": 0.0, "reward_std": 1.0272464752197266, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.24254513753330645, "rewards/wordcountpos_reward/raw_geo/std": 0.2797745325025318, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1441 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1345.0, "completions/mean_length": 1173.1875, "completions/mean_terminated_length": 1151.4000244140625, "completions/min_length": 919.0, "completions/min_terminated_length": 919.0, "epoch": 0.28845769153830764, "frac_reward_zero_std": 0.0, "grad_norm": 3.4801179064641943, "kl": 0.0132904052734375, "learning_rate": 9.062984378472082e-07, "loss": -0.0191, "num_tokens": 62607680.0, "reward": 0.0, "reward_std": 0.47428542375564575, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.3120862149079335, "rewards/wordcountpos_reward/raw_geo/std": 0.48378952234024997, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.16329931618554522, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1442 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1365.0, "completions/max_terminated_length": 1365.0, "completions/mean_length": 1068.6875, "completions/mean_terminated_length": 1068.6875, "completions/min_length": 812.0, "completions/min_terminated_length": 812.0, "epoch": 0.28865773154630925, "frac_reward_zero_std": 0.0, "grad_norm": 3.1022020231140717, "kl": 0.01092529296875, "learning_rate": 9.061064155424049e-07, "loss": -0.0187, "num_tokens": 62647979.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9856134653091431, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.019645325420136105, "rewards/wordcountpos_reward/raw_geo/std": 0.04902552675106132, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06978803887752091, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1443 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 1224.9375, "completions/mean_terminated_length": 1206.60009765625, "completions/min_length": 928.0, "completions/min_terminated_length": 928.0, "epoch": 0.28885777155431086, "frac_reward_zero_std": 0.0, "grad_norm": 3.343545315805295, "kl": 0.013458251953125, "learning_rate": 9.059142195984962e-07, "loss": 0.0293, "num_tokens": 62696314.0, "reward": -7.450580596923828e-09, "reward_std": 1.0684583187103271, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.08204080362934202, "rewards/wordcountpos_reward/raw_geo/std": 0.09430845216030127, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1444 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1414.0, "completions/max_terminated_length": 1414.0, "completions/mean_length": 1130.5625, "completions/mean_terminated_length": 1130.5625, "completions/min_length": 826.0, "completions/min_terminated_length": 826.0, "epoch": 0.28905781156231247, "frac_reward_zero_std": 0.0, "grad_norm": 2.4298549899077098, "kl": 0.01165771484375, "learning_rate": 9.057218501091981e-07, "loss": -0.0403, "num_tokens": 62736451.0, "reward": 0.0, "reward_std": 0.9954296350479126, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.030408648293848026, "rewards/wordcountpos_reward/raw_geo/std": 0.032268355705222525, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857661, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1445 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1311.625, "completions/mean_terminated_length": 1284.71435546875, "completions/min_length": 1069.0, "completions/min_terminated_length": 1069.0, "epoch": 0.2892578515703141, "frac_reward_zero_std": 0.0, "grad_norm": 2.997163145207722, "kl": 0.01416015625, "learning_rate": 9.055293071683104e-07, "loss": -0.0076, "num_tokens": 62786861.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6685308814048767, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0958720921979418, "rewards/wordcountpos_reward/raw_geo/std": 0.12980651945744268, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1446 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1437.0, "completions/max_terminated_length": 1437.0, "completions/mean_length": 1006.4375, "completions/mean_terminated_length": 1006.4375, "completions/min_length": 621.0, "completions/min_terminated_length": 621.0, "epoch": 0.28945789157831564, "frac_reward_zero_std": 0.0, "grad_norm": 1.6744484309448755, "kl": 0.004062652587890625, "learning_rate": 9.053365908697181e-07, "loss": -0.0331, "num_tokens": 62834852.0, "reward": 0.0, "reward_std": 0.8366628885269165, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09478176851892338, "rewards/wordcountpos_reward/raw_geo/std": 0.0708232886416502, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.6208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07187952884282608, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1447 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1325.0, "completions/max_terminated_length": 1325.0, "completions/mean_length": 1158.0625, "completions/mean_terminated_length": 1158.0625, "completions/min_length": 948.0, "completions/min_terminated_length": 948.0, "epoch": 0.28965793158631725, "frac_reward_zero_std": 0.0, "grad_norm": 3.5241780217148113, "kl": 0.0135040283203125, "learning_rate": 9.051437013073904e-07, "loss": 0.0245, "num_tokens": 62884965.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8428228497505188, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1069153750712083, "rewards/wordcountpos_reward/raw_geo/std": 0.12776473747042635, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1448 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1390.0, "completions/mean_length": 1196.8125, "completions/mean_terminated_length": 1153.5, "completions/min_length": 965.0, "completions/min_terminated_length": 965.0, "epoch": 0.28985797159431886, "frac_reward_zero_std": 0.0, "grad_norm": 3.37114446028865, "kl": 0.01226806640625, "learning_rate": 9.049506385753814e-07, "loss": -0.0179, "num_tokens": 62944058.0, "reward": -4.470348358154297e-08, "reward_std": 0.9549105167388916, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03988080949283152, "rewards/wordcountpos_reward/raw_geo/std": 0.1618169811657405, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.07888106377466154, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1449 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1435.0, "completions/max_terminated_length": 1435.0, "completions/mean_length": 1069.0625, "completions/mean_terminated_length": 1069.0625, "completions/min_length": 833.0, "completions/min_terminated_length": 833.0, "epoch": 0.2900580116023205, "frac_reward_zero_std": 0.0, "grad_norm": 3.2739879996675487, "kl": 0.0138702392578125, "learning_rate": 9.047574027678293e-07, "loss": -0.0133, "num_tokens": 62980955.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9645575284957886, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08330779602296855, "rewards/wordcountpos_reward/raw_geo/std": 0.2037769178831789, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.026874192494328493, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1450 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1411.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 1114.75, "completions/mean_terminated_length": 1114.75, "completions/min_length": 957.0, "completions/min_terminated_length": 957.0, "epoch": 0.2902580516103221, "frac_reward_zero_std": 0.0, "grad_norm": 2.7936679022605118, "kl": 0.014312744140625, "learning_rate": 9.045639939789566e-07, "loss": -0.0282, "num_tokens": 63030951.0, "reward": 1.4901161193847656e-08, "reward_std": 0.7950387597084045, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10317419512022896, "rewards/wordcountpos_reward/raw_geo/std": 0.15865256278818526, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133131, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1451 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1286.0, "completions/mean_length": 1080.3125, "completions/mean_terminated_length": 1052.3333740234375, "completions/min_length": 749.0, "completions/min_terminated_length": 749.0, "epoch": 0.29045809161832364, "frac_reward_zero_std": 0.0, "grad_norm": 3.629558687933672, "kl": 0.0142364501953125, "learning_rate": 9.043704123030704e-07, "loss": -0.0064, "num_tokens": 63063604.0, "reward": -5.960464477539063e-08, "reward_std": 0.8700249195098877, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.34295059274188905, "rewards/wordcountpos_reward/raw_geo/std": 0.3944995856322822, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.070841502796867, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1452 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1165.0625, "completions/mean_terminated_length": 1087.769287109375, "completions/min_length": 853.0, "completions/min_terminated_length": 853.0, "epoch": 0.29065813162632526, "frac_reward_zero_std": 0.0, "grad_norm": 2.907760793327239, "kl": 0.0133056640625, "learning_rate": 9.04176657834562e-07, "loss": -0.0519, "num_tokens": 63115541.0, "reward": 5.960464477539063e-08, "reward_std": 0.7884068489074707, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07097501399150928, "rewards/wordcountpos_reward/raw_geo/std": 0.11136324509626805, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1453 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1240.0, "completions/max_terminated_length": 1240.0, "completions/mean_length": 1134.625, "completions/mean_terminated_length": 1134.625, "completions/min_length": 986.0, "completions/min_terminated_length": 986.0, "epoch": 0.29085817163432687, "frac_reward_zero_std": 0.0, "grad_norm": 2.530164969059736, "kl": 0.007476806640625, "learning_rate": 9.03982730667907e-07, "loss": 0.0071, "num_tokens": 63152255.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0467522144317627, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09472054558276298, "rewards/wordcountpos_reward/raw_geo/std": 0.0675707775882524, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1454 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1397.6875, "completions/mean_terminated_length": 1383.071533203125, "completions/min_length": 1194.0, "completions/min_terminated_length": 1194.0, "epoch": 0.2910582116423285, "frac_reward_zero_std": 0.0, "grad_norm": 2.1387801673292866, "kl": 0.00675201416015625, "learning_rate": 9.037886308976651e-07, "loss": -0.0174, "num_tokens": 63202954.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9525010585784912, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1831876681146244, "rewards/wordcountpos_reward/raw_geo/std": 0.10197307117165429, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948506, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1455 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1347.0, "completions/max_terminated_length": 1347.0, "completions/mean_length": 1089.9375, "completions/mean_terminated_length": 1089.9375, "completions/min_length": 918.0, "completions/min_terminated_length": 918.0, "epoch": 0.2912582516503301, "frac_reward_zero_std": 0.0, "grad_norm": 3.3811752996676043, "kl": 0.0152740478515625, "learning_rate": 9.035943586184804e-07, "loss": 0.0015, "num_tokens": 63252449.0, "reward": 0.0, "reward_std": 0.41891640424728394, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1702985180822, "rewards/wordcountpos_reward/raw_geo/std": 0.16368960262095725, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09699179041242309, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1456 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1483.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1339.1875, "completions/mean_terminated_length": 1339.1875, "completions/min_length": 1022.0, "completions/min_terminated_length": 1022.0, "epoch": 0.29145829165833165, "frac_reward_zero_std": 0.0, "grad_norm": 2.698494445690344, "kl": 0.01025390625, "learning_rate": 9.033999139250807e-07, "loss": -0.0086, "num_tokens": 63299236.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6506620645523071, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.013557736177097943, "rewards/wordcountpos_reward/raw_geo/std": 0.028344943746016742, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1457 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1132.0, "completions/max_terminated_length": 1132.0, "completions/mean_length": 1006.0, "completions/mean_terminated_length": 1006.0, "completions/min_length": 800.0, "completions/min_terminated_length": 800.0, "epoch": 0.29165833166633326, "frac_reward_zero_std": 0.0, "grad_norm": 3.100998193962974, "kl": 0.0355072021484375, "learning_rate": 9.032052969122781e-07, "loss": -0.0035, "num_tokens": 63330620.0, "reward": 0.0, "reward_std": 0.6489824056625366, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1805165598164709, "rewards/wordcountpos_reward/raw_geo/std": 0.06382372104453993, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1458 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 1199.4375, "completions/mean_terminated_length": 1179.4000244140625, "completions/min_length": 868.0, "completions/min_terminated_length": 868.0, "epoch": 0.29185837167433487, "frac_reward_zero_std": 0.0, "grad_norm": 3.386138669507454, "kl": 0.0152435302734375, "learning_rate": 9.030105076749689e-07, "loss": -0.0198, "num_tokens": 63376755.0, "reward": 0.0, "reward_std": 0.6886649131774902, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.3210669576097273, "rewards/wordcountpos_reward/raw_geo/std": 0.15926616728538104, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.18856180831641267, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1459 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1279.0, "completions/mean_length": 953.1875, "completions/mean_terminated_length": 916.7333984375, "completions/min_length": 617.0, "completions/min_terminated_length": 617.0, "epoch": 0.2920584116823365, "frac_reward_zero_std": 0.0, "grad_norm": 3.59671685423396, "kl": 0.01483154296875, "learning_rate": 9.02815546308133e-07, "loss": -0.0531, "num_tokens": 63414678.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9387416839599609, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05132582857210057, "rewards/wordcountpos_reward/raw_geo/std": 0.08207116892225662, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237134, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1460 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1336.0, "completions/mean_length": 1160.125, "completions/mean_terminated_length": 895.7777709960938, "completions/min_length": 615.0, "completions/min_terminated_length": 615.0, "epoch": 0.2922584516903381, "frac_reward_zero_std": 0.0, "grad_norm": 2.9322333753581513, "kl": 0.011627197265625, "learning_rate": 9.02620412906835e-07, "loss": -0.0558, "num_tokens": 63461128.0, "reward": 0.0, "reward_std": 0.7622572183609009, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.13543631733557196, "rewards/wordcountpos_reward/raw_geo/std": 0.15255109929123636, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.10886621079036349, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1461 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1247.0, "completions/mean_length": 1019.125, "completions/mean_terminated_length": 950.4285888671875, "completions/min_length": 719.0, "completions/min_terminated_length": 719.0, "epoch": 0.29245849169833965, "frac_reward_zero_std": 0.0, "grad_norm": 3.282527783660248, "kl": 0.01265716552734375, "learning_rate": 9.024251075662222e-07, "loss": 0.0258, "num_tokens": 63506722.0, "reward": 0.0, "reward_std": 0.7570434808731079, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0019561692191171886, "rewards/wordcountpos_reward/raw_geo/std": 0.056019712513134, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820635, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1462 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1420.3125, "completions/mean_terminated_length": 1358.3333740234375, "completions/min_length": 1103.0, "completions/min_terminated_length": 1103.0, "epoch": 0.29265853170634126, "frac_reward_zero_std": 0.0, "grad_norm": 2.60731084717123, "kl": 0.0098724365234375, "learning_rate": 9.022296303815266e-07, "loss": -0.0306, "num_tokens": 63560127.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9849776029586792, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.048802319442386874, "rewards/wordcountpos_reward/raw_geo/std": 0.12270412185010794, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746353, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1463 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1071.0, "completions/max_terminated_length": 1071.0, "completions/mean_length": 974.375, "completions/mean_terminated_length": 974.375, "completions/min_length": 819.0, "completions/min_terminated_length": 819.0, "epoch": 0.2928585717143429, "frac_reward_zero_std": 0.0, "grad_norm": 3.2571216211718226, "kl": 0.01107025146484375, "learning_rate": 9.02033981448064e-07, "loss": -0.0006, "num_tokens": 63595317.0, "reward": 0.0, "reward_std": 0.795859694480896, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10935462328813718, "rewards/wordcountpos_reward/raw_geo/std": 0.09217429870174264, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238704, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1464 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1341.0, "completions/mean_length": 1078.5, "completions/mean_terminated_length": 1050.4000244140625, "completions/min_length": 853.0, "completions/min_terminated_length": 853.0, "epoch": 0.2930586117223445, "frac_reward_zero_std": 0.0, "grad_norm": 3.2872383343896336, "kl": 0.011322021484375, "learning_rate": 9.018381608612335e-07, "loss": 0.0025, "num_tokens": 63635301.0, "reward": 5.960464477539063e-08, "reward_std": 0.37604689598083496, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.212589338598644, "rewards/wordcountpos_reward/raw_geo/std": 0.13518994714939073, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1465 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1426.0, "completions/max_terminated_length": 1426.0, "completions/mean_length": 1140.1875, "completions/mean_terminated_length": 1140.1875, "completions/min_length": 964.0, "completions/min_terminated_length": 964.0, "epoch": 0.29325865173034604, "frac_reward_zero_std": 0.0, "grad_norm": 2.4272363560938714, "kl": 0.00698089599609375, "learning_rate": 9.016421687165179e-07, "loss": -0.0108, "num_tokens": 63670032.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0003583431243896, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07876896043200607, "rewards/wordcountpos_reward/raw_geo/std": 0.09919018675192615, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.0873477511423713, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1466 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1329.0, "completions/mean_length": 1008.75, "completions/mean_terminated_length": 976.0000610351562, "completions/min_length": 698.0, "completions/min_terminated_length": 698.0, "epoch": 0.29345869173834765, "frac_reward_zero_std": 0.0, "grad_norm": 3.7774867122739315, "kl": 0.0142974853515625, "learning_rate": 9.014460051094841e-07, "loss": -0.0315, "num_tokens": 63711076.0, "reward": 5.960464477539063e-08, "reward_std": 0.8494784832000732, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15095108051854517, "rewards/wordcountpos_reward/raw_geo/std": 0.25774572232462695, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725111, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1467 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1356.0, "completions/max_terminated_length": 1356.0, "completions/mean_length": 1042.8125, "completions/mean_terminated_length": 1042.8125, "completions/min_length": 657.0, "completions/min_terminated_length": 657.0, "epoch": 0.29365873174634927, "frac_reward_zero_std": 0.0, "grad_norm": 3.2962966635665474, "kl": 0.0138092041015625, "learning_rate": 9.012496701357824e-07, "loss": -0.0341, "num_tokens": 63756673.0, "reward": -1.4901161193847656e-08, "reward_std": 1.053252100944519, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.029444167556084474, "rewards/wordcountpos_reward/raw_geo/std": 0.03939921715924935, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08432740427115679, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1468 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1145.0, "completions/max_terminated_length": 1145.0, "completions/mean_length": 1040.875, "completions/mean_terminated_length": 1040.875, "completions/min_length": 757.0, "completions/min_terminated_length": 757.0, "epoch": 0.2938587717543509, "frac_reward_zero_std": 0.0, "grad_norm": 3.3214026164414063, "kl": 0.0142059326171875, "learning_rate": 9.010531638911465e-07, "loss": -0.0332, "num_tokens": 63793879.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0144805908203125, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.015375730814412644, "rewards/wordcountpos_reward/raw_geo/std": 0.08224743062757722, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08933913745655642, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1469 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1385.0, "completions/max_terminated_length": 1385.0, "completions/mean_length": 1052.5625, "completions/mean_terminated_length": 1052.5625, "completions/min_length": 785.0, "completions/min_terminated_length": 785.0, "epoch": 0.2940588117623525, "frac_reward_zero_std": 0.0, "grad_norm": 3.270719373495854, "kl": 0.01346588134765625, "learning_rate": 9.008564864713938e-07, "loss": -0.0328, "num_tokens": 63828504.0, "reward": 2.9802322387695312e-08, "reward_std": 0.46263664960861206, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11276991566135867, "rewards/wordcountpos_reward/raw_geo/std": 0.188062190156898, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0697880388775209, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1470 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1339.0, "completions/max_terminated_length": 1339.0, "completions/mean_length": 1111.6875, "completions/mean_terminated_length": 1111.6875, "completions/min_length": 876.0, "completions/min_terminated_length": 876.0, "epoch": 0.29425885177035405, "frac_reward_zero_std": 0.0, "grad_norm": 3.611614343065168, "kl": 0.01434326171875, "learning_rate": 9.006596379724251e-07, "loss": -0.0378, "num_tokens": 63870835.0, "reward": 0.0, "reward_std": 0.873188316822052, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.3010946281655233, "rewards/wordcountpos_reward/raw_geo/std": 0.24577298687623822, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09339283817414601, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1471 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1443.0, "completions/mean_length": 1338.3125, "completions/mean_terminated_length": 1241.300048828125, "completions/min_length": 1007.0, "completions/min_terminated_length": 1007.0, "epoch": 0.29445889177835566, "frac_reward_zero_std": 0.0, "grad_norm": 2.979333702855172, "kl": 0.0115509033203125, "learning_rate": 9.004626184902246e-07, "loss": 0.005, "num_tokens": 63915232.0, "reward": -2.9802322387695312e-08, "reward_std": 0.4926319718360901, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09794124717938814, "rewards/wordcountpos_reward/raw_geo/std": 0.08788800550361218, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11547005383792516, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1472 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1350.0, "completions/mean_terminated_length": 1281.8182373046875, "completions/min_length": 1108.0, "completions/min_terminated_length": 1108.0, "epoch": 0.29465893178635727, "frac_reward_zero_std": 0.0, "grad_norm": 2.4315544037012757, "kl": 0.00914764404296875, "learning_rate": 9.002654281208598e-07, "loss": 0.0206, "num_tokens": 63951000.0, "reward": -3.725290298461914e-09, "reward_std": 0.890720546245575, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.06162392739483719, "rewards/wordcountpos_reward/raw_geo/std": 0.0862484596843867, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362766, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1473 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1243.0, "completions/max_terminated_length": 1243.0, "completions/mean_length": 982.4375, "completions/mean_terminated_length": 982.4375, "completions/min_length": 691.0, "completions/min_terminated_length": 691.0, "epoch": 0.2948589717943589, "frac_reward_zero_std": 0.0, "grad_norm": 3.3396659014262204, "kl": 0.0169525146484375, "learning_rate": 9.000680669604819e-07, "loss": -0.0131, "num_tokens": 63988367.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0485255718231201, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03739086537212785, "rewards/wordcountpos_reward/raw_geo/std": 0.09627541169934727, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1138550085106622, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1474 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1205.0, "completions/max_terminated_length": 1205.0, "completions/mean_length": 995.8125, "completions/mean_terminated_length": 995.8125, "completions/min_length": 748.0, "completions/min_terminated_length": 748.0, "epoch": 0.2950590118023605, "frac_reward_zero_std": 0.0, "grad_norm": 3.4118140314189827, "kl": 0.0121307373046875, "learning_rate": 8.998705351053248e-07, "loss": 0.0037, "num_tokens": 64027732.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7950841188430786, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06555033931700137, "rewards/wordcountpos_reward/raw_geo/std": 0.07034175343903533, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08153617692869923, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1475 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1336.0, "completions/max_terminated_length": 1336.0, "completions/mean_length": 1077.0625, "completions/mean_terminated_length": 1077.0625, "completions/min_length": 739.0, "completions/min_terminated_length": 739.0, "epoch": 0.29525905181036205, "frac_reward_zero_std": 0.0, "grad_norm": 3.431857491456502, "kl": 0.0131378173828125, "learning_rate": 8.996728326517062e-07, "loss": -0.0573, "num_tokens": 64077693.0, "reward": 0.0, "reward_std": 0.7284241914749146, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.013188125732718426, "rewards/wordcountpos_reward/raw_geo/std": 0.14356295921406018, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0969917904124231, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1476 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1431.0, "completions/mean_length": 1133.5, "completions/mean_terminated_length": 1109.0667724609375, "completions/min_length": 786.0, "completions/min_terminated_length": 786.0, "epoch": 0.29545909181836366, "frac_reward_zero_std": 0.0, "grad_norm": 2.2527113721395042, "kl": 0.00705718994140625, "learning_rate": 8.994749596960264e-07, "loss": -0.0215, "num_tokens": 64129941.0, "reward": 0.0, "reward_std": 0.9835470914840698, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12699088070444914, "rewards/wordcountpos_reward/raw_geo/std": 0.05648089467838851, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1477 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1198.0, "completions/max_terminated_length": 1198.0, "completions/mean_length": 883.875, "completions/mean_terminated_length": 883.875, "completions/min_length": 628.0, "completions/min_terminated_length": 628.0, "epoch": 0.2956591318263653, "frac_reward_zero_std": 0.0, "grad_norm": 3.7440388674068736, "kl": 0.0107574462890625, "learning_rate": 8.992769163347695e-07, "loss": 0.0137, "num_tokens": 64167267.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8804106116294861, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01646649146547831, "rewards/wordcountpos_reward/raw_geo/std": 0.09721029831829282, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1167460047694551, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1478 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1330.0, "completions/mean_length": 1244.0, "completions/mean_terminated_length": 1226.933349609375, "completions/min_length": 1084.0, "completions/min_terminated_length": 1084.0, "epoch": 0.2958591718343669, "frac_reward_zero_std": 0.0, "grad_norm": 2.823684789413533, "kl": 0.01018524169921875, "learning_rate": 8.99078702664502e-07, "loss": 0.0069, "num_tokens": 64221379.0, "reward": 0.0, "reward_std": 0.7385225296020508, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.23240210799676453, "rewards/wordcountpos_reward/raw_geo/std": 0.13867578949573683, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13877773329774218, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1479 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 1149.0625, "completions/mean_terminated_length": 1125.666748046875, "completions/min_length": 882.0, "completions/min_terminated_length": 882.0, "epoch": 0.2960592118423685, "frac_reward_zero_std": 0.0, "grad_norm": 3.3930398243728552, "kl": 0.0128631591796875, "learning_rate": 8.98880318781874e-07, "loss": 0.0242, "num_tokens": 64261660.0, "reward": 7.450580596923828e-09, "reward_std": 0.7898614406585693, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.061337632571661374, "rewards/wordcountpos_reward/raw_geo/std": 0.04752054456784415, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11021863793455329, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1480 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1251.8125, "completions/mean_terminated_length": 1216.357177734375, "completions/min_length": 905.0, "completions/min_terminated_length": 905.0, "epoch": 0.29625925185037005, "frac_reward_zero_std": 0.0, "grad_norm": 3.3341993468259337, "kl": 0.0147705078125, "learning_rate": 8.986817647836183e-07, "loss": -0.019, "num_tokens": 64308369.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7689966559410095, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07526627707586311, "rewards/wordcountpos_reward/raw_geo/std": 0.18979758190689364, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06978803887752093, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1481 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1265.0, "completions/max_terminated_length": 1265.0, "completions/mean_length": 1067.4375, "completions/mean_terminated_length": 1067.4375, "completions/min_length": 951.0, "completions/min_terminated_length": 951.0, "epoch": 0.29645929185837167, "frac_reward_zero_std": 0.0, "grad_norm": 3.282611817015571, "kl": 0.010986328125, "learning_rate": 8.984830407665508e-07, "loss": -0.0003, "num_tokens": 64349400.0, "reward": 5.960464477539063e-08, "reward_std": 0.7352669835090637, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08747978256397394, "rewards/wordcountpos_reward/raw_geo/std": 0.09140865488882069, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1482 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1288.875, "completions/mean_terminated_length": 1218.5, "completions/min_length": 993.0, "completions/min_terminated_length": 993.0, "epoch": 0.2966593318663733, "frac_reward_zero_std": 0.0, "grad_norm": 3.543544277266936, "kl": 0.015106201171875, "learning_rate": 8.982841468275702e-07, "loss": -0.0366, "num_tokens": 64397334.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5321323871612549, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.3596431703313685, "rewards/wordcountpos_reward/raw_geo/std": 0.16833698207965983, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.12852438880818895, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1483 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1460.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1092.3125, "completions/mean_terminated_length": 1092.3125, "completions/min_length": 839.0, "completions/min_terminated_length": 839.0, "epoch": 0.2968593718743749, "frac_reward_zero_std": 0.0, "grad_norm": 3.0396470994722247, "kl": 0.011993408203125, "learning_rate": 8.980850830636581e-07, "loss": 0.0154, "num_tokens": 64435579.0, "reward": 0.0, "reward_std": 0.8392201662063599, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.13006627566445106, "rewards/wordcountpos_reward/raw_geo/std": 0.09917892053397387, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1484 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1407.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 1142.0625, "completions/mean_terminated_length": 1142.0625, "completions/min_length": 874.0, "completions/min_terminated_length": 874.0, "epoch": 0.2970594118823765, "frac_reward_zero_std": 0.0, "grad_norm": 2.631411584974668, "kl": 0.008941650390625, "learning_rate": 8.978858495718789e-07, "loss": -0.0484, "num_tokens": 64478340.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9899147152900696, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13299359940299732, "rewards/wordcountpos_reward/raw_geo/std": 0.05453504320482447, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.11855612829185827, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1485 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1379.0, "completions/mean_length": 1084.0625, "completions/mean_terminated_length": 1024.6429443359375, "completions/min_length": 843.0, "completions/min_terminated_length": 843.0, "epoch": 0.29725945189037806, "frac_reward_zero_std": 0.0, "grad_norm": 2.639221648776102, "kl": 0.0066680908203125, "learning_rate": 8.976864464493796e-07, "loss": -0.0226, "num_tokens": 64522693.0, "reward": 0.0, "reward_std": 0.7909857034683228, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06510352959731772, "rewards/wordcountpos_reward/raw_geo/std": 0.09662416680032102, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11925695879998881, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1486 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1321.0, "completions/mean_length": 1264.5, "completions/mean_terminated_length": 1157.45458984375, "completions/min_length": 908.0, "completions/min_terminated_length": 908.0, "epoch": 0.29745949189837967, "frac_reward_zero_std": 0.0, "grad_norm": 3.050306170018354, "kl": 0.0139923095703125, "learning_rate": 8.974868737933903e-07, "loss": -0.0164, "num_tokens": 64567949.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8124679327011108, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.3103681704876814, "rewards/wordcountpos_reward/raw_geo/std": 0.0867463720802633, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1487 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1147.0, "completions/max_terminated_length": 1147.0, "completions/mean_length": 1021.0, "completions/mean_terminated_length": 1021.0, "completions/min_length": 891.0, "completions/min_terminated_length": 891.0, "epoch": 0.2976595319063813, "frac_reward_zero_std": 0.0, "grad_norm": 2.834110721046252, "kl": 0.011505126953125, "learning_rate": 8.972871317012235e-07, "loss": 0.0315, "num_tokens": 64606853.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0578687191009521, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14952008359432423, "rewards/wordcountpos_reward/raw_geo/std": 0.1078846165093754, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.1270024788326182, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1488 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1337.0, "completions/max_terminated_length": 1337.0, "completions/mean_length": 1066.0625, "completions/mean_terminated_length": 1066.0625, "completions/min_length": 759.0, "completions/min_terminated_length": 759.0, "epoch": 0.2978595719143829, "frac_reward_zero_std": 0.0, "grad_norm": 2.635810518763671, "kl": 0.0156097412109375, "learning_rate": 8.970872202702742e-07, "loss": -0.0585, "num_tokens": 64640878.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9582006931304932, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08249479474259085, "rewards/wordcountpos_reward/raw_geo/std": 0.09163353459354723, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503961, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1489 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1322.0, "completions/max_terminated_length": 1322.0, "completions/mean_length": 1180.8125, "completions/mean_terminated_length": 1180.8125, "completions/min_length": 1029.0, "completions/min_terminated_length": 1029.0, "epoch": 0.2980596119223845, "frac_reward_zero_std": 0.0, "grad_norm": 3.220851269784932, "kl": 0.014892578125, "learning_rate": 8.968871395980201e-07, "loss": -0.0004, "num_tokens": 64688323.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8297221064567566, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11318538967370452, "rewards/wordcountpos_reward/raw_geo/std": 0.09185277935910437, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1490 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1446.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1204.0, "completions/mean_terminated_length": 1204.0, "completions/min_length": 847.0, "completions/min_terminated_length": 847.0, "epoch": 0.29825965193038606, "frac_reward_zero_std": 0.0, "grad_norm": 3.3368590393809066, "kl": 0.014312744140625, "learning_rate": 8.966868897820217e-07, "loss": -0.0322, "num_tokens": 64732811.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0384639501571655, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.010900561255736942, "rewards/wordcountpos_reward/raw_geo/std": 0.13743541736268067, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.10532137766186214, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1491 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1363.0, "completions/max_terminated_length": 1363.0, "completions/mean_length": 1019.5, "completions/mean_terminated_length": 1019.5, "completions/min_length": 794.0, "completions/min_terminated_length": 794.0, "epoch": 0.2984596919383877, "frac_reward_zero_std": 0.0, "grad_norm": 3.5284182634042405, "kl": 0.0128631591796875, "learning_rate": 8.964864709199216e-07, "loss": 0.001, "num_tokens": 64766091.0, "reward": 0.0, "reward_std": 0.8546037673950195, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0560437308105306, "rewards/wordcountpos_reward/raw_geo/std": 0.061560006982438284, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.08432740427115679, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1492 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1258.0, "completions/max_terminated_length": 1258.0, "completions/mean_length": 934.8125, "completions/mean_terminated_length": 934.8125, "completions/min_length": 738.0, "completions/min_terminated_length": 738.0, "epoch": 0.2986597319463893, "frac_reward_zero_std": 0.0, "grad_norm": 3.373517887641331, "kl": 0.0097808837890625, "learning_rate": 8.962858831094447e-07, "loss": -0.0251, "num_tokens": 64794240.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0567591190338135, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15887111797345885, "rewards/wordcountpos_reward/raw_geo/std": 0.07918070372071889, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1493 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1451.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1141.1875, "completions/mean_terminated_length": 1141.1875, "completions/min_length": 958.0, "completions/min_terminated_length": 958.0, "epoch": 0.2988597719543909, "frac_reward_zero_std": 0.0, "grad_norm": 3.3645402165793046, "kl": 0.015350341796875, "learning_rate": 8.960851264483989e-07, "loss": -0.0045, "num_tokens": 64840099.0, "reward": -4.470348358154297e-08, "reward_std": 0.9575287103652954, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.18236224770281134, "rewards/wordcountpos_reward/raw_geo/std": 0.08057282423217577, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725111, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1494 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1466.0, "completions/mean_length": 1261.0625, "completions/mean_terminated_length": 1152.45458984375, "completions/min_length": 974.0, "completions/min_terminated_length": 974.0, "epoch": 0.29905981196239245, "frac_reward_zero_std": 0.0, "grad_norm": 2.92627235551055, "kl": 0.01119232177734375, "learning_rate": 8.95884201034674e-07, "loss": -0.0009, "num_tokens": 64884820.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9978117942810059, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0890794109788454, "rewards/wordcountpos_reward/raw_geo/std": 0.14328580255528114, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12224747213928167, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1495 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1158.0, "completions/max_terminated_length": 1158.0, "completions/mean_length": 976.875, "completions/mean_terminated_length": 976.875, "completions/min_length": 841.0, "completions/min_terminated_length": 841.0, "epoch": 0.29925985197039406, "frac_reward_zero_std": 0.0, "grad_norm": 3.8188086695178147, "kl": 0.01422119140625, "learning_rate": 8.956831069662418e-07, "loss": -0.0015, "num_tokens": 64933074.0, "reward": 0.0, "reward_std": 1.0199470520019531, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.027670575276743683, "rewards/wordcountpos_reward/raw_geo/std": 0.2272771345521765, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12102953419784837, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1496 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1427.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1172.5625, "completions/mean_terminated_length": 1172.5625, "completions/min_length": 982.0, "completions/min_terminated_length": 982.0, "epoch": 0.2994598919783957, "frac_reward_zero_std": 0.0, "grad_norm": 3.332621403984407, "kl": 0.0130462646484375, "learning_rate": 8.954818443411573e-07, "loss": -0.0075, "num_tokens": 64970147.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6304559111595154, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10435015607697648, "rewards/wordcountpos_reward/raw_geo/std": 0.05552716214907367, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1497 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1237.0, "completions/max_terminated_length": 1237.0, "completions/mean_length": 1080.1875, "completions/mean_terminated_length": 1080.1875, "completions/min_length": 872.0, "completions/min_terminated_length": 872.0, "epoch": 0.2996599319863973, "frac_reward_zero_std": 0.0, "grad_norm": 2.8890431651908743, "kl": 0.012115478515625, "learning_rate": 8.952804132575563e-07, "loss": -0.0333, "num_tokens": 65020622.0, "reward": -5.960464477539063e-08, "reward_std": 0.47698742151260376, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.3057601877730295, "rewards/wordcountpos_reward/raw_geo/std": 0.2076816826229056, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.11013459778666117, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1498 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1268.3125, "completions/mean_terminated_length": 1214.84619140625, "completions/min_length": 993.0, "completions/min_terminated_length": 993.0, "epoch": 0.2998599719943989, "frac_reward_zero_std": 0.0, "grad_norm": 3.1205365609612774, "kl": 0.01275634765625, "learning_rate": 8.950788138136581e-07, "loss": 0.0104, "num_tokens": 65068131.0, "reward": 0.0, "reward_std": 0.351090669631958, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1163190974764945, "rewards/wordcountpos_reward/raw_geo/std": 0.2405127123551206, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7708333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.17379212785308693, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1499 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1153.0, "completions/max_terminated_length": 1153.0, "completions/mean_length": 1019.0, "completions/mean_terminated_length": 1019.0, "completions/min_length": 786.0, "completions/min_terminated_length": 786.0, "epoch": 0.30006001200240046, "frac_reward_zero_std": 0.0, "grad_norm": 3.223415423046262, "kl": 0.00958251953125, "learning_rate": 8.948770461077634e-07, "loss": -0.0221, "num_tokens": 65105851.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9944062232971191, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0954593473430376, "rewards/wordcountpos_reward/raw_geo/std": 0.04040350468251881, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1500 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1303.0, "completions/mean_length": 1212.0, "completions/mean_terminated_length": 1116.0, "completions/min_length": 802.0, "completions/min_terminated_length": 802.0, "epoch": 0.30026005201040207, "frac_reward_zero_std": 0.0, "grad_norm": 3.4742908990646018, "kl": 0.016845703125, "learning_rate": 8.946751102382548e-07, "loss": -0.0141, "num_tokens": 65154067.0, "reward": 3.725290298461914e-08, "reward_std": 1.0562506914138794, "rewards/wordcountpos_reward/mean": 3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0051511624163113326, "rewards/wordcountpos_reward/raw_geo/std": 0.10285652826258446, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1501 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1485.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1232.5625, "completions/mean_terminated_length": 1232.5625, "completions/min_length": 1007.0, "completions/min_terminated_length": 1007.0, "epoch": 0.3004600920184037, "frac_reward_zero_std": 0.0, "grad_norm": 2.551717769585929, "kl": 0.00740814208984375, "learning_rate": 8.944730063035973e-07, "loss": 0.0178, "num_tokens": 65194148.0, "reward": 7.450580596923828e-09, "reward_std": 1.000548005104065, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.11724307523568306, "rewards/wordcountpos_reward/raw_geo/std": 0.04635386610353036, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1502 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1285.0, "completions/max_terminated_length": 1285.0, "completions/mean_length": 1087.125, "completions/mean_terminated_length": 1087.125, "completions/min_length": 904.0, "completions/min_terminated_length": 904.0, "epoch": 0.3006601320264053, "frac_reward_zero_std": 0.0, "grad_norm": 3.3612995099052623, "kl": 0.016510009765625, "learning_rate": 8.942707344023379e-07, "loss": -0.0402, "num_tokens": 65237998.0, "reward": -7.450580596923828e-09, "reward_std": 1.0554802417755127, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.13163303920657526, "rewards/wordcountpos_reward/raw_geo/std": 0.050153446665678975, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1503 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1486.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1250.5, "completions/mean_terminated_length": 1250.5, "completions/min_length": 1097.0, "completions/min_terminated_length": 1097.0, "epoch": 0.3008601720344069, "frac_reward_zero_std": 0.0, "grad_norm": 2.8500599488625764, "kl": 0.0106353759765625, "learning_rate": 8.940682946331049e-07, "loss": 0.0189, "num_tokens": 65282462.0, "reward": 0.0, "reward_std": 0.7060377597808838, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08134736297805265, "rewards/wordcountpos_reward/raw_geo/std": 0.18010497008739787, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.06763190130459201, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1504 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1203.0, "completions/mean_length": 986.375, "completions/mean_terminated_length": 952.1333618164062, "completions/min_length": 699.0, "completions/min_terminated_length": 699.0, "epoch": 0.30106021204240846, "frac_reward_zero_std": 0.0, "grad_norm": 2.8455644929712167, "kl": 0.0103759765625, "learning_rate": 8.938656870946092e-07, "loss": -0.0906, "num_tokens": 65318396.0, "reward": 0.0, "reward_std": 0.7076146006584167, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06985231981779662, "rewards/wordcountpos_reward/raw_geo/std": 0.08359844855866497, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11474609652039004, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1505 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1246.0, "completions/max_terminated_length": 1246.0, "completions/mean_length": 1025.3125, "completions/mean_terminated_length": 1025.3125, "completions/min_length": 793.0, "completions/min_terminated_length": 793.0, "epoch": 0.30126025205041007, "frac_reward_zero_std": 0.0, "grad_norm": 3.314180251340443, "kl": 0.013031005859375, "learning_rate": 8.936629118856429e-07, "loss": -0.024, "num_tokens": 65349169.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7110707759857178, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04682025849647551, "rewards/wordcountpos_reward/raw_geo/std": 0.05768349962498794, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668902, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1506 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1277.0, "completions/max_terminated_length": 1277.0, "completions/mean_length": 1041.875, "completions/mean_terminated_length": 1041.875, "completions/min_length": 803.0, "completions/min_terminated_length": 803.0, "epoch": 0.3014602920584117, "frac_reward_zero_std": 0.0, "grad_norm": 3.6436343324698814, "kl": 0.01800537109375, "learning_rate": 8.934599691050802e-07, "loss": -0.0181, "num_tokens": 65396655.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6815053224563599, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.057494124272892, "rewards/wordcountpos_reward/raw_geo/std": 0.11644629306887737, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.19163043135739746, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1507 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1367.0, "completions/mean_length": 1237.8125, "completions/mean_terminated_length": 1220.3333740234375, "completions/min_length": 1004.0, "completions/min_terminated_length": 1004.0, "epoch": 0.3016603320664133, "frac_reward_zero_std": 0.0, "grad_norm": 2.8102307430057945, "kl": 0.0093841552734375, "learning_rate": 8.932568588518771e-07, "loss": -0.0143, "num_tokens": 65440036.0, "reward": 0.0, "reward_std": 1.0306439399719238, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.12947051946796104, "rewards/wordcountpos_reward/raw_geo/std": 0.09813034334319035, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1508 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 1323.25, "completions/mean_terminated_length": 1282.4615478515625, "completions/min_length": 1040.0, "completions/min_terminated_length": 1040.0, "epoch": 0.3018603720744149, "frac_reward_zero_std": 0.0, "grad_norm": 2.5604035559231155, "kl": 0.01171875, "learning_rate": 8.930535812250708e-07, "loss": -0.0361, "num_tokens": 65488600.0, "reward": 0.0, "reward_std": 0.48000064492225647, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03358847273707639, "rewards/wordcountpos_reward/raw_geo/std": 0.1420739152443631, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12164002752505566, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1509 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1268.0, "completions/max_terminated_length": 1268.0, "completions/mean_length": 971.625, "completions/mean_terminated_length": 971.625, "completions/min_length": 552.0, "completions/min_terminated_length": 552.0, "epoch": 0.30206041208241646, "frac_reward_zero_std": 0.0, "grad_norm": 3.506120609543339, "kl": 0.01434326171875, "learning_rate": 8.928501363237807e-07, "loss": 0.0035, "num_tokens": 65527330.0, "reward": 0.0, "reward_std": 0.8857091665267944, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.15356173791888733, "rewards/wordcountpos_reward/raw_geo/std": 0.18465784116682463, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.11894598836509011, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1510 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 1059.25, "completions/mean_terminated_length": 1029.86669921875, "completions/min_length": 793.0, "completions/min_terminated_length": 793.0, "epoch": 0.3022604520904181, "frac_reward_zero_std": 0.0, "grad_norm": 3.639112194115988, "kl": 0.0157318115234375, "learning_rate": 8.926465242472071e-07, "loss": -0.0264, "num_tokens": 65571854.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9478989839553833, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.14157868869823553, "rewards/wordcountpos_reward/raw_geo/std": 0.1972394817386133, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715323, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1511 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1223.9375, "completions/mean_terminated_length": 1160.2308349609375, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.3024604920984197, "frac_reward_zero_std": 0.0, "grad_norm": 2.514041060851769, "kl": 0.011627197265625, "learning_rate": 8.924427450946324e-07, "loss": -0.0286, "num_tokens": 65625477.0, "reward": 0.0, "reward_std": 0.6899297833442688, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.25997325963201795, "rewards/wordcountpos_reward/raw_geo/std": 0.15405689843927894, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13305526559931294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1512 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1255.0, "completions/max_terminated_length": 1255.0, "completions/mean_length": 978.0625, "completions/mean_terminated_length": 978.0625, "completions/min_length": 781.0, "completions/min_terminated_length": 781.0, "epoch": 0.3026605321064213, "frac_reward_zero_std": 0.0, "grad_norm": 3.079938444146363, "kl": 0.014190673828125, "learning_rate": 8.922387989654202e-07, "loss": -0.0087, "num_tokens": 65661678.0, "reward": 0.0, "reward_std": 0.8159579634666443, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10778193063840877, "rewards/wordcountpos_reward/raw_geo/std": 0.17601365047533704, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820635, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1513 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1267.1875, "completions/mean_terminated_length": 1233.9285888671875, "completions/min_length": 990.0, "completions/min_terminated_length": 990.0, "epoch": 0.3028605721144229, "frac_reward_zero_std": 0.0, "grad_norm": 2.974711941704128, "kl": 0.014801025390625, "learning_rate": 8.920346859590154e-07, "loss": 0.0138, "num_tokens": 65709177.0, "reward": 0.0, "reward_std": 0.8008859157562256, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0703179232245362, "rewards/wordcountpos_reward/raw_geo/std": 0.11750775017975358, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026001, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1514 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1400.0, "completions/max_terminated_length": 1400.0, "completions/mean_length": 1033.8125, "completions/mean_terminated_length": 1033.8125, "completions/min_length": 783.0, "completions/min_terminated_length": 783.0, "epoch": 0.30306061212242447, "frac_reward_zero_std": 0.0, "grad_norm": 3.5582209337970148, "kl": 0.0156707763671875, "learning_rate": 8.918304061749449e-07, "loss": 0.0098, "num_tokens": 65747190.0, "reward": -3.725290298461914e-09, "reward_std": 1.027848482131958, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.1761510692932802, "rewards/wordcountpos_reward/raw_geo/std": 0.13253862335445202, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101764, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1515 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1243.8125, "completions/mean_terminated_length": 1226.7333984375, "completions/min_length": 926.0, "completions/min_terminated_length": 926.0, "epoch": 0.3032606521304261, "frac_reward_zero_std": 0.0, "grad_norm": 3.1865204098648148, "kl": 0.012603759765625, "learning_rate": 8.916259597128159e-07, "loss": -0.0257, "num_tokens": 65793603.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0383530855178833, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01912401168482963, "rewards/wordcountpos_reward/raw_geo/std": 0.03879880029825146, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.067631901304592, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1516 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1380.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1035.125, "completions/mean_terminated_length": 1035.125, "completions/min_length": 821.0, "completions/min_terminated_length": 821.0, "epoch": 0.3034606921384277, "frac_reward_zero_std": 0.0, "grad_norm": 3.395770656261149, "kl": 0.013214111328125, "learning_rate": 8.914213466723177e-07, "loss": 0.0192, "num_tokens": 65826213.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6731513738632202, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06547951117638277, "rewards/wordcountpos_reward/raw_geo/std": 0.11998319624795317, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1517 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1235.0, "completions/max_terminated_length": 1235.0, "completions/mean_length": 923.6875, "completions/mean_terminated_length": 923.6875, "completions/min_length": 676.0, "completions/min_terminated_length": 676.0, "epoch": 0.3036607321464293, "frac_reward_zero_std": 0.0, "grad_norm": 3.685675213474096, "kl": 0.014190673828125, "learning_rate": 8.912165671532204e-07, "loss": -0.0029, "num_tokens": 65863672.0, "reward": 7.450580596923828e-09, "reward_std": 0.9551426768302917, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.011890006927995342, "rewards/wordcountpos_reward/raw_geo/std": 0.1183979178812905, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238707, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1518 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1374.0, "completions/mean_length": 1174.4375, "completions/mean_terminated_length": 921.2222290039062, "completions/min_length": 731.0, "completions/min_terminated_length": 731.0, "epoch": 0.3038607721544309, "frac_reward_zero_std": 0.0, "grad_norm": 3.333695501125601, "kl": 0.0115203857421875, "learning_rate": 8.910116212553758e-07, "loss": 0.0078, "num_tokens": 65907455.0, "reward": 3.725290298461914e-09, "reward_std": 1.0634804964065552, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.07307470327684458, "rewards/wordcountpos_reward/raw_geo/std": 0.0725740455480212, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194865, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1519 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1295.6875, "completions/mean_terminated_length": 1248.5384521484375, "completions/min_length": 966.0, "completions/min_terminated_length": 966.0, "epoch": 0.30406081216243247, "frac_reward_zero_std": 0.0, "grad_norm": 3.408929085143891, "kl": 0.0147552490234375, "learning_rate": 8.90806509078716e-07, "loss": -0.0097, "num_tokens": 65954618.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0090022087097168, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.008059746663650162, "rewards/wordcountpos_reward/raw_geo/std": 0.023627014241018925, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11385500851066223, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1520 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1240.0, "completions/mean_terminated_length": 1153.3333740234375, "completions/min_length": 854.0, "completions/min_terminated_length": 854.0, "epoch": 0.3042608521704341, "frac_reward_zero_std": 0.0, "grad_norm": 3.3022804045739576, "kl": 0.0134124755859375, "learning_rate": 8.906012307232548e-07, "loss": -0.0373, "num_tokens": 66004442.0, "reward": 0.0, "reward_std": 0.38872620463371277, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08013572981636055, "rewards/wordcountpos_reward/raw_geo/std": 0.25926577963374786, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12285191326386657, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1521 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1173.0, "completions/max_terminated_length": 1173.0, "completions/mean_length": 1009.8125, "completions/mean_terminated_length": 1009.8125, "completions/min_length": 873.0, "completions/min_terminated_length": 873.0, "epoch": 0.3044608921784357, "frac_reward_zero_std": 0.0, "grad_norm": 3.688115607131627, "kl": 0.0160980224609375, "learning_rate": 8.903957862890869e-07, "loss": 0.0329, "num_tokens": 66043671.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0300618410110474, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07682258703412403, "rewards/wordcountpos_reward/raw_geo/std": 0.0510424403426051, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.11674600476945508, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1522 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1178.6875, "completions/mean_terminated_length": 1104.5384521484375, "completions/min_length": 662.0, "completions/min_terminated_length": 662.0, "epoch": 0.3046609321864373, "frac_reward_zero_std": 0.0, "grad_norm": 2.8687043930106912, "kl": 0.00933837890625, "learning_rate": 8.901901758763879e-07, "loss": 0.0163, "num_tokens": 66098746.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6993717551231384, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06032629119198504, "rewards/wordcountpos_reward/raw_geo/std": 0.2586133961228637, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12224747213928165, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1523 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1387.0, "completions/max_terminated_length": 1387.0, "completions/mean_length": 1083.1875, "completions/mean_terminated_length": 1083.1875, "completions/min_length": 713.0, "completions/min_terminated_length": 713.0, "epoch": 0.30486097219443886, "frac_reward_zero_std": 0.0, "grad_norm": 3.960019192816607, "kl": 0.015716552734375, "learning_rate": 8.899843995854142e-07, "loss": 0.0429, "num_tokens": 66144581.0, "reward": 0.0, "reward_std": 0.7789512872695923, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06869457204172164, "rewards/wordcountpos_reward/raw_geo/std": 0.15523565061403158, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.131021626713557, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1524 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1249.0, "completions/max_terminated_length": 1249.0, "completions/mean_length": 968.875, "completions/mean_terminated_length": 968.875, "completions/min_length": 782.0, "completions/min_terminated_length": 782.0, "epoch": 0.3050610122024405, "frac_reward_zero_std": 0.0, "grad_norm": 3.0154508085098914, "kl": 0.01128387451171875, "learning_rate": 8.897784575165037e-07, "loss": -0.0026, "num_tokens": 66187843.0, "reward": -2.9802322387695312e-08, "reward_std": 0.3457336127758026, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.20323528767081042, "rewards/wordcountpos_reward/raw_geo/std": 0.1391034346409706, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.17126976771553507, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1525 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 1202.75, "completions/mean_terminated_length": 1067.6363525390625, "completions/min_length": 903.0, "completions/min_terminated_length": 903.0, "epoch": 0.3052610522104421, "frac_reward_zero_std": 0.0, "grad_norm": 3.5934913552454413, "kl": 0.015594482421875, "learning_rate": 8.895723497700743e-07, "loss": 0.0109, "num_tokens": 66229855.0, "reward": -5.960464477539063e-08, "reward_std": 0.8925884366035461, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11756340675372441, "rewards/wordcountpos_reward/raw_geo/std": 0.1757370496620371, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1002773930432755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1526 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 997.6875, "completions/mean_terminated_length": 997.6875, "completions/min_length": 812.0, "completions/min_terminated_length": 812.0, "epoch": 0.3054610922184437, "frac_reward_zero_std": 0.0, "grad_norm": 3.1240051265169857, "kl": 0.0145721435546875, "learning_rate": 8.893660764466251e-07, "loss": 0.0152, "num_tokens": 66264674.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9766949415206909, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07169236743143663, "rewards/wordcountpos_reward/raw_geo/std": 0.07782348690095019, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746354, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1527 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1138.0, "completions/mean_terminated_length": 1113.86669921875, "completions/min_length": 920.0, "completions/min_terminated_length": 920.0, "epoch": 0.3056611322264453, "frac_reward_zero_std": 0.0, "grad_norm": 3.143491376853367, "kl": 0.0150299072265625, "learning_rate": 8.89159637646736e-07, "loss": -0.0021, "num_tokens": 66312594.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8591177463531494, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.014970841755525247, "rewards/wordcountpos_reward/raw_geo/std": 0.1220851380116241, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1528 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1304.0, "completions/mean_length": 1277.0, "completions/mean_terminated_length": 1143.2000732421875, "completions/min_length": 937.0, "completions/min_terminated_length": 937.0, "epoch": 0.30586117223444687, "frac_reward_zero_std": 0.0, "grad_norm": 3.007880027015834, "kl": 0.0118560791015625, "learning_rate": 8.889530334710676e-07, "loss": 0.0448, "num_tokens": 66359434.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0181944370269775, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.004598640879066152, "rewards/wordcountpos_reward/raw_geo/std": 0.10980499845200752, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1529 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1395.0, "completions/max_terminated_length": 1395.0, "completions/mean_length": 1142.5625, "completions/mean_terminated_length": 1142.5625, "completions/min_length": 936.0, "completions/min_terminated_length": 936.0, "epoch": 0.3060612122424485, "frac_reward_zero_std": 0.0, "grad_norm": 2.7142540274490043, "kl": 0.00984954833984375, "learning_rate": 8.887462640203609e-07, "loss": 0.016, "num_tokens": 66409475.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8652361035346985, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09410256217471537, "rewards/wordcountpos_reward/raw_geo/std": 0.21772364414204134, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.022771001702132435, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1530 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1232.0, "completions/max_terminated_length": 1232.0, "completions/mean_length": 1027.25, "completions/mean_terminated_length": 1027.25, "completions/min_length": 815.0, "completions/min_terminated_length": 815.0, "epoch": 0.3062612522504501, "frac_reward_zero_std": 0.0, "grad_norm": 3.0904285943136074, "kl": 0.009723663330078125, "learning_rate": 8.885393293954377e-07, "loss": -0.0253, "num_tokens": 66440711.0, "reward": 2.9802322387695312e-08, "reward_std": 0.38162240386009216, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06683550399186128, "rewards/wordcountpos_reward/raw_geo/std": 0.13292134844170284, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081408, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1531 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1498.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1228.75, "completions/mean_terminated_length": 1228.75, "completions/min_length": 744.0, "completions/min_terminated_length": 744.0, "epoch": 0.3064612922584517, "frac_reward_zero_std": 0.0, "grad_norm": 2.354316043711627, "kl": 0.0064544677734375, "learning_rate": 8.883322296972001e-07, "loss": -0.0337, "num_tokens": 66492203.0, "reward": -7.450580596923828e-09, "reward_std": 0.9064786434173584, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.002148987847535119, "rewards/wordcountpos_reward/raw_geo/std": 0.06872546579897962, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09737289911202952, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1532 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1219.9375, "completions/mean_terminated_length": 1201.2667236328125, "completions/min_length": 256.0, "completions/min_terminated_length": 256.0, "epoch": 0.3066613322664533, "frac_reward_zero_std": 0.0, "grad_norm": 2.7026574859399686, "kl": 0.0135498046875, "learning_rate": 8.881249650266311e-07, "loss": -0.1129, "num_tokens": 66547018.0, "reward": 0.0, "reward_std": 0.37324953079223633, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08052890528929858, "rewards/wordcountpos_reward/raw_geo/std": 0.08937977631022495, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.08933913745655642, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1533 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1376.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 1054.625, "completions/mean_terminated_length": 1054.625, "completions/min_length": 623.0, "completions/min_terminated_length": 623.0, "epoch": 0.30686137227445487, "frac_reward_zero_std": 0.0, "grad_norm": 3.2793679076034477, "kl": 0.0122222900390625, "learning_rate": 8.879175354847937e-07, "loss": -0.0789, "num_tokens": 66600492.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8591996431350708, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01607357559738295, "rewards/wordcountpos_reward/raw_geo/std": 0.03290865455955885, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.16487930490266264, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1534 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1298.0, "completions/max_terminated_length": 1298.0, "completions/mean_length": 1036.5625, "completions/mean_terminated_length": 1036.5625, "completions/min_length": 754.0, "completions/min_terminated_length": 754.0, "epoch": 0.3070614122824565, "frac_reward_zero_std": 0.0, "grad_norm": 2.989883769889972, "kl": 0.0112762451171875, "learning_rate": 8.877099411728314e-07, "loss": -0.0331, "num_tokens": 66642533.0, "reward": 0.0, "reward_std": 0.6208987236022949, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1308921608609547, "rewards/wordcountpos_reward/raw_geo/std": 0.15574081864194272, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668902, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1535 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1451.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1150.625, "completions/mean_terminated_length": 1150.625, "completions/min_length": 803.0, "completions/min_terminated_length": 803.0, "epoch": 0.3072614522904581, "frac_reward_zero_std": 0.0, "grad_norm": 2.953824449933008, "kl": 0.0125732421875, "learning_rate": 8.875021821919684e-07, "loss": -0.0374, "num_tokens": 66679439.0, "reward": 1.4901161193847656e-08, "reward_std": 1.026672124862671, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.026093179943833793, "rewards/wordcountpos_reward/raw_geo/std": 0.07362271210631866, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.038248698840130005, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1536 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 1171.3125, "completions/mean_terminated_length": 1149.4000244140625, "completions/min_length": 771.0, "completions/min_terminated_length": 771.0, "epoch": 0.3074614922984597, "frac_reward_zero_std": 0.0, "grad_norm": 2.9125444942937806, "kl": 0.0110015869140625, "learning_rate": 8.872942586435088e-07, "loss": -0.0154, "num_tokens": 66725084.0, "reward": 0.0, "reward_std": 0.8341853022575378, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0026693848646373117, "rewards/wordcountpos_reward/raw_geo/std": 0.16526720457345884, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258098, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1537 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1168.0625, "completions/mean_terminated_length": 1145.933349609375, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.3076615323064613, "frac_reward_zero_std": 0.0, "grad_norm": 2.8862942343229387, "kl": 0.0129852294921875, "learning_rate": 8.87086170628837e-07, "loss": -0.0406, "num_tokens": 66767885.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7962286472320557, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08119704825835708, "rewards/wordcountpos_reward/raw_geo/std": 0.06548641618818236, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.03415650255319865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1538 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1183.375, "completions/mean_terminated_length": 1162.2667236328125, "completions/min_length": 972.0, "completions/min_terminated_length": 972.0, "epoch": 0.3078615723144629, "frac_reward_zero_std": 0.0, "grad_norm": 3.1232281173726033, "kl": 0.01287841796875, "learning_rate": 8.868779182494178e-07, "loss": 0.003, "num_tokens": 66817035.0, "reward": 0.0, "reward_std": 0.4290623068809509, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.058969565918843836, "rewards/wordcountpos_reward/raw_geo/std": 0.2860248805638058, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12049281521534187, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1539 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1107.25, "completions/mean_terminated_length": 1081.0667724609375, "completions/min_length": 746.0, "completions/min_terminated_length": 746.0, "epoch": 0.3080616123224645, "frac_reward_zero_std": 0.0, "grad_norm": 2.7581782686460237, "kl": 0.01030731201171875, "learning_rate": 8.86669501606796e-07, "loss": 0.0096, "num_tokens": 66865775.0, "reward": 0.0, "reward_std": 0.7750606536865234, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11571340762656603, "rewards/wordcountpos_reward/raw_geo/std": 0.09762077797368346, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13526380260918403, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1540 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1474.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1182.1875, "completions/mean_terminated_length": 1182.1875, "completions/min_length": 899.0, "completions/min_terminated_length": 899.0, "epoch": 0.3082616523304661, "frac_reward_zero_std": 0.0, "grad_norm": 3.152020457208807, "kl": 0.0146942138671875, "learning_rate": 8.864609208025962e-07, "loss": -0.0267, "num_tokens": 66916594.0, "reward": -7.450580596923828e-09, "reward_std": 1.0525057315826416, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.0763243572763026, "rewards/wordcountpos_reward/raw_geo/std": 0.06960535562230581, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1541 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1315.0, "completions/mean_length": 1028.5625, "completions/mean_terminated_length": 997.1333618164062, "completions/min_length": 793.0, "completions/min_terminated_length": 793.0, "epoch": 0.3084616923384677, "frac_reward_zero_std": 0.0, "grad_norm": 3.0722721447057095, "kl": 0.0120849609375, "learning_rate": 8.862521759385238e-07, "loss": 0.006, "num_tokens": 66959019.0, "reward": -5.960464477539063e-08, "reward_std": 0.6212877035140991, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.009477141838253001, "rewards/wordcountpos_reward/raw_geo/std": 0.02136304086076395, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13109227736669002, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1542 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1336.0, "completions/max_terminated_length": 1336.0, "completions/mean_length": 1059.75, "completions/mean_terminated_length": 1059.75, "completions/min_length": 644.0, "completions/min_terminated_length": 644.0, "epoch": 0.3086617323464693, "frac_reward_zero_std": 0.0, "grad_norm": 3.19483790587238, "kl": 0.0100860595703125, "learning_rate": 8.860432671163635e-07, "loss": -0.0138, "num_tokens": 66995599.0, "reward": 0.0, "reward_std": 1.036252498626709, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03399354742090029, "rewards/wordcountpos_reward/raw_geo/std": 0.03803052726939504, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505421, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1543 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1380.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1121.0, "completions/mean_terminated_length": 1121.0, "completions/min_length": 885.0, "completions/min_terminated_length": 885.0, "epoch": 0.3088617723544709, "frac_reward_zero_std": 0.0, "grad_norm": 3.195248852339637, "kl": 0.0141448974609375, "learning_rate": 8.858341944379801e-07, "loss": 0.029, "num_tokens": 67046823.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9540793299674988, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.013055438548569605, "rewards/wordcountpos_reward/raw_geo/std": 0.11409094403894339, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.06978803887752094, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1544 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1185.75, "completions/mean_terminated_length": 1113.2308349609375, "completions/min_length": 735.0, "completions/min_terminated_length": 735.0, "epoch": 0.3090618123624725, "frac_reward_zero_std": 0.0, "grad_norm": 2.9243491131790127, "kl": 0.01214599609375, "learning_rate": 8.856249580053186e-07, "loss": -0.0005, "num_tokens": 67090947.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0064913034439087, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.3221075759757471, "rewards/wordcountpos_reward/raw_geo/std": 0.08181261036086862, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1545 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1337.0, "completions/max_terminated_length": 1337.0, "completions/mean_length": 1059.0, "completions/mean_terminated_length": 1059.0, "completions/min_length": 888.0, "completions/min_terminated_length": 888.0, "epoch": 0.3092618523704741, "frac_reward_zero_std": 0.0, "grad_norm": 3.2721800151200835, "kl": 0.011260986328125, "learning_rate": 8.854155579204036e-07, "loss": -0.0097, "num_tokens": 67125923.0, "reward": -1.862645149230957e-09, "reward_std": 1.0666723251342773, "rewards/wordcountpos_reward/mean": -1.862645149230957e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.04273423305010233, "rewards/wordcountpos_reward/raw_geo/std": 0.03267786801817852, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07391185942027816, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1546 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1386.0, "completions/max_terminated_length": 1386.0, "completions/mean_length": 1107.0, "completions/mean_terminated_length": 1107.0, "completions/min_length": 851.0, "completions/min_terminated_length": 851.0, "epoch": 0.3094618923784757, "frac_reward_zero_std": 0.0, "grad_norm": 3.1833100717055838, "kl": 0.01153564453125, "learning_rate": 8.852059942853393e-07, "loss": -0.0333, "num_tokens": 67177739.0, "reward": 0.0, "reward_std": 0.753730833530426, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.021449551531011014, "rewards/wordcountpos_reward/raw_geo/std": 0.09060810907392117, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1547 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1224.6875, "completions/mean_terminated_length": 1185.357177734375, "completions/min_length": 927.0, "completions/min_terminated_length": 927.0, "epoch": 0.30966193238647727, "frac_reward_zero_std": 0.0, "grad_norm": 2.9511033761526155, "kl": 0.02164459228515625, "learning_rate": 8.8499626720231e-07, "loss": 0.0404, "num_tokens": 67230806.0, "reward": 0.0, "reward_std": 0.5632149577140808, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.039873350935431394, "rewards/wordcountpos_reward/raw_geo/std": 0.1456775175156621, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.2511823890972307, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1548 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1457.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 999.875, "completions/mean_terminated_length": 999.875, "completions/min_length": 800.0, "completions/min_terminated_length": 800.0, "epoch": 0.3098619723944789, "frac_reward_zero_std": 0.0, "grad_norm": 3.058683076178571, "kl": 0.01165771484375, "learning_rate": 8.847863767735798e-07, "loss": 0.0017, "num_tokens": 67275404.0, "reward": 0.0, "reward_std": 0.6979461908340454, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08095004888482196, "rewards/wordcountpos_reward/raw_geo/std": 0.14020305632208324, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1549 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1206.9375, "completions/mean_terminated_length": 1187.4000244140625, "completions/min_length": 1008.0, "completions/min_terminated_length": 1008.0, "epoch": 0.3100620124024805, "frac_reward_zero_std": 0.0, "grad_norm": 3.4435971821270046, "kl": 0.0122222900390625, "learning_rate": 8.84576323101492e-07, "loss": -0.0129, "num_tokens": 67321555.0, "reward": -2.9802322387695312e-08, "reward_std": 0.49310189485549927, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10908625314789303, "rewards/wordcountpos_reward/raw_geo/std": 0.22350127139590187, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08153617692869927, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1550 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1233.875, "completions/mean_terminated_length": 1074.2000732421875, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.3102620524104821, "frac_reward_zero_std": 0.0, "grad_norm": 2.2406333642590166, "kl": 0.0165252685546875, "learning_rate": 8.843661062884697e-07, "loss": -0.0069, "num_tokens": 67368185.0, "reward": 0.0, "reward_std": 0.963836669921875, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04149796524792276, "rewards/wordcountpos_reward/raw_geo/std": 0.07743742581845506, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1520233900132184, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1551 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1440.0, "completions/max_terminated_length": 1440.0, "completions/mean_length": 1191.125, "completions/mean_terminated_length": 1191.125, "completions/min_length": 963.0, "completions/min_terminated_length": 963.0, "epoch": 0.3104620924184837, "frac_reward_zero_std": 0.0, "grad_norm": 2.8252713112414907, "kl": 0.01031494140625, "learning_rate": 8.841557264370157e-07, "loss": -0.0293, "num_tokens": 67407827.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0579392910003662, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08114629525293365, "rewards/wordcountpos_reward/raw_geo/std": 0.1420111359841418, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460884, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1552 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1288.0, "completions/mean_length": 1327.375, "completions/mean_terminated_length": 1193.111083984375, "completions/min_length": 1072.0, "completions/min_terminated_length": 1072.0, "epoch": 0.31066213242648527, "frac_reward_zero_std": 0.0, "grad_norm": 3.286008875953418, "kl": 0.0150604248046875, "learning_rate": 8.839451836497123e-07, "loss": -0.0001, "num_tokens": 67451497.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8361608982086182, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.19903882233755354, "rewards/wordcountpos_reward/raw_geo/std": 0.21872693306155905, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07888106377466154, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1553 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1451.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1219.75, "completions/mean_terminated_length": 1219.75, "completions/min_length": 1084.0, "completions/min_terminated_length": 1084.0, "epoch": 0.3108621724344869, "frac_reward_zero_std": 0.0, "grad_norm": 2.447575028712033, "kl": 0.0086212158203125, "learning_rate": 8.837344780292207e-07, "loss": -0.0388, "num_tokens": 67489741.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8923624753952026, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04062641953505993, "rewards/wordcountpos_reward/raw_geo/std": 0.16552705330602832, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04367387557118565, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1554 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1284.8125, "completions/mean_terminated_length": 1235.1539306640625, "completions/min_length": 969.0, "completions/min_terminated_length": 969.0, "epoch": 0.3110622124424885, "frac_reward_zero_std": 0.0, "grad_norm": 3.127903306648886, "kl": 0.0142974853515625, "learning_rate": 8.835236096782823e-07, "loss": -0.0748, "num_tokens": 67535466.0, "reward": 1.4901161193847656e-08, "reward_std": 1.044126033782959, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.00795851783968677, "rewards/wordcountpos_reward/raw_geo/std": 0.16746145196721515, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1555 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1329.0, "completions/max_terminated_length": 1329.0, "completions/mean_length": 1099.6875, "completions/mean_terminated_length": 1099.6875, "completions/min_length": 920.0, "completions/min_terminated_length": 920.0, "epoch": 0.3112622524504901, "frac_reward_zero_std": 0.0, "grad_norm": 3.089450921793083, "kl": 0.01044464111328125, "learning_rate": 8.833125786997172e-07, "loss": -0.0133, "num_tokens": 67584789.0, "reward": 0.0, "reward_std": 0.8578050136566162, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06936662673855049, "rewards/wordcountpos_reward/raw_geo/std": 0.14250272325423746, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10878112581387146, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1556 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1349.0, "completions/mean_length": 1203.9375, "completions/mean_terminated_length": 1135.615478515625, "completions/min_length": 913.0, "completions/min_terminated_length": 913.0, "epoch": 0.3114622924584917, "frac_reward_zero_std": 0.0, "grad_norm": 2.8987471743600977, "kl": 0.01373291015625, "learning_rate": 8.831013851964253e-07, "loss": -0.0741, "num_tokens": 67626764.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9918359518051147, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12341565297831397, "rewards/wordcountpos_reward/raw_geo/std": 0.08525236833099371, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1557 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1303.0, "completions/max_terminated_length": 1303.0, "completions/mean_length": 979.375, "completions/mean_terminated_length": 979.375, "completions/min_length": 875.0, "completions/min_terminated_length": 875.0, "epoch": 0.3116623324664933, "frac_reward_zero_std": 0.0, "grad_norm": 3.4869007134265724, "kl": 0.0156707763671875, "learning_rate": 8.828900292713852e-07, "loss": -0.0213, "num_tokens": 67673634.0, "reward": 0.0, "reward_std": 0.9104900360107422, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08684062188233006, "rewards/wordcountpos_reward/raw_geo/std": 0.046562681978747904, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1102186379345533, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1558 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1409.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 1146.375, "completions/mean_terminated_length": 1146.375, "completions/min_length": 796.0, "completions/min_terminated_length": 796.0, "epoch": 0.3118623724744949, "frac_reward_zero_std": 0.0, "grad_norm": 2.9611884467169225, "kl": 0.0125732421875, "learning_rate": 8.826785110276554e-07, "loss": -0.0058, "num_tokens": 67713352.0, "reward": 2.9802322387695312e-08, "reward_std": 1.012331485748291, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0028908920609303883, "rewards/wordcountpos_reward/raw_geo/std": 0.10621303819614113, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.05443310539518172, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1559 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 1272.25, "completions/mean_terminated_length": 1257.0667724609375, "completions/min_length": 984.0, "completions/min_terminated_length": 984.0, "epoch": 0.3120624124824965, "frac_reward_zero_std": 0.0, "grad_norm": 3.132556069123932, "kl": 0.0156097412109375, "learning_rate": 8.824668305683727e-07, "loss": 0.0027, "num_tokens": 67763820.0, "reward": -5.960464477539063e-08, "reward_std": 0.6365565657615662, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.276049301693915, "rewards/wordcountpos_reward/raw_geo/std": 0.2256541879867406, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0806225774829855, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1560 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1439.0, "completions/max_terminated_length": 1439.0, "completions/mean_length": 1107.3125, "completions/mean_terminated_length": 1107.3125, "completions/min_length": 872.0, "completions/min_terminated_length": 872.0, "epoch": 0.3122624524904981, "frac_reward_zero_std": 0.0, "grad_norm": 3.6476165619780776, "kl": 0.0171356201171875, "learning_rate": 8.822549879967542e-07, "loss": 0.0177, "num_tokens": 67810281.0, "reward": 0.0, "reward_std": 0.7430156469345093, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07537980442832835, "rewards/wordcountpos_reward/raw_geo/std": 0.1517052103644729, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13333333333333333, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1561 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1223.0, "completions/mean_length": 1271.8125, "completions/mean_terminated_length": 1043.625, "completions/min_length": 791.0, "completions/min_terminated_length": 791.0, "epoch": 0.3124624924984997, "frac_reward_zero_std": 0.0, "grad_norm": 3.2532799207854755, "kl": 0.0138092041015625, "learning_rate": 8.820429834160944e-07, "loss": -0.0396, "num_tokens": 67864254.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6171025633811951, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.013195082058544057, "rewards/wordcountpos_reward/raw_geo/std": 0.13006413643457318, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10470416879457553, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1562 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1045.0, "completions/max_terminated_length": 1045.0, "completions/mean_length": 885.8125, "completions/mean_terminated_length": 885.8125, "completions/min_length": 679.0, "completions/min_terminated_length": 679.0, "epoch": 0.3126625325065013, "frac_reward_zero_std": 0.0, "grad_norm": 3.378178294980577, "kl": 0.01348876953125, "learning_rate": 8.818308169297683e-07, "loss": 0.0001, "num_tokens": 67904947.0, "reward": 0.0, "reward_std": 0.5333974361419678, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.007400721060385203, "rewards/wordcountpos_reward/raw_geo/std": 0.08350638556766352, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04367387557118565, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1563 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1420.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1112.4375, "completions/mean_terminated_length": 1112.4375, "completions/min_length": 875.0, "completions/min_terminated_length": 875.0, "epoch": 0.3128625725145029, "frac_reward_zero_std": 0.0, "grad_norm": 3.1676097085993753, "kl": 0.0122528076171875, "learning_rate": 8.816184886412291e-07, "loss": -0.0213, "num_tokens": 67949738.0, "reward": -2.2351741790771484e-08, "reward_std": 1.002312421798706, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01510218146175955, "rewards/wordcountpos_reward/raw_geo/std": 0.07360919508933594, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1564 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1376.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 1080.4375, "completions/mean_terminated_length": 1080.4375, "completions/min_length": 852.0, "completions/min_terminated_length": 852.0, "epoch": 0.3130626125225045, "frac_reward_zero_std": 0.0, "grad_norm": 3.5864291650125497, "kl": 0.0146026611328125, "learning_rate": 8.814059986540087e-07, "loss": 0.0437, "num_tokens": 67996721.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7108614444732666, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0257855742980834, "rewards/wordcountpos_reward/raw_geo/std": 0.04412411534207239, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590963, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1565 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1246.0, "completions/max_terminated_length": 1246.0, "completions/mean_length": 947.0625, "completions/mean_terminated_length": 947.0625, "completions/min_length": 654.0, "completions/min_terminated_length": 654.0, "epoch": 0.3132626525305061, "frac_reward_zero_std": 0.0, "grad_norm": 3.8995940613253968, "kl": 0.0153656005859375, "learning_rate": 8.811933470717187e-07, "loss": 0.0088, "num_tokens": 68047810.0, "reward": 2.60770320892334e-08, "reward_std": 1.0452021360397339, "rewards/wordcountpos_reward/mean": 2.60770320892334e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10611139640300007, "rewards/wordcountpos_reward/raw_geo/std": 0.07339664949013859, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.26105200276601626, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1566 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1319.0, "completions/mean_length": 1169.375, "completions/mean_terminated_length": 1122.1429443359375, "completions/min_length": 965.0, "completions/min_terminated_length": 965.0, "epoch": 0.3134626925385077, "frac_reward_zero_std": 0.0, "grad_norm": 3.373046760086607, "kl": 0.01495361328125, "learning_rate": 8.809805339980489e-07, "loss": -0.0089, "num_tokens": 68098488.0, "reward": -2.9802322387695312e-08, "reward_std": 0.662135124206543, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0035310446599344653, "rewards/wordcountpos_reward/raw_geo/std": 0.10061880535202689, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101764, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1567 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1205.9375, "completions/mean_terminated_length": 1186.3333740234375, "completions/min_length": 937.0, "completions/min_terminated_length": 937.0, "epoch": 0.3136627325465093, "frac_reward_zero_std": 0.0, "grad_norm": 2.8816348634711813, "kl": 0.009613037109375, "learning_rate": 8.807675595367674e-07, "loss": 0.0374, "num_tokens": 68136703.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8111258149147034, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.00523816750743297, "rewards/wordcountpos_reward/raw_geo/std": 0.1510539922485454, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9791666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1568 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1235.125, "completions/mean_terminated_length": 1197.2857666015625, "completions/min_length": 975.0, "completions/min_terminated_length": 975.0, "epoch": 0.3138627725545109, "frac_reward_zero_std": 0.0, "grad_norm": 2.5103383488024105, "kl": 0.0102691650390625, "learning_rate": 8.805544237917222e-07, "loss": -0.0355, "num_tokens": 68183881.0, "reward": 0.0, "reward_std": 0.792202353477478, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.029927673717363802, "rewards/wordcountpos_reward/raw_geo/std": 0.12631156238380636, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1569 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1223.75, "completions/mean_terminated_length": 1223.75, "completions/min_length": 1013.0, "completions/min_terminated_length": 1013.0, "epoch": 0.3140628125625125, "frac_reward_zero_std": 0.0, "grad_norm": 2.8913291404074664, "kl": 0.010833740234375, "learning_rate": 8.803411268668387e-07, "loss": 0.0318, "num_tokens": 68233653.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8148121237754822, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07154118218644451, "rewards/wordcountpos_reward/raw_geo/std": 0.09725435575952426, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13305526559931294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1570 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1125.0, "completions/max_terminated_length": 1125.0, "completions/mean_length": 942.9375, "completions/mean_terminated_length": 942.9375, "completions/min_length": 561.0, "completions/min_terminated_length": 561.0, "epoch": 0.3142628525705141, "frac_reward_zero_std": 0.0, "grad_norm": 2.4342270866702056, "kl": 0.005859375, "learning_rate": 8.801276688661217e-07, "loss": -0.0157, "num_tokens": 68272108.0, "reward": 0.0, "reward_std": 0.8952018618583679, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.21659465905760503, "rewards/wordcountpos_reward/raw_geo/std": 0.027743311715286458, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026004, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1571 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1371.0, "completions/max_terminated_length": 1371.0, "completions/mean_length": 1131.0, "completions/mean_terminated_length": 1131.0, "completions/min_length": 872.0, "completions/min_terminated_length": 872.0, "epoch": 0.31446289257851573, "frac_reward_zero_std": 0.0, "grad_norm": 4.002889922843912, "kl": 0.017974853515625, "learning_rate": 8.799140498936545e-07, "loss": -0.0396, "num_tokens": 68315004.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9004261493682861, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.017226014681493804, "rewards/wordcountpos_reward/raw_geo/std": 0.10736089592095367, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10036968702787748, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1572 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1419.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 1152.0, "completions/mean_terminated_length": 1152.0, "completions/min_length": 973.0, "completions/min_terminated_length": 973.0, "epoch": 0.3146629325865173, "frac_reward_zero_std": 0.0, "grad_norm": 3.163317177855879, "kl": 0.01373291015625, "learning_rate": 8.797002700535984e-07, "loss": -0.0013, "num_tokens": 68359812.0, "reward": 5.960464477539063e-08, "reward_std": 0.4477081596851349, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.15761104512120155, "rewards/wordcountpos_reward/raw_geo/std": 0.316092888680308, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1573 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1444.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 1072.8125, "completions/mean_terminated_length": 1072.8125, "completions/min_length": 819.0, "completions/min_terminated_length": 819.0, "epoch": 0.3148629725945189, "frac_reward_zero_std": 0.0, "grad_norm": 3.3796532564270585, "kl": 0.0107879638671875, "learning_rate": 8.794863294501934e-07, "loss": 0.0348, "num_tokens": 68406281.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6505720615386963, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02912702659987597, "rewards/wordcountpos_reward/raw_geo/std": 0.07133172007978669, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.2237723711142063, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1574 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1247.0, "completions/max_terminated_length": 1247.0, "completions/mean_length": 966.0625, "completions/mean_terminated_length": 966.0625, "completions/min_length": 774.0, "completions/min_terminated_length": 774.0, "epoch": 0.3150630126025205, "frac_reward_zero_std": 0.0, "grad_norm": 2.7691465082822346, "kl": 0.0117340087890625, "learning_rate": 8.792722281877581e-07, "loss": 0.0392, "num_tokens": 68450602.0, "reward": 0.0, "reward_std": 0.8999190330505371, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05927946503531806, "rewards/wordcountpos_reward/raw_geo/std": 0.05862487755436452, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194866, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1575 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1264.0, "completions/max_terminated_length": 1264.0, "completions/mean_length": 926.375, "completions/mean_terminated_length": 926.375, "completions/min_length": 777.0, "completions/min_terminated_length": 777.0, "epoch": 0.3152630526105221, "frac_reward_zero_std": 0.0, "grad_norm": 3.6246840133609366, "kl": 0.011627197265625, "learning_rate": 8.790579663706891e-07, "loss": -0.0282, "num_tokens": 68483480.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6915251016616821, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.001774806966841395, "rewards/wordcountpos_reward/raw_geo/std": 0.04894336106383481, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13270686158262923, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1576 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1201.0, "completions/max_terminated_length": 1201.0, "completions/mean_length": 1060.0, "completions/mean_terminated_length": 1060.0, "completions/min_length": 859.0, "completions/min_terminated_length": 859.0, "epoch": 0.3154630926185237, "frac_reward_zero_std": 0.0, "grad_norm": 3.5684484214763876, "kl": 0.0152435302734375, "learning_rate": 8.788435441034614e-07, "loss": -0.0059, "num_tokens": 68526464.0, "reward": 3.725290298461914e-09, "reward_std": 0.9793384075164795, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.18088690006088792, "rewards/wordcountpos_reward/raw_geo/std": 0.1275818870379303, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1577 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1336.0, "completions/mean_length": 1123.4375, "completions/mean_terminated_length": 1098.3333740234375, "completions/min_length": 747.0, "completions/min_terminated_length": 747.0, "epoch": 0.3156631326265253, "frac_reward_zero_std": 0.0, "grad_norm": 3.371664310057631, "kl": 0.0129241943359375, "learning_rate": 8.786289614906283e-07, "loss": -0.0548, "num_tokens": 68565519.0, "reward": 0.0, "reward_std": 0.9364562034606934, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07425712139114941, "rewards/wordcountpos_reward/raw_geo/std": 0.11593128228835065, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1578 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1285.0, "completions/max_terminated_length": 1285.0, "completions/mean_length": 1083.1875, "completions/mean_terminated_length": 1083.1875, "completions/min_length": 771.0, "completions/min_terminated_length": 771.0, "epoch": 0.3158631726345269, "frac_reward_zero_std": 0.0, "grad_norm": 2.3489322247254854, "kl": 0.0107269287109375, "learning_rate": 8.784142186368214e-07, "loss": -0.0461, "num_tokens": 68601922.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9669705629348755, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1522350613941733, "rewards/wordcountpos_reward/raw_geo/std": 0.45106490963562, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1579 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1219.0, "completions/max_terminated_length": 1219.0, "completions/mean_length": 1062.8125, "completions/mean_terminated_length": 1062.8125, "completions/min_length": 878.0, "completions/min_terminated_length": 878.0, "epoch": 0.3160632126425285, "frac_reward_zero_std": 0.0, "grad_norm": 2.9011049739712456, "kl": 0.00803375244140625, "learning_rate": 8.781993156467503e-07, "loss": -0.0159, "num_tokens": 68647423.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9258589148521423, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05050123528105946, "rewards/wordcountpos_reward/raw_geo/std": 0.16432188215439258, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1580 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1474.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1261.0625, "completions/mean_terminated_length": 1261.0625, "completions/min_length": 977.0, "completions/min_terminated_length": 977.0, "epoch": 0.3162632526505301, "frac_reward_zero_std": 0.0, "grad_norm": 3.2038415236144484, "kl": 0.012298583984375, "learning_rate": 8.779842526252024e-07, "loss": -0.0185, "num_tokens": 68689400.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7914847135543823, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06036636378091192, "rewards/wordcountpos_reward/raw_geo/std": 0.03642425582604647, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575908, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1581 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 1147.6875, "completions/mean_terminated_length": 1124.2000732421875, "completions/min_length": 913.0, "completions/min_terminated_length": 913.0, "epoch": 0.3164632926585317, "frac_reward_zero_std": 0.0, "grad_norm": 2.7746051305510844, "kl": 0.01210784912109375, "learning_rate": 8.777690296770437e-07, "loss": -0.0131, "num_tokens": 68731107.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8282698392868042, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1471876682459795, "rewards/wordcountpos_reward/raw_geo/std": 0.17817071582053842, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10461569884316811, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1582 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1421.0, "completions/mean_length": 1186.4375, "completions/mean_terminated_length": 1043.9091796875, "completions/min_length": 802.0, "completions/min_terminated_length": 802.0, "epoch": 0.3166633326665333, "frac_reward_zero_std": 0.0, "grad_norm": 3.2264803830072477, "kl": 0.014312744140625, "learning_rate": 8.775536469072178e-07, "loss": 0.0346, "num_tokens": 68783738.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9534915089607239, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0039703081630581005, "rewards/wordcountpos_reward/raw_geo/std": 0.12009061968215361, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258098, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1583 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1177.0, "completions/mean_length": 1119.25, "completions/mean_terminated_length": 1093.86669921875, "completions/min_length": 1000.0, "completions/min_terminated_length": 1000.0, "epoch": 0.3168633726745349, "frac_reward_zero_std": 0.0, "grad_norm": 2.8959232476628824, "kl": 0.0111083984375, "learning_rate": 8.77338104420746e-07, "loss": -0.0051, "num_tokens": 68827926.0, "reward": 7.450580596923828e-09, "reward_std": 0.995486855506897, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.045305738994841416, "rewards/wordcountpos_reward/raw_geo/std": 0.07667984325277001, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.12382783747337808, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1584 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1186.0, "completions/max_terminated_length": 1186.0, "completions/mean_length": 1011.25, "completions/mean_terminated_length": 1011.25, "completions/min_length": 873.0, "completions/min_terminated_length": 873.0, "epoch": 0.3170634126825365, "frac_reward_zero_std": 0.0, "grad_norm": 3.006943965417861, "kl": 0.012237548828125, "learning_rate": 8.771224023227284e-07, "loss": 0.0062, "num_tokens": 68868418.0, "reward": 0.0, "reward_std": 0.933379590511322, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.005277191377503794, "rewards/wordcountpos_reward/raw_geo/std": 0.08754166630403425, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237131, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1585 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1043.0, "completions/max_terminated_length": 1043.0, "completions/mean_length": 896.9375, "completions/mean_terminated_length": 896.9375, "completions/min_length": 678.0, "completions/min_terminated_length": 678.0, "epoch": 0.31726345269053813, "frac_reward_zero_std": 0.0, "grad_norm": 3.3322654254095223, "kl": 0.0125732421875, "learning_rate": 8.769065407183418e-07, "loss": -0.0165, "num_tokens": 68904241.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0416842699050903, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01716716227931385, "rewards/wordcountpos_reward/raw_geo/std": 0.09950324377868686, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.070841502796867, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1586 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1400.125, "completions/mean_terminated_length": 1271.71435546875, "completions/min_length": 888.0, "completions/min_terminated_length": 888.0, "epoch": 0.3174634926985397, "frac_reward_zero_std": 0.0, "grad_norm": 3.045622138192471, "kl": 0.0139923095703125, "learning_rate": 8.766905197128416e-07, "loss": -0.0079, "num_tokens": 68961563.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0298126935958862, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.011343066012834035, "rewards/wordcountpos_reward/raw_geo/std": 0.2300492371618011, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.10470416879457554, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1587 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1357.0, "completions/mean_length": 1094.0, "completions/mean_terminated_length": 1066.933349609375, "completions/min_length": 671.0, "completions/min_terminated_length": 671.0, "epoch": 0.3176635327065413, "frac_reward_zero_std": 0.0, "grad_norm": 3.236366460327572, "kl": 0.0157928466796875, "learning_rate": 8.764743394115604e-07, "loss": 0.0143, "num_tokens": 69013195.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8382514715194702, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04073742505844089, "rewards/wordcountpos_reward/raw_geo/std": 0.032596749472514536, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.12524050936172845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1588 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1435.0, "completions/max_terminated_length": 1435.0, "completions/mean_length": 831.375, "completions/mean_terminated_length": 831.375, "completions/min_length": 639.0, "completions/min_terminated_length": 639.0, "epoch": 0.3178635727145429, "frac_reward_zero_std": 0.0, "grad_norm": 2.748882608834058, "kl": 0.00759124755859375, "learning_rate": 8.762579999199089e-07, "loss": 0.0093, "num_tokens": 69044889.0, "reward": 0.0, "reward_std": 1.0451180934906006, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1258962182967499, "rewards/wordcountpos_reward/raw_geo/std": 0.051868925157152156, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.08850612031567838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1589 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1459.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1166.75, "completions/mean_terminated_length": 1166.75, "completions/min_length": 897.0, "completions/min_terminated_length": 897.0, "epoch": 0.3180636127225445, "frac_reward_zero_std": 0.0, "grad_norm": 3.333627588331488, "kl": 0.0146942138671875, "learning_rate": 8.76041501343375e-07, "loss": -0.0155, "num_tokens": 69084357.0, "reward": -7.450580596923828e-09, "reward_std": 1.0352282524108887, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.07563418210521654, "rewards/wordcountpos_reward/raw_geo/std": 0.0508009172809852, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1590 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1413.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 967.25, "completions/mean_terminated_length": 967.25, "completions/min_length": 471.0, "completions/min_terminated_length": 471.0, "epoch": 0.31826365273054613, "frac_reward_zero_std": 0.0, "grad_norm": 3.6457553767546815, "kl": 0.0136566162109375, "learning_rate": 8.758248437875246e-07, "loss": 0.0606, "num_tokens": 69129553.0, "reward": 2.9802322387695312e-08, "reward_std": 0.2511444687843323, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04688763187245333, "rewards/wordcountpos_reward/raw_geo/std": 0.05714744585474582, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.20923139768633622, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1591 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1352.75, "completions/mean_terminated_length": 1331.71435546875, "completions/min_length": 1214.0, "completions/min_terminated_length": 1214.0, "epoch": 0.3184636927385477, "frac_reward_zero_std": 0.0, "grad_norm": 2.624934650990559, "kl": 0.010589599609375, "learning_rate": 8.756080273580009e-07, "loss": -0.0062, "num_tokens": 69175045.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7568929195404053, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11011449934728144, "rewards/wordcountpos_reward/raw_geo/std": 0.4128410334402734, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1592 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1416.0, "completions/mean_length": 1091.9375, "completions/mean_terminated_length": 1033.6429443359375, "completions/min_length": 889.0, "completions/min_terminated_length": 889.0, "epoch": 0.3186637327465493, "frac_reward_zero_std": 0.0, "grad_norm": 3.686019771758005, "kl": 0.0162200927734375, "learning_rate": 8.753910521605245e-07, "loss": -0.0621, "num_tokens": 69224508.0, "reward": 0.0, "reward_std": 1.0114943981170654, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.15080110303794583, "rewards/wordcountpos_reward/raw_geo/std": 0.11792134965310988, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.11122216672215288, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1593 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1305.1875, "completions/mean_terminated_length": 1292.2000732421875, "completions/min_length": 1062.0, "completions/min_terminated_length": 1062.0, "epoch": 0.3188637727545509, "frac_reward_zero_std": 0.0, "grad_norm": 2.62190816484297, "kl": 0.0098114013671875, "learning_rate": 8.751739183008935e-07, "loss": -0.0184, "num_tokens": 69273975.0, "reward": -2.9802322387695312e-08, "reward_std": 0.797948956489563, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14550086044449423, "rewards/wordcountpos_reward/raw_geo/std": 0.1331604326108755, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891871, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1594 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1493.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1156.375, "completions/mean_terminated_length": 1156.375, "completions/min_length": 841.0, "completions/min_terminated_length": 841.0, "epoch": 0.3190638127625525, "frac_reward_zero_std": 0.0, "grad_norm": 3.4193560472601034, "kl": 0.0166778564453125, "learning_rate": 8.749566258849833e-07, "loss": -0.0388, "num_tokens": 69318165.0, "reward": 0.0, "reward_std": 0.775354266166687, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.18240270207601864, "rewards/wordcountpos_reward/raw_geo/std": 0.17912395436559866, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.1258305739211792, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1595 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1411.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 1227.9375, "completions/mean_terminated_length": 1227.9375, "completions/min_length": 1057.0, "completions/min_terminated_length": 1057.0, "epoch": 0.31926385277055414, "frac_reward_zero_std": 0.0, "grad_norm": 2.955087754613613, "kl": 0.0126953125, "learning_rate": 8.747391750187468e-07, "loss": -0.0106, "num_tokens": 69369980.0, "reward": 0.0, "reward_std": 0.9563160538673401, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10828293055163338, "rewards/wordcountpos_reward/raw_geo/std": 0.14969418019126313, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.08681611046941137, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1596 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1421.0, "completions/mean_length": 1197.375, "completions/mean_terminated_length": 1177.2000732421875, "completions/min_length": 965.0, "completions/min_terminated_length": 965.0, "epoch": 0.3194638927785557, "frac_reward_zero_std": 0.0, "grad_norm": 3.4810574774930365, "kl": 0.0149383544921875, "learning_rate": 8.745215658082138e-07, "loss": 0.0408, "num_tokens": 69419618.0, "reward": 0.0, "reward_std": 0.6060514450073242, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.01379265701292679, "rewards/wordcountpos_reward/raw_geo/std": 0.20018001375998773, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042255, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1597 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1328.0, "completions/max_terminated_length": 1328.0, "completions/mean_length": 1069.625, "completions/mean_terminated_length": 1069.625, "completions/min_length": 923.0, "completions/min_terminated_length": 923.0, "epoch": 0.3196639327865573, "frac_reward_zero_std": 0.0, "grad_norm": 1.805656598014149, "kl": 0.0037021636962890625, "learning_rate": 8.743037983594917e-07, "loss": -0.0172, "num_tokens": 69461620.0, "reward": 0.0, "reward_std": 0.9250842928886414, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.014396808622773056, "rewards/wordcountpos_reward/raw_geo/std": 0.05921011271343425, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08432740427115681, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1598 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1444.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 1094.1875, "completions/mean_terminated_length": 1094.1875, "completions/min_length": 678.0, "completions/min_terminated_length": 678.0, "epoch": 0.3198639727945589, "frac_reward_zero_std": 0.0, "grad_norm": 3.4907672235794642, "kl": 0.0142059326171875, "learning_rate": 8.740858727787651e-07, "loss": 0.0329, "num_tokens": 69514847.0, "reward": 0.0, "reward_std": 0.7607910633087158, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.018264410001570718, "rewards/wordcountpos_reward/raw_geo/std": 0.04662311628043402, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.11013459778666118, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1599 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1213.6875, "completions/mean_terminated_length": 1194.60009765625, "completions/min_length": 918.0, "completions/min_terminated_length": 918.0, "epoch": 0.3200640128025605, "frac_reward_zero_std": 0.0, "grad_norm": 2.8587935450601907, "kl": 0.012054443359375, "learning_rate": 8.738677891722951e-07, "loss": -0.042, "num_tokens": 69555946.0, "reward": 0.0, "reward_std": 0.8894298076629639, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.011683751771228656, "rewards/wordcountpos_reward/raw_geo/std": 0.15297810295497705, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.016666666666666663, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1600 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1256.625, "completions/mean_terminated_length": 1175.5, "completions/min_length": 893.0, "completions/min_terminated_length": 893.0, "epoch": 0.32026405281056214, "frac_reward_zero_std": 0.0, "grad_norm": 3.253304689368561, "kl": 0.01470947265625, "learning_rate": 8.736495476464205e-07, "loss": -0.016, "num_tokens": 69609140.0, "reward": 0.0, "reward_std": 1.0084575414657593, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0862451665164458, "rewards/wordcountpos_reward/raw_geo/std": 0.07137471201934144, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09269623828717427, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1601 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1455.0, "completions/mean_length": 1318.25, "completions/mean_terminated_length": 1209.2000732421875, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.3204640928185637, "frac_reward_zero_std": 0.0, "grad_norm": 3.0458765620895685, "kl": 0.0158538818359375, "learning_rate": 8.734311483075568e-07, "loss": -0.0244, "num_tokens": 69657504.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7522756457328796, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.010867540296603483, "rewards/wordcountpos_reward/raw_geo/std": 0.32075806524759004, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298359, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1602 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 1190.875, "completions/mean_terminated_length": 1170.2667236328125, "completions/min_length": 1004.0, "completions/min_terminated_length": 1004.0, "epoch": 0.3206641328265653, "frac_reward_zero_std": 0.0, "grad_norm": 2.5643541801729435, "kl": 0.012420654296875, "learning_rate": 8.732125912621966e-07, "loss": -0.0068, "num_tokens": 69701310.0, "reward": 0.0, "reward_std": 1.0112640857696533, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.12665527476531674, "rewards/wordcountpos_reward/raw_geo/std": 0.10002495950164786, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.1080980350662545, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1603 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1387.0, "completions/max_terminated_length": 1387.0, "completions/mean_length": 1038.4375, "completions/mean_terminated_length": 1038.4375, "completions/min_length": 871.0, "completions/min_terminated_length": 871.0, "epoch": 0.3208641728345669, "frac_reward_zero_std": 0.0, "grad_norm": 3.2329555949635624, "kl": 0.01313018798828125, "learning_rate": 8.729938766169092e-07, "loss": 0.0078, "num_tokens": 69740045.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5262272357940674, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02001174282360432, "rewards/wordcountpos_reward/raw_geo/std": 0.09209511922984874, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1604 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1286.0, "completions/max_terminated_length": 1286.0, "completions/mean_length": 1023.9375, "completions/mean_terminated_length": 1023.9375, "completions/min_length": 827.0, "completions/min_terminated_length": 827.0, "epoch": 0.32106421284256853, "frac_reward_zero_std": 0.0, "grad_norm": 3.7755137382972395, "kl": 0.016815185546875, "learning_rate": 8.727750044783408e-07, "loss": -0.0251, "num_tokens": 69780868.0, "reward": 0.0, "reward_std": 0.9580331444740295, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06635442620368658, "rewards/wordcountpos_reward/raw_geo/std": 0.05900486823426682, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078612, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1605 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1230.0, "completions/mean_length": 1177.4375, "completions/mean_terminated_length": 1131.357177734375, "completions/min_length": 992.0, "completions/min_terminated_length": 992.0, "epoch": 0.3212642528505701, "frac_reward_zero_std": 0.0, "grad_norm": 3.5682915419764543, "kl": 0.0165252685546875, "learning_rate": 8.725559749532145e-07, "loss": -0.0375, "num_tokens": 69834563.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6535353660583496, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2805919912373367, "rewards/wordcountpos_reward/raw_geo/std": 0.22273099265681917, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06978803887752091, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1606 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1284.0, "completions/mean_length": 1339.875, "completions/mean_terminated_length": 1179.75, "completions/min_length": 1096.0, "completions/min_terminated_length": 1096.0, "epoch": 0.3214642928585717, "frac_reward_zero_std": 0.0, "grad_norm": 2.8659518310960523, "kl": 0.009185791015625, "learning_rate": 8.723367881483301e-07, "loss": -0.0044, "num_tokens": 69891201.0, "reward": 0.0, "reward_std": 0.5730748176574707, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03574774335486767, "rewards/wordcountpos_reward/raw_geo/std": 0.09131875823854112, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08766518798921945, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1607 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1198.5, "completions/mean_terminated_length": 1178.4000244140625, "completions/min_length": 801.0, "completions/min_terminated_length": 801.0, "epoch": 0.3216643328665733, "frac_reward_zero_std": 0.0, "grad_norm": 3.56234700133416, "kl": 0.015289306640625, "learning_rate": 8.721174441705642e-07, "loss": -0.0829, "num_tokens": 69926753.0, "reward": 0.0, "reward_std": 0.6940462589263916, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.21455173226133972, "rewards/wordcountpos_reward/raw_geo/std": 0.15938293206465223, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1608 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1425.0, "completions/max_terminated_length": 1425.0, "completions/mean_length": 1220.5625, "completions/mean_terminated_length": 1220.5625, "completions/min_length": 904.0, "completions/min_terminated_length": 904.0, "epoch": 0.3218643728745749, "frac_reward_zero_std": 0.0, "grad_norm": 2.5781193488214718, "kl": 0.009063720703125, "learning_rate": 8.718979431268698e-07, "loss": 0.0284, "num_tokens": 69977282.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8950475454330444, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01251881874843657, "rewards/wordcountpos_reward/raw_geo/std": 0.06520438036172192, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1609 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1230.0, "completions/max_terminated_length": 1230.0, "completions/mean_length": 937.25, "completions/mean_terminated_length": 937.25, "completions/min_length": 657.0, "completions/min_terminated_length": 657.0, "epoch": 0.32206441288257653, "frac_reward_zero_std": 0.0, "grad_norm": 3.160596004016877, "kl": 0.0093231201171875, "learning_rate": 8.716782851242766e-07, "loss": -0.0117, "num_tokens": 70012326.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8929026126861572, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09306294584262934, "rewards/wordcountpos_reward/raw_geo/std": 0.07288388681667722, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818418, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1610 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1289.8125, "completions/mean_terminated_length": 1275.800048828125, "completions/min_length": 1076.0, "completions/min_terminated_length": 1076.0, "epoch": 0.3222644528905781, "frac_reward_zero_std": 0.0, "grad_norm": 3.0742943218134227, "kl": 0.0139923095703125, "learning_rate": 8.714584702698908e-07, "loss": -0.0246, "num_tokens": 70058435.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6943762302398682, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10278817352013789, "rewards/wordcountpos_reward/raw_geo/std": 0.11470567582900348, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07391185942027818, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1611 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1378.0, "completions/max_terminated_length": 1378.0, "completions/mean_length": 1036.8125, "completions/mean_terminated_length": 1036.8125, "completions/min_length": 739.0, "completions/min_terminated_length": 739.0, "epoch": 0.3224644928985797, "frac_reward_zero_std": 0.0, "grad_norm": 3.55281022296121, "kl": 0.014251708984375, "learning_rate": 8.712384986708953e-07, "loss": -0.0169, "num_tokens": 70107696.0, "reward": 0.0, "reward_std": 0.8547852039337158, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09444222534652028, "rewards/wordcountpos_reward/raw_geo/std": 0.0726889749413297, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1612 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1128.0, "completions/max_terminated_length": 1128.0, "completions/mean_length": 983.5, "completions/mean_terminated_length": 983.5, "completions/min_length": 716.0, "completions/min_terminated_length": 716.0, "epoch": 0.3226645329065813, "frac_reward_zero_std": 0.0, "grad_norm": 2.8657924909863124, "kl": 0.0098419189453125, "learning_rate": 8.710183704345492e-07, "loss": 0.0184, "num_tokens": 70159304.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0441851615905762, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07690630457244645, "rewards/wordcountpos_reward/raw_geo/std": 0.07906305037403856, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402215, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1613 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1426.0, "completions/mean_length": 1131.125, "completions/mean_terminated_length": 1106.533447265625, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.3228645729145829, "frac_reward_zero_std": 0.0, "grad_norm": 4.207702987523146, "kl": 0.0517730712890625, "learning_rate": 8.707980856681878e-07, "loss": -0.0444, "num_tokens": 70200002.0, "reward": -1.862645149230957e-09, "reward_std": 0.9499379992485046, "rewards/wordcountpos_reward/mean": -1.862645149230957e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.1420046854354127, "rewards/wordcountpos_reward/raw_geo/std": 0.12697468586952163, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.09016445879408155, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1614 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1312.0, "completions/max_terminated_length": 1312.0, "completions/mean_length": 1031.0, "completions/mean_terminated_length": 1031.0, "completions/min_length": 808.0, "completions/min_terminated_length": 808.0, "epoch": 0.32306461292258454, "frac_reward_zero_std": 0.0, "grad_norm": 3.2384978616896927, "kl": 0.0124664306640625, "learning_rate": 8.705776444792232e-07, "loss": -0.0164, "num_tokens": 70234186.0, "reward": -3.725290298461914e-08, "reward_std": 1.052241325378418, "rewards/wordcountpos_reward/mean": -3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04186649685881302, "rewards/wordcountpos_reward/raw_geo/std": 0.13196762791416944, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891873, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1615 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1441.0, "completions/mean_length": 1230.3125, "completions/mean_terminated_length": 1212.3333740234375, "completions/min_length": 1053.0, "completions/min_terminated_length": 1053.0, "epoch": 0.3232646529305861, "frac_reward_zero_std": 0.0, "grad_norm": 2.6076732046868845, "kl": 0.0121307373046875, "learning_rate": 8.703570469751433e-07, "loss": -0.0039, "num_tokens": 70284703.0, "reward": 0.0, "reward_std": 0.7809607982635498, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10899989717424802, "rewards/wordcountpos_reward/raw_geo/std": 0.12932489352066298, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1102186379345533, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1616 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1338.0, "completions/max_terminated_length": 1338.0, "completions/mean_length": 959.3125, "completions/mean_terminated_length": 959.3125, "completions/min_length": 765.0, "completions/min_terminated_length": 765.0, "epoch": 0.3234646929385877, "frac_reward_zero_std": 0.0, "grad_norm": 3.087324989361525, "kl": 0.0136871337890625, "learning_rate": 8.701362932635128e-07, "loss": -0.0306, "num_tokens": 70322604.0, "reward": 7.450580596923828e-09, "reward_std": 1.03993821144104, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.25063173637291003, "rewards/wordcountpos_reward/raw_geo/std": 0.1810272407662527, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.103905227473387, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1617 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1177.0, "completions/max_terminated_length": 1177.0, "completions/mean_length": 884.75, "completions/mean_terminated_length": 884.75, "completions/min_length": 612.0, "completions/min_terminated_length": 612.0, "epoch": 0.3236647329465893, "frac_reward_zero_std": 0.0, "grad_norm": 3.8967115673213444, "kl": 0.02020263671875, "learning_rate": 8.699153834519718e-07, "loss": -0.0592, "num_tokens": 70365208.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6963033080101013, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11096943860833335, "rewards/wordcountpos_reward/raw_geo/std": 0.08204610233955069, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476836, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1618 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1283.9375, "completions/mean_terminated_length": 1269.533447265625, "completions/min_length": 1111.0, "completions/min_terminated_length": 1111.0, "epoch": 0.32386477295459093, "frac_reward_zero_std": 0.0, "grad_norm": 3.0067059218709997, "kl": 0.01409912109375, "learning_rate": 8.696943176482372e-07, "loss": 0.0043, "num_tokens": 70412895.0, "reward": 5.960464477539063e-08, "reward_std": 0.8461166620254517, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11866382241507653, "rewards/wordcountpos_reward/raw_geo/std": 0.0785102798917892, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.09339283817414601, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1619 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1132.0, "completions/max_terminated_length": 1132.0, "completions/mean_length": 893.5, "completions/mean_terminated_length": 893.5, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.32406481296259254, "frac_reward_zero_std": 0.0, "grad_norm": 2.3749300901797596, "kl": 0.0144805908203125, "learning_rate": 8.694730959601017e-07, "loss": -0.1635, "num_tokens": 70447335.0, "reward": 0.0, "reward_std": 0.7249225974082947, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.027121271229428656, "rewards/wordcountpos_reward/raw_geo/std": 0.1720343724491237, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.19958289839896937, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1620 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1416.125, "completions/mean_terminated_length": 1396.769287109375, "completions/min_length": 1248.0, "completions/min_terminated_length": 1248.0, "epoch": 0.3242648529705941, "frac_reward_zero_std": 0.0, "grad_norm": 2.3064195962591008, "kl": 0.0077362060546875, "learning_rate": 8.692517184954339e-07, "loss": 0.007, "num_tokens": 70494705.0, "reward": 0.0, "reward_std": 0.7915651798248291, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.030017199548312057, "rewards/wordcountpos_reward/raw_geo/std": 0.0761406002201284, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258098, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1621 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1251.0, "completions/mean_length": 1026.9375, "completions/mean_terminated_length": 995.4000244140625, "completions/min_length": 845.0, "completions/min_terminated_length": 845.0, "epoch": 0.3244648929785957, "frac_reward_zero_std": 0.0, "grad_norm": 3.6844588554817648, "kl": 0.017303466796875, "learning_rate": 8.690301853621783e-07, "loss": 0.0072, "num_tokens": 70531288.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0639327764511108, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.020742172151316927, "rewards/wordcountpos_reward/raw_geo/std": 0.13812770632466323, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.15244914148902494, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1622 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1353.0, "completions/max_terminated_length": 1353.0, "completions/mean_length": 1079.3125, "completions/mean_terminated_length": 1079.3125, "completions/min_length": 887.0, "completions/min_terminated_length": 887.0, "epoch": 0.3246649329865973, "frac_reward_zero_std": 0.0, "grad_norm": 2.6415624385241765, "kl": 0.01348876953125, "learning_rate": 8.688084966683557e-07, "loss": -0.0148, "num_tokens": 70569341.0, "reward": 0.0, "reward_std": 0.7021139860153198, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0024738637670690487, "rewards/wordcountpos_reward/raw_geo/std": 0.07208491778762689, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1623 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1198.75, "completions/mean_terminated_length": 1178.666748046875, "completions/min_length": 933.0, "completions/min_terminated_length": 933.0, "epoch": 0.32486497299459893, "frac_reward_zero_std": 0.0, "grad_norm": 3.1871723881040337, "kl": 0.0140838623046875, "learning_rate": 8.685866525220625e-07, "loss": 0.0193, "num_tokens": 70617985.0, "reward": 0.0, "reward_std": 0.991610050201416, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0026123139112771043, "rewards/wordcountpos_reward/raw_geo/std": 0.09322527614596396, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.775, "rewards/wordcountpos_reward/raw_rule/std": 0.13743685418725538, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1624 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1162.0, "completions/mean_terminated_length": 1113.71435546875, "completions/min_length": 819.0, "completions/min_terminated_length": 819.0, "epoch": 0.32506501300260054, "frac_reward_zero_std": 0.0, "grad_norm": 2.8886941801296477, "kl": 0.0100250244140625, "learning_rate": 8.683646530314709e-07, "loss": 0.0243, "num_tokens": 70653497.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8914742469787598, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.007382820731701609, "rewards/wordcountpos_reward/raw_geo/std": 0.07023277900114537, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.17018508443151817, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1625 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1282.6875, "completions/mean_terminated_length": 1268.2000732421875, "completions/min_length": 879.0, "completions/min_terminated_length": 879.0, "epoch": 0.3252650530106021, "frac_reward_zero_std": 0.0, "grad_norm": 2.7038608058215643, "kl": 0.0116729736328125, "learning_rate": 8.681424983048288e-07, "loss": -0.0215, "num_tokens": 70703564.0, "reward": 0.0, "reward_std": 1.025007724761963, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.023557903369122793, "rewards/wordcountpos_reward/raw_geo/std": 0.2269531625116964, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1264911064067352, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1626 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1268.0, "completions/max_terminated_length": 1268.0, "completions/mean_length": 1085.625, "completions/mean_terminated_length": 1085.625, "completions/min_length": 844.0, "completions/min_terminated_length": 844.0, "epoch": 0.3254650930186037, "frac_reward_zero_std": 0.0, "grad_norm": 2.870238551288568, "kl": 0.012054443359375, "learning_rate": 8.679201884504598e-07, "loss": 0.0303, "num_tokens": 70746406.0, "reward": -2.9802322387695312e-08, "reward_std": 0.811503529548645, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11347384029382013, "rewards/wordcountpos_reward/raw_geo/std": 0.27566224391343047, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1627 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1314.0, "completions/max_terminated_length": 1314.0, "completions/mean_length": 1053.6875, "completions/mean_terminated_length": 1053.6875, "completions/min_length": 821.0, "completions/min_terminated_length": 821.0, "epoch": 0.3256651330266053, "frac_reward_zero_std": 0.0, "grad_norm": 3.824436226209106, "kl": 0.018707275390625, "learning_rate": 8.676977235767632e-07, "loss": -0.0084, "num_tokens": 70782289.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9631739258766174, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.057504655034868894, "rewards/wordcountpos_reward/raw_geo/std": 0.05901597470434708, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298359, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1628 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1396.0, "completions/mean_length": 1294.625, "completions/mean_terminated_length": 1265.2857666015625, "completions/min_length": 960.0, "completions/min_terminated_length": 960.0, "epoch": 0.32586517303460694, "frac_reward_zero_std": 0.0, "grad_norm": 1.8520271089329634, "kl": 0.00908660888671875, "learning_rate": 8.674751037922141e-07, "loss": -0.0413, "num_tokens": 70829155.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8651070594787598, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.24366039398044062, "rewards/wordcountpos_reward/raw_geo/std": 0.13818310680879822, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09878896324620105, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1629 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1380.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1002.75, "completions/mean_terminated_length": 1002.75, "completions/min_length": 798.0, "completions/min_terminated_length": 798.0, "epoch": 0.32606521304260855, "frac_reward_zero_std": 0.0, "grad_norm": 4.481604806319879, "kl": 0.0293731689453125, "learning_rate": 8.672523292053627e-07, "loss": 0.0029, "num_tokens": 70877207.0, "reward": 5.960464477539063e-08, "reward_std": 0.8460835218429565, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09866266998428361, "rewards/wordcountpos_reward/raw_geo/std": 0.11878553207445729, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08933913745655643, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1630 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1347.5, "completions/mean_terminated_length": 1278.181884765625, "completions/min_length": 1017.0, "completions/min_terminated_length": 1017.0, "epoch": 0.3262652530506101, "frac_reward_zero_std": 0.0, "grad_norm": 3.2739952464531283, "kl": 0.0124359130859375, "learning_rate": 8.670293999248351e-07, "loss": -0.0361, "num_tokens": 70931015.0, "reward": 0.0, "reward_std": 0.6473633050918579, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1593686448416754, "rewards/wordcountpos_reward/raw_geo/std": 0.17186180323953784, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11603000888978234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1631 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1370.0, "completions/mean_length": 1277.6875, "completions/mean_terminated_length": 1104.77783203125, "completions/min_length": 915.0, "completions/min_terminated_length": 915.0, "epoch": 0.3264652930586117, "frac_reward_zero_std": 0.0, "grad_norm": 2.380233976584486, "kl": 0.0093841552734375, "learning_rate": 8.668063160593323e-07, "loss": -0.0044, "num_tokens": 70970410.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8626672029495239, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.018328363456941933, "rewards/wordcountpos_reward/raw_geo/std": 0.051612424428809665, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.046943622609505783, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1632 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1285.625, "completions/mean_terminated_length": 1271.3333740234375, "completions/min_length": 1127.0, "completions/min_terminated_length": 1127.0, "epoch": 0.32666533306661333, "frac_reward_zero_std": 0.0, "grad_norm": 2.912150342870821, "kl": 0.0124359130859375, "learning_rate": 8.665830777176314e-07, "loss": 0.0005, "num_tokens": 71013988.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9188027381896973, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0988813874641122, "rewards/wordcountpos_reward/raw_geo/std": 0.0718615768363995, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1633 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1310.0, "completions/mean_length": 1043.25, "completions/mean_terminated_length": 1012.800048828125, "completions/min_length": 814.0, "completions/min_terminated_length": 814.0, "epoch": 0.32686537307461494, "frac_reward_zero_std": 0.0, "grad_norm": 2.6431475752789555, "kl": 0.0120849609375, "learning_rate": 8.66359685008584e-07, "loss": 0.0248, "num_tokens": 71057240.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9554632902145386, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.13324790943650425, "rewards/wordcountpos_reward/raw_geo/std": 0.13668713752796496, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1634 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1336.0, "completions/max_terminated_length": 1336.0, "completions/mean_length": 1063.0625, "completions/mean_terminated_length": 1063.0625, "completions/min_length": 595.0, "completions/min_terminated_length": 595.0, "epoch": 0.3270654130826165, "frac_reward_zero_std": 0.0, "grad_norm": 3.1024808466646645, "kl": 0.0124053955078125, "learning_rate": 8.661361380411178e-07, "loss": -0.0439, "num_tokens": 71096953.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0119599103927612, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.005895189283164446, "rewards/wordcountpos_reward/raw_geo/std": 0.038449970513415484, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1635 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1145.3125, "completions/mean_terminated_length": 1121.666748046875, "completions/min_length": 675.0, "completions/min_terminated_length": 675.0, "epoch": 0.3272654530906181, "frac_reward_zero_std": 0.0, "grad_norm": 2.786977243972997, "kl": 0.01043701171875, "learning_rate": 8.65912436924235e-07, "loss": -0.0457, "num_tokens": 71138254.0, "reward": 0.0, "reward_std": 0.6566910743713379, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07098212426707291, "rewards/wordcountpos_reward/raw_geo/std": 0.14603928747891723, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11979921473804347, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1636 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1430.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1055.625, "completions/mean_terminated_length": 1055.625, "completions/min_length": 907.0, "completions/min_terminated_length": 907.0, "epoch": 0.3274654930986197, "frac_reward_zero_std": 0.0, "grad_norm": 3.4816506170069794, "kl": 0.015167236328125, "learning_rate": 8.656885817670135e-07, "loss": 0.0012, "num_tokens": 71178344.0, "reward": 7.450580596923828e-09, "reward_std": 1.0401662588119507, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.026643690060561238, "rewards/wordcountpos_reward/raw_geo/std": 0.08655590093713117, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1637 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1185.0, "completions/max_terminated_length": 1185.0, "completions/mean_length": 969.4375, "completions/mean_terminated_length": 969.4375, "completions/min_length": 776.0, "completions/min_terminated_length": 776.0, "epoch": 0.32766553310662133, "frac_reward_zero_std": 0.0, "grad_norm": 3.4958827301426596, "kl": 0.0151519775390625, "learning_rate": 8.654645726786061e-07, "loss": 0.0137, "num_tokens": 71216191.0, "reward": 0.0, "reward_std": 1.0296348333358765, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08525755774722532, "rewards/wordcountpos_reward/raw_geo/std": 0.13090007042257243, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05900408021045224, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1638 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1199.0, "completions/max_terminated_length": 1199.0, "completions/mean_length": 962.8125, "completions/mean_terminated_length": 962.8125, "completions/min_length": 687.0, "completions/min_terminated_length": 687.0, "epoch": 0.32786557311462294, "frac_reward_zero_std": 0.0, "grad_norm": 3.330979813222228, "kl": 0.013275146484375, "learning_rate": 8.652404097682405e-07, "loss": -0.0221, "num_tokens": 71247620.0, "reward": 0.0, "reward_std": 0.9763050079345703, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.034950029441197586, "rewards/wordcountpos_reward/raw_geo/std": 0.055553254731595295, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042255, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1639 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1205.0, "completions/max_terminated_length": 1205.0, "completions/mean_length": 974.75, "completions/mean_terminated_length": 974.75, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.3280656131226245, "frac_reward_zero_std": 0.0, "grad_norm": 2.813197549012045, "kl": 0.009838104248046875, "learning_rate": 8.650160931452196e-07, "loss": -0.0919, "num_tokens": 71278136.0, "reward": 0.0, "reward_std": 0.9673024415969849, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.027178198227300658, "rewards/wordcountpos_reward/raw_geo/std": 0.06780357145758219, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.133263870794973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1640 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1402.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 1106.4375, "completions/mean_terminated_length": 1106.4375, "completions/min_length": 980.0, "completions/min_terminated_length": 980.0, "epoch": 0.3282656531306261, "frac_reward_zero_std": 0.0, "grad_norm": 3.4106662825780982, "kl": 0.0161590576171875, "learning_rate": 8.647916229189212e-07, "loss": 0.0031, "num_tokens": 71326951.0, "reward": 0.0, "reward_std": 1.0174760818481445, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.14574068621798153, "rewards/wordcountpos_reward/raw_geo/std": 0.07431662759278944, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1641 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1495.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1218.1875, "completions/mean_terminated_length": 1218.1875, "completions/min_length": 998.0, "completions/min_terminated_length": 998.0, "epoch": 0.3284656931386277, "frac_reward_zero_std": 0.0, "grad_norm": 3.3698710533709826, "kl": 0.0153045654296875, "learning_rate": 8.645669991987981e-07, "loss": -0.0222, "num_tokens": 71372946.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7736269235610962, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08711889409657017, "rewards/wordcountpos_reward/raw_geo/std": 0.11308945630439972, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081411, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1642 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1447.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1241.5625, "completions/mean_terminated_length": 1241.5625, "completions/min_length": 938.0, "completions/min_terminated_length": 938.0, "epoch": 0.32866573314662934, "frac_reward_zero_std": 0.0, "grad_norm": 3.5851789470653372, "kl": 0.016510009765625, "learning_rate": 8.643422220943778e-07, "loss": 0.0253, "num_tokens": 71422483.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0508407354354858, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.048591729561694816, "rewards/wordcountpos_reward/raw_geo/std": 0.23428493890429034, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11792967144619462, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1643 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 1171.0625, "completions/mean_terminated_length": 1124.071533203125, "completions/min_length": 694.0, "completions/min_terminated_length": 694.0, "epoch": 0.32886577315463095, "frac_reward_zero_std": 0.0, "grad_norm": 3.1974760306277523, "kl": 0.0152435302734375, "learning_rate": 8.641172917152626e-07, "loss": -0.0214, "num_tokens": 71474468.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0029001235961914, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10339928629604174, "rewards/wordcountpos_reward/raw_geo/std": 0.17249413143218512, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078612, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1644 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1389.0, "completions/max_terminated_length": 1389.0, "completions/mean_length": 1137.625, "completions/mean_terminated_length": 1137.625, "completions/min_length": 923.0, "completions/min_terminated_length": 923.0, "epoch": 0.3290658131626325, "frac_reward_zero_std": 0.0, "grad_norm": 2.666674517422969, "kl": 0.0094451904296875, "learning_rate": 8.638922081711295e-07, "loss": 0.0133, "num_tokens": 71526926.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0487515926361084, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07022498912745798, "rewards/wordcountpos_reward/raw_geo/std": 0.12518156411011677, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09953596037316066, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1645 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1486.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1226.8125, "completions/mean_terminated_length": 1226.8125, "completions/min_length": 1018.0, "completions/min_terminated_length": 1018.0, "epoch": 0.3292658531706341, "frac_reward_zero_std": 0.0, "grad_norm": 2.4692460036480885, "kl": 0.010894775390625, "learning_rate": 8.636669715717304e-07, "loss": -0.0106, "num_tokens": 71579091.0, "reward": 0.0, "reward_std": 0.8387613892555237, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.17756178454481217, "rewards/wordcountpos_reward/raw_geo/std": 0.11850272511484324, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.03849001794597504, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1646 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1124.0, "completions/max_terminated_length": 1124.0, "completions/mean_length": 856.4375, "completions/mean_terminated_length": 856.4375, "completions/min_length": 571.0, "completions/min_terminated_length": 571.0, "epoch": 0.3294658931786357, "frac_reward_zero_std": 0.0, "grad_norm": 4.082142011814785, "kl": 0.019256591796875, "learning_rate": 8.634415820268915e-07, "loss": -0.0632, "num_tokens": 71616866.0, "reward": 1.1175870895385742e-08, "reward_std": 1.0489314794540405, "rewards/wordcountpos_reward/mean": 1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04150882351501215, "rewards/wordcountpos_reward/raw_geo/std": 0.11087506226723201, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1647 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1273.75, "completions/mean_terminated_length": 1241.4285888671875, "completions/min_length": 1082.0, "completions/min_terminated_length": 1082.0, "epoch": 0.32966593318663734, "frac_reward_zero_std": 0.0, "grad_norm": 3.1454323935490933, "kl": 0.012237548828125, "learning_rate": 8.63216039646514e-07, "loss": 0.0119, "num_tokens": 71659750.0, "reward": 0.0, "reward_std": 0.4110639691352844, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1701982890269431, "rewards/wordcountpos_reward/raw_geo/std": 0.21120580793522808, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13877773329774218, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1648 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1215.875, "completions/mean_terminated_length": 1196.933349609375, "completions/min_length": 963.0, "completions/min_terminated_length": 963.0, "epoch": 0.32986597319463895, "frac_reward_zero_std": 0.0, "grad_norm": 3.140199414086592, "kl": 0.013336181640625, "learning_rate": 8.629903445405733e-07, "loss": 0.0313, "num_tokens": 71696484.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0495773553848267, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07906567484375807, "rewards/wordcountpos_reward/raw_geo/std": 0.10252425199351857, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036263, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1649 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1399.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 1217.125, "completions/mean_terminated_length": 1217.125, "completions/min_length": 1013.0, "completions/min_terminated_length": 1013.0, "epoch": 0.3300660132026405, "frac_reward_zero_std": 0.0, "grad_norm": 3.013047334975712, "kl": 0.0157928466796875, "learning_rate": 8.627644968191195e-07, "loss": -0.0169, "num_tokens": 71738174.0, "reward": 0.0, "reward_std": 0.7353625893592834, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11402812388826357, "rewards/wordcountpos_reward/raw_geo/std": 0.15007952425869373, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9875, "rewards/wordcountpos_reward/raw_rule/std": 0.03626037527129048, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1650 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1288.625, "completions/mean_terminated_length": 1274.533447265625, "completions/min_length": 1113.0, "completions/min_terminated_length": 1113.0, "epoch": 0.3302660532106421, "frac_reward_zero_std": 0.0, "grad_norm": 2.853452778109157, "kl": 0.013458251953125, "learning_rate": 8.625384965922767e-07, "loss": 0.0043, "num_tokens": 71791224.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9514033794403076, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05407114467535135, "rewards/wordcountpos_reward/raw_geo/std": 0.04370856550435641, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.23122059372591136, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1651 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1267.0, "completions/max_terminated_length": 1267.0, "completions/mean_length": 1049.1875, "completions/mean_terminated_length": 1049.1875, "completions/min_length": 756.0, "completions/min_terminated_length": 756.0, "epoch": 0.33046609321864373, "frac_reward_zero_std": 0.0, "grad_norm": 3.6551948462596764, "kl": 0.015472412109375, "learning_rate": 8.623123439702435e-07, "loss": 0.053, "num_tokens": 71838875.0, "reward": 1.4901161193847656e-08, "reward_std": 1.035784125328064, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.20947465513796806, "rewards/wordcountpos_reward/raw_geo/std": 0.04648318660357534, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09727776191382573, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1652 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1340.0, "completions/mean_length": 1146.9375, "completions/mean_terminated_length": 1065.4615478515625, "completions/min_length": 810.0, "completions/min_terminated_length": 810.0, "epoch": 0.33066613322664534, "frac_reward_zero_std": 0.0, "grad_norm": 3.419436734271246, "kl": 0.0164947509765625, "learning_rate": 8.620860390632935e-07, "loss": -0.0666, "num_tokens": 71879986.0, "reward": 0.0, "reward_std": 0.8505703210830688, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.15760804734126216, "rewards/wordcountpos_reward/raw_geo/std": 0.23067096221501385, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09098229375970789, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1653 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1327.0, "completions/max_terminated_length": 1327.0, "completions/mean_length": 1017.5625, "completions/mean_terminated_length": 1017.5625, "completions/min_length": 703.0, "completions/min_terminated_length": 703.0, "epoch": 0.33086617323464695, "frac_reward_zero_std": 0.0, "grad_norm": 3.7391046197896953, "kl": 0.014678955078125, "learning_rate": 8.618595819817736e-07, "loss": -0.0155, "num_tokens": 71918491.0, "reward": 7.450580596923828e-09, "reward_std": 1.0449975728988647, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.06934497790695482, "rewards/wordcountpos_reward/raw_geo/std": 0.0976157506498915, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503961, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1654 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1340.0, "completions/max_terminated_length": 1340.0, "completions/mean_length": 1169.4375, "completions/mean_terminated_length": 1169.4375, "completions/min_length": 1002.0, "completions/min_terminated_length": 1002.0, "epoch": 0.3310662132426485, "frac_reward_zero_std": 0.0, "grad_norm": 3.093848017971346, "kl": 0.012908935546875, "learning_rate": 8.616329728361055e-07, "loss": -0.0028, "num_tokens": 71966698.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9457998871803284, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.049177681280332036, "rewards/wordcountpos_reward/raw_geo/std": 0.07021265181511138, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07391185942027817, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1655 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1248.625, "completions/mean_terminated_length": 1231.86669921875, "completions/min_length": 990.0, "completions/min_terminated_length": 990.0, "epoch": 0.3312662532506501, "frac_reward_zero_std": 0.0, "grad_norm": 2.6797721300755004, "kl": 0.0099334716796875, "learning_rate": 8.614062117367846e-07, "loss": -0.0044, "num_tokens": 72017660.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9649055004119873, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12770360326382632, "rewards/wordcountpos_reward/raw_geo/std": 0.10364378276182498, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026001, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1656 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1364.0, "completions/max_terminated_length": 1364.0, "completions/mean_length": 1095.9375, "completions/mean_terminated_length": 1095.9375, "completions/min_length": 846.0, "completions/min_terminated_length": 846.0, "epoch": 0.33146629325865173, "frac_reward_zero_std": 0.0, "grad_norm": 3.6198036608850663, "kl": 0.0180816650390625, "learning_rate": 8.611792987943808e-07, "loss": -0.0333, "num_tokens": 72068851.0, "reward": 0.0, "reward_std": 0.9832199811935425, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.033657054734761806, "rewards/wordcountpos_reward/raw_geo/std": 0.21148635202019553, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857659, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1657 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1226.0, "completions/mean_length": 1068.25, "completions/mean_terminated_length": 1039.4666748046875, "completions/min_length": 843.0, "completions/min_terminated_length": 843.0, "epoch": 0.33166633326665335, "frac_reward_zero_std": 0.0, "grad_norm": 2.7339064249219756, "kl": 0.0131072998046875, "learning_rate": 8.609522341195379e-07, "loss": 0.0261, "num_tokens": 72099087.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9827046394348145, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15818980448217593, "rewards/wordcountpos_reward/raw_geo/std": 0.2720681647812184, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.038248698840130005, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1658 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1374.4375, "completions/mean_terminated_length": 1317.3636474609375, "completions/min_length": 1154.0, "completions/min_terminated_length": 1154.0, "epoch": 0.33186637327465496, "frac_reward_zero_std": 0.0, "grad_norm": 3.1705661389213136, "kl": 0.014862060546875, "learning_rate": 8.607250178229737e-07, "loss": 0.0129, "num_tokens": 72146886.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9550391435623169, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1279016708894423, "rewards/wordcountpos_reward/raw_geo/std": 0.13708704452196357, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101764, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1659 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1305.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 1088.4375, "completions/mean_terminated_length": 1088.4375, "completions/min_length": 918.0, "completions/min_terminated_length": 918.0, "epoch": 0.3320664132826565, "frac_reward_zero_std": 0.0, "grad_norm": 3.158068988013846, "kl": 0.01239013671875, "learning_rate": 8.604976500154799e-07, "loss": -0.0212, "num_tokens": 72189541.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0630125999450684, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.021816352598230483, "rewards/wordcountpos_reward/raw_geo/std": 0.07414967833763413, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1660 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1283.5625, "completions/mean_terminated_length": 1211.416748046875, "completions/min_length": 808.0, "completions/min_terminated_length": 808.0, "epoch": 0.3322664532906581, "frac_reward_zero_std": 0.0, "grad_norm": 3.45457921404488, "kl": 0.0171356201171875, "learning_rate": 8.602701308079217e-07, "loss": 0.0152, "num_tokens": 72242542.0, "reward": -5.960464477539063e-08, "reward_std": 0.8233770728111267, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03192580890506957, "rewards/wordcountpos_reward/raw_geo/std": 0.06079735098621222, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1661 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1374.0, "completions/mean_length": 1202.375, "completions/mean_terminated_length": 1182.533447265625, "completions/min_length": 838.0, "completions/min_terminated_length": 838.0, "epoch": 0.33246649329865974, "frac_reward_zero_std": 0.0, "grad_norm": 3.03387048744309, "kl": 0.013702392578125, "learning_rate": 8.600424603112391e-07, "loss": 0.0431, "num_tokens": 72281532.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9889223575592041, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07276839249551871, "rewards/wordcountpos_reward/raw_geo/std": 0.07919626438968685, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258098, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1662 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1448.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 1178.0625, "completions/mean_terminated_length": 1178.0625, "completions/min_length": 827.0, "completions/min_terminated_length": 827.0, "epoch": 0.33266653330666135, "frac_reward_zero_std": 0.0, "grad_norm": 2.8906268944682085, "kl": 0.0111083984375, "learning_rate": 8.598146386364447e-07, "loss": -0.0403, "num_tokens": 72325597.0, "reward": -1.4901161193847656e-08, "reward_std": 1.067464828491211, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.022731399097937005, "rewards/wordcountpos_reward/raw_geo/std": 0.11610339578448907, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195305, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1663 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1113.8125, "completions/mean_terminated_length": 1088.0667724609375, "completions/min_length": 741.0, "completions/min_terminated_length": 741.0, "epoch": 0.3328665733146629, "frac_reward_zero_std": 0.0, "grad_norm": 2.31868360429403, "kl": 0.009456634521484375, "learning_rate": 8.59586665894626e-07, "loss": 0.0196, "num_tokens": 72357778.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9441494941711426, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15478742742293902, "rewards/wordcountpos_reward/raw_geo/std": 0.09099926630773468, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1664 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1142.0, "completions/max_terminated_length": 1142.0, "completions/mean_length": 979.3125, "completions/mean_terminated_length": 979.3125, "completions/min_length": 785.0, "completions/min_terminated_length": 785.0, "epoch": 0.3330666133226645, "frac_reward_zero_std": 0.0, "grad_norm": 3.673043262217543, "kl": 0.01153564453125, "learning_rate": 8.59358542196943e-07, "loss": 0.0285, "num_tokens": 72391511.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9658478498458862, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10427420556801044, "rewards/wordcountpos_reward/raw_geo/std": 0.1122056485688583, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0709720863229836, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1665 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1269.6875, "completions/mean_terminated_length": 1192.916748046875, "completions/min_length": 952.0, "completions/min_terminated_length": 952.0, "epoch": 0.33326665333066613, "frac_reward_zero_std": 0.0, "grad_norm": 2.3573459374820165, "kl": 0.01042938232421875, "learning_rate": 8.591302676546302e-07, "loss": 0.0106, "num_tokens": 72443762.0, "reward": 0.0, "reward_std": 0.9953340291976929, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12791598577088648, "rewards/wordcountpos_reward/raw_geo/std": 0.1395054680612004, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.1253144193766372, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1666 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1414.0, "completions/max_terminated_length": 1414.0, "completions/mean_length": 1036.8125, "completions/mean_terminated_length": 1036.8125, "completions/min_length": 786.0, "completions/min_terminated_length": 786.0, "epoch": 0.33346669333866774, "frac_reward_zero_std": 0.0, "grad_norm": 3.0687246191093975, "kl": 0.00966644287109375, "learning_rate": 8.589018423789951e-07, "loss": 0.0199, "num_tokens": 72475327.0, "reward": -3.3527612686157227e-08, "reward_std": 1.0489134788513184, "rewards/wordcountpos_reward/mean": -3.3527612686157227e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07922752826829943, "rewards/wordcountpos_reward/raw_geo/std": 0.05048213869472424, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1667 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1389.0, "completions/max_terminated_length": 1389.0, "completions/mean_length": 1068.0, "completions/mean_terminated_length": 1068.0, "completions/min_length": 894.0, "completions/min_terminated_length": 894.0, "epoch": 0.33366673334666935, "frac_reward_zero_std": 0.0, "grad_norm": 2.9688096576671072, "kl": 0.0144195556640625, "learning_rate": 8.586732664814189e-07, "loss": 0.0163, "num_tokens": 72520543.0, "reward": 0.0, "reward_std": 0.9736728668212891, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.13445471339927012, "rewards/wordcountpos_reward/raw_geo/std": 0.11353455947446682, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823631, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1668 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1303.0, "completions/max_terminated_length": 1303.0, "completions/mean_length": 968.875, "completions/mean_terminated_length": 968.875, "completions/min_length": 724.0, "completions/min_terminated_length": 724.0, "epoch": 0.3338667733546709, "frac_reward_zero_std": 0.0, "grad_norm": 4.011029033161554, "kl": 0.02215576171875, "learning_rate": 8.584445400733564e-07, "loss": -0.0123, "num_tokens": 72568885.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9159786701202393, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.19224829629672893, "rewards/wordcountpos_reward/raw_geo/std": 0.3139140649613301, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11547005383792518, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1669 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1436.75, "completions/mean_terminated_length": 1387.5555419921875, "completions/min_length": 1247.0, "completions/min_terminated_length": 1247.0, "epoch": 0.3340668133626725, "frac_reward_zero_std": 0.0, "grad_norm": 2.797773224926699, "kl": 0.0137176513671875, "learning_rate": 8.582156632663356e-07, "loss": 0.0125, "num_tokens": 72620753.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0462162494659424, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.014076951822259275, "rewards/wordcountpos_reward/raw_geo/std": 0.09544848809223545, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238706, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1670 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1358.0, "completions/max_terminated_length": 1358.0, "completions/mean_length": 949.0, "completions/mean_terminated_length": 949.0, "completions/min_length": 533.0, "completions/min_terminated_length": 533.0, "epoch": 0.33426685337067413, "frac_reward_zero_std": 0.0, "grad_norm": 3.8084494983363233, "kl": 0.017730712890625, "learning_rate": 8.579866361719575e-07, "loss": -0.0403, "num_tokens": 72654745.0, "reward": 0.0, "reward_std": 0.6010411977767944, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07513890144512154, "rewards/wordcountpos_reward/raw_geo/std": 0.1124821009424362, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891871, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1671 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1244.0, "completions/mean_terminated_length": 1226.933349609375, "completions/min_length": 1078.0, "completions/min_terminated_length": 1078.0, "epoch": 0.33446689337867574, "frac_reward_zero_std": 0.0, "grad_norm": 3.195706676565709, "kl": 0.0156707763671875, "learning_rate": 8.577574589018974e-07, "loss": 0.0013, "num_tokens": 72704057.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8122002482414246, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10090319999582571, "rewards/wordcountpos_reward/raw_geo/std": 0.08906120160618294, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1672 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1178.0, "completions/max_terminated_length": 1178.0, "completions/mean_length": 1060.5625, "completions/mean_terminated_length": 1060.5625, "completions/min_length": 948.0, "completions/min_terminated_length": 948.0, "epoch": 0.33466693338667736, "frac_reward_zero_std": 0.0, "grad_norm": 1.9792483144032362, "kl": 0.006511688232421875, "learning_rate": 8.575281315679027e-07, "loss": -0.0146, "num_tokens": 72752170.0, "reward": 0.0, "reward_std": 0.8763498067855835, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.004752864950806888, "rewards/wordcountpos_reward/raw_geo/std": 0.10706151939937984, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10000000000000002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1673 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1227.0, "completions/mean_length": 1255.125, "completions/mean_terminated_length": 1010.25, "completions/min_length": 887.0, "completions/min_terminated_length": 887.0, "epoch": 0.3348669733946789, "frac_reward_zero_std": 0.0, "grad_norm": 2.41506697906152, "kl": 0.0066127777099609375, "learning_rate": 8.572986542817948e-07, "loss": -0.0485, "num_tokens": 72794364.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5844442844390869, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09382704979471018, "rewards/wordcountpos_reward/raw_geo/std": 0.08939381503766418, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1674 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1188.0625, "completions/mean_terminated_length": 1116.076904296875, "completions/min_length": 837.0, "completions/min_terminated_length": 837.0, "epoch": 0.3350670134026805, "frac_reward_zero_std": 0.0, "grad_norm": 2.7824489420780694, "kl": 0.011199951171875, "learning_rate": 8.570690271554674e-07, "loss": 0.0587, "num_tokens": 72840645.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9770321846008301, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09799653364933583, "rewards/wordcountpos_reward/raw_geo/std": 0.10747258111910908, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1675 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1380.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1020.0625, "completions/mean_terminated_length": 1020.0625, "completions/min_length": 832.0, "completions/min_terminated_length": 832.0, "epoch": 0.33526705341068214, "frac_reward_zero_std": 0.0, "grad_norm": 3.172385655854961, "kl": 0.017578125, "learning_rate": 8.56839250300888e-07, "loss": -0.0059, "num_tokens": 72872470.0, "reward": 2.60770320892334e-08, "reward_std": 1.0450825691223145, "rewards/wordcountpos_reward/mean": 2.60770320892334e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.010496641159481056, "rewards/wordcountpos_reward/raw_geo/std": 0.05656542586081801, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891871, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1676 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1272.0, "completions/max_terminated_length": 1272.0, "completions/mean_length": 948.875, "completions/mean_terminated_length": 948.875, "completions/min_length": 662.0, "completions/min_terminated_length": 662.0, "epoch": 0.33546709341868375, "frac_reward_zero_std": 0.0, "grad_norm": 3.5117266991860214, "kl": 0.01335906982421875, "learning_rate": 8.566093238300968e-07, "loss": -0.0167, "num_tokens": 72924500.0, "reward": -5.960464477539063e-08, "reward_std": 0.6853330135345459, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.021708956404889006, "rewards/wordcountpos_reward/raw_geo/std": 0.04958926849206862, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.75, "rewards/wordcountpos_reward/raw_rule/std": 0.1813529401164726, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1677 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1287.0, "completions/mean_length": 1185.625, "completions/mean_terminated_length": 1164.666748046875, "completions/min_length": 847.0, "completions/min_terminated_length": 847.0, "epoch": 0.33566713342668536, "frac_reward_zero_std": 0.0, "grad_norm": 3.6184405195915135, "kl": 0.020050048828125, "learning_rate": 8.563792478552071e-07, "loss": -0.0289, "num_tokens": 72972318.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9075536727905273, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06601963105528937, "rewards/wordcountpos_reward/raw_geo/std": 0.09386959513664175, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823629, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1678 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1234.0625, "completions/mean_terminated_length": 1196.071533203125, "completions/min_length": 867.0, "completions/min_terminated_length": 867.0, "epoch": 0.3358671734346869, "frac_reward_zero_std": 0.0, "grad_norm": 2.9894226162165976, "kl": 0.01361083984375, "learning_rate": 8.561490224884049e-07, "loss": -0.0047, "num_tokens": 73021471.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7478127479553223, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.2846949863287823, "rewards/wordcountpos_reward/raw_geo/std": 0.15734597264230835, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1679 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1201.0, "completions/max_terminated_length": 1201.0, "completions/mean_length": 950.5625, "completions/mean_terminated_length": 950.5625, "completions/min_length": 677.0, "completions/min_terminated_length": 677.0, "epoch": 0.33606721344268853, "frac_reward_zero_std": 0.0, "grad_norm": 3.489116698625775, "kl": 0.0149993896484375, "learning_rate": 8.559186478419492e-07, "loss": -0.0272, "num_tokens": 73067536.0, "reward": -2.9802322387695312e-08, "reward_std": 0.4865996241569519, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12460323807770043, "rewards/wordcountpos_reward/raw_geo/std": 0.16504823488215692, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.10036968702787749, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1680 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1413.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1166.1875, "completions/mean_terminated_length": 1166.1875, "completions/min_length": 1002.0, "completions/min_terminated_length": 1002.0, "epoch": 0.33626725345069014, "frac_reward_zero_std": 0.0, "grad_norm": 3.0307864938069398, "kl": 0.0145263671875, "learning_rate": 8.556881240281715e-07, "loss": -0.0103, "num_tokens": 73115451.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0014188289642334, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02608589558019482, "rewards/wordcountpos_reward/raw_geo/std": 0.08555588627798585, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823631, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1681 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1178.5, "completions/mean_terminated_length": 1157.0667724609375, "completions/min_length": 912.0, "completions/min_terminated_length": 912.0, "epoch": 0.33646729345869175, "frac_reward_zero_std": 0.0, "grad_norm": 2.8457920750430805, "kl": 0.010711669921875, "learning_rate": 8.554574511594766e-07, "loss": -0.0615, "num_tokens": 73163363.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9478765726089478, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05854922238833614, "rewards/wordcountpos_reward/raw_geo/std": 0.0657059204017052, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.12141145226353543, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1682 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1357.0, "completions/max_terminated_length": 1357.0, "completions/mean_length": 1086.375, "completions/mean_terminated_length": 1086.375, "completions/min_length": 763.0, "completions/min_terminated_length": 763.0, "epoch": 0.33666733346669336, "frac_reward_zero_std": 0.0, "grad_norm": 3.5279214912253614, "kl": 0.0228271484375, "learning_rate": 8.552266293483415e-07, "loss": -0.0277, "num_tokens": 73212097.0, "reward": 0.0, "reward_std": 0.6660162210464478, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.141658077100029, "rewards/wordcountpos_reward/raw_geo/std": 0.10384053885566608, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11917929226045818, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1683 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1422.0, "completions/max_terminated_length": 1422.0, "completions/mean_length": 1079.0625, "completions/mean_terminated_length": 1079.0625, "completions/min_length": 779.0, "completions/min_terminated_length": 779.0, "epoch": 0.3368673734746949, "frac_reward_zero_std": 0.0, "grad_norm": 3.6314944215100384, "kl": 0.016357421875, "learning_rate": 8.549956587073157e-07, "loss": -0.0604, "num_tokens": 73242778.0, "reward": 0.0, "reward_std": 0.6610418558120728, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06343283291496019, "rewards/wordcountpos_reward/raw_geo/std": 0.06877806093514871, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194862, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1684 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1259.625, "completions/mean_terminated_length": 1243.60009765625, "completions/min_length": 859.0, "completions/min_terminated_length": 859.0, "epoch": 0.33706741348269653, "frac_reward_zero_std": 0.0, "grad_norm": 2.3939930375939507, "kl": 0.013427734375, "learning_rate": 8.547645393490218e-07, "loss": 0.0446, "num_tokens": 73289444.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9426002502441406, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.042173102293494705, "rewards/wordcountpos_reward/raw_geo/std": 0.09454198432796888, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07685966046898339, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1685 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1326.0, "completions/mean_length": 1078.625, "completions/mean_terminated_length": 1018.4285888671875, "completions/min_length": 704.0, "completions/min_terminated_length": 704.0, "epoch": 0.33726745349069814, "frac_reward_zero_std": 0.0, "grad_norm": 3.2959746657704767, "kl": 0.0160369873046875, "learning_rate": 8.545332713861547e-07, "loss": -0.0589, "num_tokens": 73336646.0, "reward": 5.960464477539063e-08, "reward_std": 0.7477917671203613, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1673657800842179, "rewards/wordcountpos_reward/raw_geo/std": 0.12065034760598435, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1686 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1157.4375, "completions/mean_terminated_length": 1001.727294921875, "completions/min_length": 633.0, "completions/min_terminated_length": 633.0, "epoch": 0.33746749349869976, "frac_reward_zero_std": 0.0, "grad_norm": 3.3433469983300785, "kl": 0.013946533203125, "learning_rate": 8.543018549314817e-07, "loss": 0.0257, "num_tokens": 73380509.0, "reward": 0.0, "reward_std": 0.8304761648178101, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09634061591515866, "rewards/wordcountpos_reward/raw_geo/std": 0.08681559871076935, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823631, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1687 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1418.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1006.9375, "completions/mean_terminated_length": 1006.9375, "completions/min_length": 823.0, "completions/min_terminated_length": 823.0, "epoch": 0.33766753350670137, "frac_reward_zero_std": 0.0, "grad_norm": 3.5503258987273845, "kl": 0.0174102783203125, "learning_rate": 8.540702900978424e-07, "loss": -0.0212, "num_tokens": 73414732.0, "reward": -3.725290298461914e-09, "reward_std": 1.0035524368286133, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.07777010897557533, "rewards/wordcountpos_reward/raw_geo/std": 0.06675779960420548, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.14240006242195888, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1688 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1236.5625, "completions/mean_terminated_length": 1219.0001220703125, "completions/min_length": 1015.0, "completions/min_terminated_length": 1015.0, "epoch": 0.3378675735147029, "frac_reward_zero_std": 0.0, "grad_norm": 2.5994790645175265, "kl": 0.010589599609375, "learning_rate": 8.538385769981488e-07, "loss": 0.0143, "num_tokens": 73465677.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6215928792953491, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.008066809599905873, "rewards/wordcountpos_reward/raw_geo/std": 0.10528487473474683, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1060223596263578, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1689 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1288.0, "completions/max_terminated_length": 1288.0, "completions/mean_length": 1032.125, "completions/mean_terminated_length": 1032.125, "completions/min_length": 719.0, "completions/min_terminated_length": 719.0, "epoch": 0.33806761352270454, "frac_reward_zero_std": 0.0, "grad_norm": 3.255731032663374, "kl": 0.0111083984375, "learning_rate": 8.536067157453854e-07, "loss": 0.0023, "num_tokens": 73504783.0, "reward": 0.0, "reward_std": 0.9578646421432495, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04860542256463814, "rewards/wordcountpos_reward/raw_geo/std": 0.10861274109979441, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.12405196043952266, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1690 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1209.0, "completions/max_terminated_length": 1209.0, "completions/mean_length": 1000.375, "completions/mean_terminated_length": 1000.375, "completions/min_length": 821.0, "completions/min_terminated_length": 821.0, "epoch": 0.33826765353070615, "frac_reward_zero_std": 0.0, "grad_norm": 3.486230219453571, "kl": 0.0286865234375, "learning_rate": 8.533747064526087e-07, "loss": 0.0086, "num_tokens": 73535605.0, "reward": 0.0, "reward_std": 1.0157032012939453, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.23042252016333845, "rewards/wordcountpos_reward/raw_geo/std": 0.22507826234505102, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.09953596037316065, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1691 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1318.0, "completions/mean_length": 1125.4375, "completions/mean_terminated_length": 1100.4666748046875, "completions/min_length": 949.0, "completions/min_terminated_length": 949.0, "epoch": 0.33846769353870776, "frac_reward_zero_std": 0.0, "grad_norm": 3.529825517699508, "kl": 0.01629638671875, "learning_rate": 8.531425492329474e-07, "loss": -0.0021, "num_tokens": 73578620.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4763606786727905, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0594400785715604, "rewards/wordcountpos_reward/raw_geo/std": 0.16236111573416034, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0969917904124231, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1692 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1476.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1205.3125, "completions/mean_terminated_length": 1205.3125, "completions/min_length": 909.0, "completions/min_terminated_length": 909.0, "epoch": 0.3386677335467093, "frac_reward_zero_std": 0.0, "grad_norm": 3.3472536656699363, "kl": 0.016021728515625, "learning_rate": 8.529102441996028e-07, "loss": -0.0059, "num_tokens": 73631113.0, "reward": 0.0, "reward_std": 0.9410836696624756, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08334570965803832, "rewards/wordcountpos_reward/raw_geo/std": 0.042396005404711064, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.03626037527129048, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1693 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1254.0, "completions/max_terminated_length": 1254.0, "completions/mean_length": 1047.875, "completions/mean_terminated_length": 1047.875, "completions/min_length": 831.0, "completions/min_terminated_length": 831.0, "epoch": 0.3388677735547109, "frac_reward_zero_std": 0.0, "grad_norm": 3.174520829051601, "kl": 0.0142669677734375, "learning_rate": 8.526777914658475e-07, "loss": -0.0349, "num_tokens": 73672167.0, "reward": 0.0, "reward_std": 0.6043229699134827, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.13830292763916593, "rewards/wordcountpos_reward/raw_geo/std": 0.08418298029182496, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06666666666666667, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1694 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1100.75, "completions/mean_terminated_length": 1074.1334228515625, "completions/min_length": 787.0, "completions/min_terminated_length": 787.0, "epoch": 0.33906781356271254, "frac_reward_zero_std": 0.0, "grad_norm": 3.53477459479101, "kl": 0.0135650634765625, "learning_rate": 8.524451911450268e-07, "loss": -0.0091, "num_tokens": 73706739.0, "reward": 7.450580596923828e-09, "reward_std": 1.000130295753479, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.054692457934656026, "rewards/wordcountpos_reward/raw_geo/std": 0.07604195782104117, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9875, "rewards/wordcountpos_reward/raw_rule/std": 0.03626037527129048, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1695 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1301.0, "completions/mean_length": 1096.8125, "completions/mean_terminated_length": 1069.933349609375, "completions/min_length": 912.0, "completions/min_terminated_length": 912.0, "epoch": 0.33926785357071415, "frac_reward_zero_std": 0.0, "grad_norm": 3.285804865181038, "kl": 0.01416015625, "learning_rate": 8.522124433505574e-07, "loss": -0.0114, "num_tokens": 73738840.0, "reward": 0.0, "reward_std": 0.7916179895401001, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10394018309378934, "rewards/wordcountpos_reward/raw_geo/std": 0.14658636796849137, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1696 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1251.5625, "completions/mean_terminated_length": 1194.2308349609375, "completions/min_length": 960.0, "completions/min_terminated_length": 960.0, "epoch": 0.33946789357871576, "frac_reward_zero_std": 0.0, "grad_norm": 3.224388140361333, "kl": 0.0167694091796875, "learning_rate": 8.519795481959283e-07, "loss": -0.027, "num_tokens": 73784249.0, "reward": 0.0, "reward_std": 0.6722656488418579, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.029178315980885165, "rewards/wordcountpos_reward/raw_geo/std": 0.15951085033878243, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.18534252575124754, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1697 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1284.0, "completions/max_terminated_length": 1284.0, "completions/mean_length": 1094.125, "completions/mean_terminated_length": 1094.125, "completions/min_length": 732.0, "completions/min_terminated_length": 732.0, "epoch": 0.3396679335867173, "frac_reward_zero_std": 0.0, "grad_norm": 3.0254649778119687, "kl": 0.0140228271484375, "learning_rate": 8.517465057947004e-07, "loss": -0.055, "num_tokens": 73824675.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8562289476394653, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03133500735859041, "rewards/wordcountpos_reward/raw_geo/std": 0.12597710423371797, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1698 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1177.0, "completions/mean_length": 1059.375, "completions/mean_terminated_length": 1030.0, "completions/min_length": 828.0, "completions/min_terminated_length": 828.0, "epoch": 0.33986797359471893, "frac_reward_zero_std": 0.0, "grad_norm": 3.712921333004163, "kl": 0.01666259765625, "learning_rate": 8.51513316260506e-07, "loss": 0.0206, "num_tokens": 73862577.0, "reward": 0.0, "reward_std": 0.8229051232337952, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.028947265612873946, "rewards/wordcountpos_reward/raw_geo/std": 0.3676386182022282, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194866, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1699 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1414.0, "completions/max_terminated_length": 1414.0, "completions/mean_length": 1107.1875, "completions/mean_terminated_length": 1107.1875, "completions/min_length": 850.0, "completions/min_terminated_length": 850.0, "epoch": 0.34006801360272054, "frac_reward_zero_std": 0.0, "grad_norm": 3.3042779721581548, "kl": 0.0140380859375, "learning_rate": 8.512799797070492e-07, "loss": -0.0341, "num_tokens": 73905756.0, "reward": 0.0, "reward_std": 0.9211150407791138, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06485066539210994, "rewards/wordcountpos_reward/raw_geo/std": 0.1920853186133484, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460884, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1700 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1287.0, "completions/max_terminated_length": 1287.0, "completions/mean_length": 943.0, "completions/mean_terminated_length": 943.0, "completions/min_length": 676.0, "completions/min_terminated_length": 676.0, "epoch": 0.34026805361072215, "frac_reward_zero_std": 0.0, "grad_norm": 3.916835334439108, "kl": 0.019866943359375, "learning_rate": 8.510464962481065e-07, "loss": -0.009, "num_tokens": 73947964.0, "reward": 0.0, "reward_std": 0.8240481019020081, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.006151566162790921, "rewards/wordcountpos_reward/raw_geo/std": 0.04093682921242568, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1189459883650901, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1701 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1371.0, "completions/mean_length": 1138.625, "completions/mean_terminated_length": 1114.533447265625, "completions/min_length": 915.0, "completions/min_terminated_length": 915.0, "epoch": 0.34046809361872377, "frac_reward_zero_std": 0.0, "grad_norm": 3.213841139030795, "kl": 0.015380859375, "learning_rate": 8.508128659975251e-07, "loss": 0.0058, "num_tokens": 73999462.0, "reward": 7.450580596923828e-09, "reward_std": 0.9345962405204773, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.08579745101675412, "rewards/wordcountpos_reward/raw_geo/std": 0.1978860177849229, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9791666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.06763190130459201, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1702 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1287.0, "completions/mean_length": 1373.125, "completions/mean_terminated_length": 1246.25, "completions/min_length": 1186.0, "completions/min_terminated_length": 1186.0, "epoch": 0.3406681336267253, "frac_reward_zero_std": 0.0, "grad_norm": 2.721793437241765, "kl": 0.011199951171875, "learning_rate": 8.505790890692243e-07, "loss": 0.0242, "num_tokens": 74049808.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5063276290893555, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06569655842362646, "rewards/wordcountpos_reward/raw_geo/std": 0.07940958306986136, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066473, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1703 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1317.0, "completions/max_terminated_length": 1317.0, "completions/mean_length": 1081.375, "completions/mean_terminated_length": 1081.375, "completions/min_length": 687.0, "completions/min_terminated_length": 687.0, "epoch": 0.34086817363472693, "frac_reward_zero_std": 0.0, "grad_norm": 2.863599579829338, "kl": 0.01253509521484375, "learning_rate": 8.503451655771948e-07, "loss": 0.0339, "num_tokens": 74094214.0, "reward": 0.0, "reward_std": 0.6448909640312195, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06364293123674554, "rewards/wordcountpos_reward/raw_geo/std": 0.2746360791593829, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1704 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1391.0, "completions/max_terminated_length": 1391.0, "completions/mean_length": 1046.0, "completions/mean_terminated_length": 1046.0, "completions/min_length": 776.0, "completions/min_terminated_length": 776.0, "epoch": 0.34106821364272855, "frac_reward_zero_std": 0.0, "grad_norm": 3.267563112490123, "kl": 0.01422119140625, "learning_rate": 8.501110956354988e-07, "loss": -0.0126, "num_tokens": 74129758.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9310984015464783, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0129437121561379, "rewards/wordcountpos_reward/raw_geo/std": 0.07333047372044844, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000302, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1705 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1301.0, "completions/mean_length": 1097.875, "completions/mean_terminated_length": 1071.0667724609375, "completions/min_length": 888.0, "completions/min_terminated_length": 888.0, "epoch": 0.34126825365073016, "frac_reward_zero_std": 0.0, "grad_norm": 3.4757385019163225, "kl": 0.0163726806640625, "learning_rate": 8.498768793582696e-07, "loss": 0.022, "num_tokens": 74171484.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9566295146942139, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0771710115266427, "rewards/wordcountpos_reward/raw_geo/std": 0.07190161675473866, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1706 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1274.625, "completions/mean_terminated_length": 1259.60009765625, "completions/min_length": 1050.0, "completions/min_terminated_length": 1050.0, "epoch": 0.34146829365873177, "frac_reward_zero_std": 0.0, "grad_norm": 3.1815076127478883, "kl": 0.0146331787109375, "learning_rate": 8.496425168597123e-07, "loss": 0.03, "num_tokens": 74218190.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9715590476989746, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10582743987064228, "rewards/wordcountpos_reward/raw_geo/std": 0.12909915050369855, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1707 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1034.0, "completions/max_terminated_length": 1034.0, "completions/mean_length": 824.5625, "completions/mean_terminated_length": 824.5625, "completions/min_length": 507.0, "completions/min_terminated_length": 507.0, "epoch": 0.3416683336667333, "frac_reward_zero_std": 0.0, "grad_norm": 4.0636772608073155, "kl": 0.02032470703125, "learning_rate": 8.494080082541033e-07, "loss": -0.0273, "num_tokens": 74258807.0, "reward": 0.0, "reward_std": 1.055046558380127, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.041128203939138915, "rewards/wordcountpos_reward/raw_geo/std": 0.04960585220645909, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.08681611046941136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1708 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1404.0, "completions/max_terminated_length": 1404.0, "completions/mean_length": 1216.1875, "completions/mean_terminated_length": 1216.1875, "completions/min_length": 998.0, "completions/min_terminated_length": 998.0, "epoch": 0.34186837367473494, "frac_reward_zero_std": 0.0, "grad_norm": 2.235041387731442, "kl": 0.0081634521484375, "learning_rate": 8.491733536557897e-07, "loss": -0.0322, "num_tokens": 74297890.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8786622285842896, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.027645158851543762, "rewards/wordcountpos_reward/raw_geo/std": 0.13120135019487306, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1709 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1482.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1197.6875, "completions/mean_terminated_length": 1197.6875, "completions/min_length": 879.0, "completions/min_terminated_length": 879.0, "epoch": 0.34206841368273655, "frac_reward_zero_std": 0.0, "grad_norm": 3.593822411384142, "kl": 0.015533447265625, "learning_rate": 8.489385531791906e-07, "loss": -0.011, "num_tokens": 74337061.0, "reward": 0.0, "reward_std": 0.718343198299408, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.13991598180600234, "rewards/wordcountpos_reward/raw_geo/std": 0.15815555024164615, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.11409872268574492, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1710 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1338.0, "completions/mean_length": 1200.75, "completions/mean_terminated_length": 1064.727294921875, "completions/min_length": 602.0, "completions/min_terminated_length": 602.0, "epoch": 0.34226845369073816, "frac_reward_zero_std": 0.0, "grad_norm": 3.1073338675302495, "kl": 0.01348876953125, "learning_rate": 8.487036069387953e-07, "loss": -0.0578, "num_tokens": 74388465.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0315226316452026, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.022338886325019545, "rewards/wordcountpos_reward/raw_geo/std": 0.0532303633532224, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1711 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1302.0, "completions/max_terminated_length": 1302.0, "completions/mean_length": 1002.625, "completions/mean_terminated_length": 1002.625, "completions/min_length": 648.0, "completions/min_terminated_length": 648.0, "epoch": 0.3424684936987398, "frac_reward_zero_std": 0.0, "grad_norm": 3.5130571894412803, "kl": 0.0177001953125, "learning_rate": 8.484685150491649e-07, "loss": -0.0516, "num_tokens": 74440491.0, "reward": 7.450580596923828e-09, "reward_std": 1.0216412544250488, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.04038896652015001, "rewards/wordcountpos_reward/raw_geo/std": 0.0636367448765158, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087681, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1712 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1495.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1194.5625, "completions/mean_terminated_length": 1194.5625, "completions/min_length": 840.0, "completions/min_terminated_length": 840.0, "epoch": 0.34266853370674133, "frac_reward_zero_std": 0.0, "grad_norm": 3.4022515331729477, "kl": 0.01947021484375, "learning_rate": 8.482332776249314e-07, "loss": -0.0066, "num_tokens": 74489412.0, "reward": 0.0, "reward_std": 0.916829526424408, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06301875877379064, "rewards/wordcountpos_reward/raw_geo/std": 0.1003141807825282, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1057600358603626, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1713 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1440.0, "completions/mean_length": 1040.125, "completions/mean_terminated_length": 1009.4667358398438, "completions/min_length": 673.0, "completions/min_terminated_length": 673.0, "epoch": 0.34286857371474294, "frac_reward_zero_std": 0.0, "grad_norm": 3.4498298391737947, "kl": 0.0135040283203125, "learning_rate": 8.479978947807975e-07, "loss": 0.0049, "num_tokens": 74534982.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4400555193424225, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05426460704411065, "rewards/wordcountpos_reward/raw_geo/std": 0.24999964137988331, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14395215254459456, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1714 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1394.0, "completions/mean_length": 1216.5625, "completions/mean_terminated_length": 1122.0833740234375, "completions/min_length": 928.0, "completions/min_terminated_length": 928.0, "epoch": 0.34306861372274455, "frac_reward_zero_std": 0.0, "grad_norm": 3.64906445105514, "kl": 0.01861572265625, "learning_rate": 8.477623666315367e-07, "loss": 0.0323, "num_tokens": 74586655.0, "reward": 0.0, "reward_std": 0.8644471168518066, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1158785271774045, "rewards/wordcountpos_reward/raw_geo/std": 0.13817021228847876, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1029203215725281, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1715 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1105.625, "completions/mean_terminated_length": 1049.2857666015625, "completions/min_length": 650.0, "completions/min_terminated_length": 650.0, "epoch": 0.34326865373074616, "frac_reward_zero_std": 0.0, "grad_norm": 3.4423004432671944, "kl": 0.0144805908203125, "learning_rate": 8.475266932919938e-07, "loss": 0.0119, "num_tokens": 74626809.0, "reward": 0.0, "reward_std": 0.8821020126342773, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10427966120452759, "rewards/wordcountpos_reward/raw_geo/std": 0.07495030962347883, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1716 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1153.0, "completions/max_terminated_length": 1153.0, "completions/mean_length": 963.0, "completions/mean_terminated_length": 963.0, "completions/min_length": 703.0, "completions/min_terminated_length": 703.0, "epoch": 0.3434686937387478, "frac_reward_zero_std": 0.0, "grad_norm": 2.9666809465990736, "kl": 0.010528564453125, "learning_rate": 8.472908748770844e-07, "loss": -0.0108, "num_tokens": 74655441.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0130188465118408, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09752729899777007, "rewards/wordcountpos_reward/raw_geo/std": 0.1137234694640579, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575907, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1717 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1481.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1114.625, "completions/mean_terminated_length": 1114.625, "completions/min_length": 684.0, "completions/min_terminated_length": 684.0, "epoch": 0.34366873374674933, "frac_reward_zero_std": 0.0, "grad_norm": 3.497303787984739, "kl": 0.01800537109375, "learning_rate": 8.470549115017944e-07, "loss": 0.0256, "num_tokens": 74710075.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9461817741394043, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05622239683286784, "rewards/wordcountpos_reward/raw_geo/std": 0.08157377962975434, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.15939701191492708, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1718 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 1295.9375, "completions/mean_terminated_length": 1203.181884765625, "completions/min_length": 1014.0, "completions/min_terminated_length": 1014.0, "epoch": 0.34386877375475094, "frac_reward_zero_std": 0.0, "grad_norm": 3.179209510266924, "kl": 0.01763916015625, "learning_rate": 8.468188032811806e-07, "loss": 0.0297, "num_tokens": 74755018.0, "reward": 2.9802322387695312e-08, "reward_std": 0.37994059920310974, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1355370921292624, "rewards/wordcountpos_reward/raw_geo/std": 0.21804350176150675, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1719 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1363.0, "completions/max_terminated_length": 1363.0, "completions/mean_length": 1220.75, "completions/mean_terminated_length": 1220.75, "completions/min_length": 1066.0, "completions/min_terminated_length": 1066.0, "epoch": 0.34406881376275256, "frac_reward_zero_std": 0.0, "grad_norm": 2.5203248021143954, "kl": 0.0106048583984375, "learning_rate": 8.465825503303705e-07, "loss": 0.0075, "num_tokens": 74799982.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0460631847381592, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.012841327235359567, "rewards/wordcountpos_reward/raw_geo/std": 0.21718124609126793, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1720 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1268.375, "completions/mean_terminated_length": 1252.933349609375, "completions/min_length": 953.0, "completions/min_terminated_length": 953.0, "epoch": 0.34426885377075417, "frac_reward_zero_std": 0.0, "grad_norm": 2.777431283159697, "kl": 0.0111236572265625, "learning_rate": 8.463461527645621e-07, "loss": -0.0117, "num_tokens": 74841892.0, "reward": -2.9802322387695312e-08, "reward_std": 1.00283944606781, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0600485330930281, "rewards/wordcountpos_reward/raw_geo/std": 0.049325751004679286, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1721 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1356.0, "completions/max_terminated_length": 1356.0, "completions/mean_length": 1093.75, "completions/mean_terminated_length": 1093.75, "completions/min_length": 823.0, "completions/min_terminated_length": 823.0, "epoch": 0.3444688937787557, "frac_reward_zero_std": 0.0, "grad_norm": 3.63843566419292, "kl": 0.015838623046875, "learning_rate": 8.461096106990241e-07, "loss": -0.0018, "num_tokens": 74878824.0, "reward": 0.0, "reward_std": 0.7780653238296509, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.061234134472009136, "rewards/wordcountpos_reward/raw_geo/std": 0.29139884303553054, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.0758897836290186, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1722 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1357.0, "completions/max_terminated_length": 1357.0, "completions/mean_length": 1196.125, "completions/mean_terminated_length": 1196.125, "completions/min_length": 990.0, "completions/min_terminated_length": 990.0, "epoch": 0.34466893378675734, "frac_reward_zero_std": 0.0, "grad_norm": 2.6202245138556237, "kl": 0.0119781494140625, "learning_rate": 8.458729242490951e-07, "loss": 0.0139, "num_tokens": 74922882.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8111221790313721, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.056166992934110024, "rewards/wordcountpos_reward/raw_geo/std": 0.09104621663299794, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0769800358919501, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1723 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1436.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 896.625, "completions/mean_terminated_length": 896.625, "completions/min_length": 504.0, "completions/min_terminated_length": 504.0, "epoch": 0.34486897379475895, "frac_reward_zero_std": 0.0, "grad_norm": 4.12774794273736, "kl": 0.016754150390625, "learning_rate": 8.456360935301849e-07, "loss": 0.0251, "num_tokens": 74973724.0, "reward": -7.450580596923828e-09, "reward_std": 0.9670534133911133, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.008813059183389977, "rewards/wordcountpos_reward/raw_geo/std": 0.017488180877025798, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12412657816683505, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1724 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1261.875, "completions/mean_terminated_length": 1227.857177734375, "completions/min_length": 921.0, "completions/min_terminated_length": 921.0, "epoch": 0.34506901380276056, "frac_reward_zero_std": 0.0, "grad_norm": 2.359533574364688, "kl": 0.0131378173828125, "learning_rate": 8.453991186577727e-07, "loss": -0.0105, "num_tokens": 75027362.0, "reward": 0.0, "reward_std": 0.824561357498169, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.16705782248396822, "rewards/wordcountpos_reward/raw_geo/std": 0.2882327883423054, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11122216672215288, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1725 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1490.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1147.8125, "completions/mean_terminated_length": 1147.8125, "completions/min_length": 808.0, "completions/min_terminated_length": 808.0, "epoch": 0.34526905381076217, "frac_reward_zero_std": 0.0, "grad_norm": 3.4565125891817323, "kl": 0.0194091796875, "learning_rate": 8.451619997474093e-07, "loss": 0.0031, "num_tokens": 75078711.0, "reward": 0.0, "reward_std": 0.6755422949790955, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07898029709678793, "rewards/wordcountpos_reward/raw_geo/std": 0.12465419440453065, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10292032157252812, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1726 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1186.875, "completions/mean_terminated_length": 1166.0, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.34546909381876373, "frac_reward_zero_std": 0.0, "grad_norm": 3.3464979128691312, "kl": 0.02032470703125, "learning_rate": 8.449247369147143e-07, "loss": -0.0064, "num_tokens": 75132509.0, "reward": -2.9802322387695312e-08, "reward_std": 0.4986012578010559, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06003935317517118, "rewards/wordcountpos_reward/raw_geo/std": 0.15599631206994707, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11155467020454342, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1727 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1242.125, "completions/mean_terminated_length": 1224.933349609375, "completions/min_length": 1038.0, "completions/min_terminated_length": 1038.0, "epoch": 0.34566913382676534, "frac_reward_zero_std": 0.0, "grad_norm": 3.1969694187826385, "kl": 0.018463134765625, "learning_rate": 8.446873302753783e-07, "loss": 0.0129, "num_tokens": 75170847.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9715031385421753, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04690511093353454, "rewards/wordcountpos_reward/raw_geo/std": 0.1236068764265948, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1728 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1488.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 969.3125, "completions/mean_terminated_length": 969.3125, "completions/min_length": 746.0, "completions/min_terminated_length": 746.0, "epoch": 0.34586917383476695, "frac_reward_zero_std": 0.0, "grad_norm": 3.797458236038755, "kl": 0.0167999267578125, "learning_rate": 8.44449779945162e-07, "loss": 0.0034, "num_tokens": 75220660.0, "reward": -2.9802322387695312e-08, "reward_std": 0.944270133972168, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1341032308323001, "rewards/wordcountpos_reward/raw_geo/std": 0.17118007392543386, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1258305739211792, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1729 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1364.0, "completions/max_terminated_length": 1364.0, "completions/mean_length": 1113.0625, "completions/mean_terminated_length": 1113.0625, "completions/min_length": 915.0, "completions/min_terminated_length": 915.0, "epoch": 0.34606921384276856, "frac_reward_zero_std": 0.0, "grad_norm": 2.5753878419553655, "kl": 0.0115203857421875, "learning_rate": 8.442120860398958e-07, "loss": 0.0109, "num_tokens": 75263789.0, "reward": 0.0, "reward_std": 0.9938139915466309, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09399468579210593, "rewards/wordcountpos_reward/raw_geo/std": 0.05059893019944131, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11474609652039006, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1730 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1348.0, "completions/max_terminated_length": 1348.0, "completions/mean_length": 1114.0, "completions/mean_terminated_length": 1114.0, "completions/min_length": 732.0, "completions/min_terminated_length": 732.0, "epoch": 0.3462692538507702, "frac_reward_zero_std": 0.0, "grad_norm": 3.684548990214174, "kl": 0.0160064697265625, "learning_rate": 8.439742486754806e-07, "loss": -0.0714, "num_tokens": 75316861.0, "reward": -1.4901161193847656e-08, "reward_std": 1.032321810722351, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06998370050948025, "rewards/wordcountpos_reward/raw_geo/std": 0.06990183708693114, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1731 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1123.75, "completions/mean_terminated_length": 998.3333740234375, "completions/min_length": 714.0, "completions/min_terminated_length": 714.0, "epoch": 0.34646929385877173, "frac_reward_zero_std": 0.0, "grad_norm": 3.809040623114236, "kl": 0.017364501953125, "learning_rate": 8.437362679678868e-07, "loss": 0.0489, "num_tokens": 75356681.0, "reward": 7.450580596923828e-09, "reward_std": 1.0470911264419556, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.08040011000038194, "rewards/wordcountpos_reward/raw_geo/std": 0.05801351273338138, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590962, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1732 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1329.0, "completions/mean_length": 1085.0, "completions/mean_terminated_length": 1057.3333740234375, "completions/min_length": 846.0, "completions/min_terminated_length": 846.0, "epoch": 0.34666933386677334, "frac_reward_zero_std": 0.0, "grad_norm": 2.7856413334773475, "kl": 0.012847900390625, "learning_rate": 8.434981440331549e-07, "loss": -0.0433, "num_tokens": 75397129.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0442838668823242, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12539632860729288, "rewards/wordcountpos_reward/raw_geo/std": 0.09553661606522137, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590962, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1733 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1447.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1152.25, "completions/mean_terminated_length": 1152.25, "completions/min_length": 822.0, "completions/min_terminated_length": 822.0, "epoch": 0.34686937387477496, "frac_reward_zero_std": 0.0, "grad_norm": 2.730180641953594, "kl": 0.01031494140625, "learning_rate": 8.432598769873952e-07, "loss": 0.0303, "num_tokens": 75445885.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8651590347290039, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09145136700843742, "rewards/wordcountpos_reward/raw_geo/std": 0.060933200675037755, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237131, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1734 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1246.0625, "completions/mean_terminated_length": 1229.1334228515625, "completions/min_length": 966.0, "completions/min_terminated_length": 966.0, "epoch": 0.34706941388277657, "frac_reward_zero_std": 0.0, "grad_norm": 2.7880250306512147, "kl": 0.012237548828125, "learning_rate": 8.430214669467879e-07, "loss": 0.0286, "num_tokens": 75488278.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8757525682449341, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.009141063894497122, "rewards/wordcountpos_reward/raw_geo/std": 0.13725018844144185, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026004, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1735 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1349.0, "completions/mean_length": 1180.1875, "completions/mean_terminated_length": 1134.5, "completions/min_length": 954.0, "completions/min_terminated_length": 954.0, "epoch": 0.3472694538907782, "frac_reward_zero_std": 0.0, "grad_norm": 3.1860516228123426, "kl": 0.0158538818359375, "learning_rate": 8.427829140275826e-07, "loss": -0.0216, "num_tokens": 75534937.0, "reward": 0.0, "reward_std": 0.997195839881897, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1130510860792016, "rewards/wordcountpos_reward/raw_geo/std": 0.16753564914419058, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715322, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1736 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1174.0, "completions/max_terminated_length": 1174.0, "completions/mean_length": 987.3125, "completions/mean_terminated_length": 987.3125, "completions/min_length": 682.0, "completions/min_terminated_length": 682.0, "epoch": 0.34746949389877974, "frac_reward_zero_std": 0.0, "grad_norm": 2.32425010820382, "kl": 0.00731658935546875, "learning_rate": 8.42544218346099e-07, "loss": -0.0094, "num_tokens": 75575374.0, "reward": 2.9802322387695312e-08, "reward_std": 0.439910352230072, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.17680958607407699, "rewards/wordcountpos_reward/raw_geo/std": 0.21620985673016077, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590963, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1737 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1290.3125, "completions/mean_terminated_length": 1276.3333740234375, "completions/min_length": 940.0, "completions/min_terminated_length": 940.0, "epoch": 0.34766953390678135, "frac_reward_zero_std": 0.0, "grad_norm": 2.5325818624180876, "kl": 0.0121612548828125, "learning_rate": 8.423053800187261e-07, "loss": -0.019, "num_tokens": 75622355.0, "reward": 0.0, "reward_std": 1.029433250427246, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2888202847905007, "rewards/wordcountpos_reward/raw_geo/std": 0.1869594518592459, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194865, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1738 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1428.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 1201.875, "completions/mean_terminated_length": 1201.875, "completions/min_length": 962.0, "completions/min_terminated_length": 962.0, "epoch": 0.34786957391478296, "frac_reward_zero_std": 0.0, "grad_norm": 3.4965371075446217, "kl": 0.0186920166015625, "learning_rate": 8.420663991619226e-07, "loss": 0.0226, "num_tokens": 75674473.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8095927238464355, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.002478753896322062, "rewards/wordcountpos_reward/raw_geo/std": 0.09118875143142491, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081408, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1739 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1310.0, "completions/max_terminated_length": 1310.0, "completions/mean_length": 1116.125, "completions/mean_terminated_length": 1116.125, "completions/min_length": 958.0, "completions/min_terminated_length": 958.0, "epoch": 0.34806961392278457, "frac_reward_zero_std": 0.0, "grad_norm": 2.909622471583735, "kl": 0.0126800537109375, "learning_rate": 8.418272758922167e-07, "loss": 0.012, "num_tokens": 75721403.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9980233907699585, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07849789942027535, "rewards/wordcountpos_reward/raw_geo/std": 0.07002551387853127, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08944271909999159, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1740 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1089.0, "completions/mean_length": 1268.5, "completions/mean_terminated_length": 1037.0, "completions/min_length": 966.0, "completions/min_terminated_length": 966.0, "epoch": 0.3482696539307862, "frac_reward_zero_std": 0.0, "grad_norm": 2.207014297202443, "kl": 0.008941650390625, "learning_rate": 8.415880103262059e-07, "loss": -0.0017, "num_tokens": 75776411.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0572911500930786, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.015331305535608606, "rewards/wordcountpos_reward/raw_geo/std": 0.07852050800000888, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.09727776191382574, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1741 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1303.0, "completions/max_terminated_length": 1303.0, "completions/mean_length": 1182.3125, "completions/mean_terminated_length": 1182.3125, "completions/min_length": 1097.0, "completions/min_terminated_length": 1097.0, "epoch": 0.34846969393878774, "frac_reward_zero_std": 0.0, "grad_norm": 1.6067144401784816, "kl": 0.00481414794921875, "learning_rate": 8.41348602580557e-07, "loss": -0.0003, "num_tokens": 75813456.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9599651098251343, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12137200283545012, "rewards/wordcountpos_reward/raw_geo/std": 0.07096277819320206, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1742 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1461.0, "completions/mean_length": 1216.375, "completions/mean_terminated_length": 1197.4666748046875, "completions/min_length": 903.0, "completions/min_terminated_length": 903.0, "epoch": 0.34866973394678935, "frac_reward_zero_std": 0.0, "grad_norm": 2.9578251346374342, "kl": 0.01739501953125, "learning_rate": 8.411090527720066e-07, "loss": -0.0317, "num_tokens": 75852774.0, "reward": -7.450580596923828e-09, "reward_std": 1.0665870904922485, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.019445109283162974, "rewards/wordcountpos_reward/raw_geo/std": 0.10738324269923563, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.14089659985908765, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1743 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1435.0, "completions/mean_length": 1338.875, "completions/mean_terminated_length": 1265.6363525390625, "completions/min_length": 1099.0, "completions/min_terminated_length": 1099.0, "epoch": 0.34886977395479096, "frac_reward_zero_std": 0.0, "grad_norm": 3.194055635114125, "kl": 0.0172119140625, "learning_rate": 8.408693610173603e-07, "loss": -0.0052, "num_tokens": 75899060.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9547780752182007, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2261594942519671, "rewards/wordcountpos_reward/raw_geo/std": 0.10865201701639123, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06978803887752093, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1744 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1406.0, "completions/max_terminated_length": 1406.0, "completions/mean_length": 1015.9375, "completions/mean_terminated_length": 1015.9375, "completions/min_length": 630.0, "completions/min_terminated_length": 630.0, "epoch": 0.3490698139627926, "frac_reward_zero_std": 0.0, "grad_norm": 3.906406807819015, "kl": 0.0156402587890625, "learning_rate": 8.406295274334926e-07, "loss": 0.0393, "num_tokens": 75944059.0, "reward": 0.0, "reward_std": 0.9560251235961914, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11774425628935953, "rewards/wordcountpos_reward/raw_geo/std": 0.06312923728325064, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.054262735320332364, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1745 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1370.0, "completions/mean_length": 1147.0, "completions/mean_terminated_length": 1096.571533203125, "completions/min_length": 859.0, "completions/min_terminated_length": 859.0, "epoch": 0.3492698539707942, "frac_reward_zero_std": 0.0, "grad_norm": 3.2051816922772183, "kl": 0.0120849609375, "learning_rate": 8.403895521373476e-07, "loss": 0.0615, "num_tokens": 75978387.0, "reward": 0.0, "reward_std": 0.9096341133117676, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.21622720739252863, "rewards/wordcountpos_reward/raw_geo/std": 0.13055362001242565, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1746 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 1178.8125, "completions/mean_terminated_length": 1104.6923828125, "completions/min_length": 814.0, "completions/min_terminated_length": 814.0, "epoch": 0.34946989397879574, "frac_reward_zero_std": 0.0, "grad_norm": 3.176936657223504, "kl": 0.014678955078125, "learning_rate": 8.401494352459384e-07, "loss": 0.0126, "num_tokens": 76021520.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0628066062927246, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12319832703928207, "rewards/wordcountpos_reward/raw_geo/std": 0.15112797179986387, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09803627446568497, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1747 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1341.0, "completions/max_terminated_length": 1341.0, "completions/mean_length": 1080.125, "completions/mean_terminated_length": 1080.125, "completions/min_length": 880.0, "completions/min_terminated_length": 880.0, "epoch": 0.34966993398679735, "frac_reward_zero_std": 0.0, "grad_norm": 3.423460930142355, "kl": 0.0159759521484375, "learning_rate": 8.39909176876347e-07, "loss": 0.0112, "num_tokens": 76071522.0, "reward": 0.0, "reward_std": 0.6338709592819214, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.169872426761819, "rewards/wordcountpos_reward/raw_geo/std": 0.1506294649817365, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8250000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.1517795672580372, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1748 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1200.875, "completions/mean_terminated_length": 1180.933349609375, "completions/min_length": 965.0, "completions/min_terminated_length": 965.0, "epoch": 0.34986997399479897, "frac_reward_zero_std": 0.0, "grad_norm": 3.1314582897293115, "kl": 0.0126495361328125, "learning_rate": 8.396687771457245e-07, "loss": -0.0207, "num_tokens": 76106608.0, "reward": 0.0, "reward_std": 0.8720186948776245, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1744394013619878, "rewards/wordcountpos_reward/raw_geo/std": 0.12667434068599262, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1749 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1394.0, "completions/max_terminated_length": 1394.0, "completions/mean_length": 1083.25, "completions/mean_terminated_length": 1083.25, "completions/min_length": 742.0, "completions/min_terminated_length": 742.0, "epoch": 0.3500700140028006, "frac_reward_zero_std": 0.0, "grad_norm": 3.1063834124273377, "kl": 0.01104736328125, "learning_rate": 8.39428236171291e-07, "loss": 0.0088, "num_tokens": 76139332.0, "reward": 0.0, "reward_std": 1.0650572776794434, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0967017304476083, "rewards/wordcountpos_reward/raw_geo/std": 0.3101797621178311, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1750 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1395.0, "completions/mean_length": 1235.875, "completions/mean_terminated_length": 1147.8333740234375, "completions/min_length": 958.0, "completions/min_terminated_length": 958.0, "epoch": 0.35027005401080213, "frac_reward_zero_std": 0.0, "grad_norm": 3.31142509435829, "kl": 0.0143280029296875, "learning_rate": 8.391875540703353e-07, "loss": -0.007, "num_tokens": 76189370.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8450350165367126, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07960390048839829, "rewards/wordcountpos_reward/raw_geo/std": 0.16827951928991716, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08432740427115679, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1751 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1314.0, "completions/max_terminated_length": 1314.0, "completions/mean_length": 912.75, "completions/mean_terminated_length": 912.75, "completions/min_length": 586.0, "completions/min_terminated_length": 586.0, "epoch": 0.35047009401880375, "frac_reward_zero_std": 0.0, "grad_norm": 3.019101860287366, "kl": 0.0133209228515625, "learning_rate": 8.389467309602149e-07, "loss": -0.0182, "num_tokens": 76219222.0, "reward": 0.0, "reward_std": 1.0596234798431396, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1749562641479956, "rewards/wordcountpos_reward/raw_geo/std": 0.07395599709580569, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409449, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1752 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1203.0, "completions/max_terminated_length": 1203.0, "completions/mean_length": 909.5625, "completions/mean_terminated_length": 909.5625, "completions/min_length": 725.0, "completions/min_terminated_length": 725.0, "epoch": 0.35067013402680536, "frac_reward_zero_std": 0.0, "grad_norm": 3.442055777582219, "kl": 0.013916015625, "learning_rate": 8.387057669583564e-07, "loss": -0.0156, "num_tokens": 76251567.0, "reward": 0.0, "reward_std": 0.8959325551986694, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.058975670117623454, "rewards/wordcountpos_reward/raw_geo/std": 0.09082896561303558, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.03415650255319865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1753 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 1100.875, "completions/mean_terminated_length": 1074.2667236328125, "completions/min_length": 711.0, "completions/min_terminated_length": 711.0, "epoch": 0.35087017403480697, "frac_reward_zero_std": 0.0, "grad_norm": 3.656090652636485, "kl": 0.017181396484375, "learning_rate": 8.38464662182255e-07, "loss": 0.0486, "num_tokens": 76302085.0, "reward": 0.0, "reward_std": 0.8695361018180847, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.008367596152448373, "rewards/wordcountpos_reward/raw_geo/std": 0.044893141799502344, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.09878896324620105, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1754 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1175.625, "completions/mean_terminated_length": 1154.0, "completions/min_length": 833.0, "completions/min_terminated_length": 833.0, "epoch": 0.3510702140428086, "frac_reward_zero_std": 0.0, "grad_norm": 3.0395418994086687, "kl": 0.0147247314453125, "learning_rate": 8.382234167494747e-07, "loss": 0.0273, "num_tokens": 76343591.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0580565929412842, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.23148631681277232, "rewards/wordcountpos_reward/raw_geo/std": 0.2582161452010852, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258099, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1755 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1454.0, "completions/max_terminated_length": 1454.0, "completions/mean_length": 1021.5, "completions/mean_terminated_length": 1021.5, "completions/min_length": 682.0, "completions/min_terminated_length": 682.0, "epoch": 0.35127025405081014, "frac_reward_zero_std": 0.0, "grad_norm": 3.3850257953326577, "kl": 0.01971435546875, "learning_rate": 8.379820307776472e-07, "loss": -0.0399, "num_tokens": 76385975.0, "reward": 0.0, "reward_std": 0.8627417087554932, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1063942083960841, "rewards/wordcountpos_reward/raw_geo/std": 0.11656831947156639, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1756 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1046.0, "completions/max_terminated_length": 1046.0, "completions/mean_length": 898.75, "completions/mean_terminated_length": 898.75, "completions/min_length": 719.0, "completions/min_terminated_length": 719.0, "epoch": 0.35147029405881175, "frac_reward_zero_std": 0.0, "grad_norm": 3.744633595515264, "kl": 0.0173492431640625, "learning_rate": 8.377405043844739e-07, "loss": -0.0064, "num_tokens": 76425803.0, "reward": 0.0, "reward_std": 0.8662160634994507, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08285859887325972, "rewards/wordcountpos_reward/raw_geo/std": 0.14720409028205564, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.09418264367902598, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1757 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1352.0, "completions/mean_length": 1176.75, "completions/mean_terminated_length": 1155.2000732421875, "completions/min_length": 980.0, "completions/min_terminated_length": 980.0, "epoch": 0.35167033406681336, "frac_reward_zero_std": 0.0, "grad_norm": 3.733777188519865, "kl": 0.0174713134765625, "learning_rate": 8.374988376877241e-07, "loss": 0.0106, "num_tokens": 76467143.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9899064302444458, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0010663061631800974, "rewards/wordcountpos_reward/raw_geo/std": 0.09378112727120681, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.09016445879408155, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1758 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1289.0, "completions/mean_length": 1083.625, "completions/mean_terminated_length": 1055.86669921875, "completions/min_length": 870.0, "completions/min_terminated_length": 870.0, "epoch": 0.351870374074815, "frac_reward_zero_std": 0.0, "grad_norm": 3.004270012403573, "kl": 0.01458740234375, "learning_rate": 8.372570308052356e-07, "loss": -0.0076, "num_tokens": 76496665.0, "reward": 0.0, "reward_std": 0.5205124616622925, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0835022272872219, "rewards/wordcountpos_reward/raw_geo/std": 0.10942189775086408, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1759 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1465.0, "completions/mean_length": 1261.625, "completions/mean_terminated_length": 1206.615478515625, "completions/min_length": 1057.0, "completions/min_terminated_length": 1057.0, "epoch": 0.3520704140828166, "frac_reward_zero_std": 0.0, "grad_norm": 3.37723527024958, "kl": 0.016510009765625, "learning_rate": 8.370150838549143e-07, "loss": -0.0145, "num_tokens": 76548051.0, "reward": 0.0, "reward_std": 0.6273642778396606, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.029543889821512806, "rewards/wordcountpos_reward/raw_geo/std": 0.2024231662227156, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11385500851066221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1760 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1085.9375, "completions/mean_terminated_length": 990.3846435546875, "completions/min_length": 570.0, "completions/min_terminated_length": 570.0, "epoch": 0.35227045409081814, "frac_reward_zero_std": 0.0, "grad_norm": 2.3739715401346904, "kl": 0.0108642578125, "learning_rate": 8.36772996954735e-07, "loss": -0.0173, "num_tokens": 76590218.0, "reward": 0.0, "reward_std": 1.0399260520935059, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.01047343397234857, "rewards/wordcountpos_reward/raw_geo/std": 0.09854378166513973, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1761 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1467.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 1214.4375, "completions/mean_terminated_length": 1214.4375, "completions/min_length": 828.0, "completions/min_terminated_length": 828.0, "epoch": 0.35247049409881975, "frac_reward_zero_std": 0.0, "grad_norm": 3.168514414031809, "kl": 0.0147857666015625, "learning_rate": 8.365307702227402e-07, "loss": 0.0172, "num_tokens": 76629673.0, "reward": 0.0, "reward_std": 0.8143726587295532, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03433872607048457, "rewards/wordcountpos_reward/raw_geo/std": 0.1034153093498973, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1762 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1346.0, "completions/max_terminated_length": 1346.0, "completions/mean_length": 946.4375, "completions/mean_terminated_length": 946.4375, "completions/min_length": 766.0, "completions/min_terminated_length": 766.0, "epoch": 0.35267053410682137, "frac_reward_zero_std": 0.0, "grad_norm": 3.32028351732439, "kl": 0.0130462646484375, "learning_rate": 8.362884037770406e-07, "loss": 0.0016, "num_tokens": 76671632.0, "reward": 1.1175870895385742e-08, "reward_std": 1.0393953323364258, "rewards/wordcountpos_reward/mean": 1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10284659422672278, "rewards/wordcountpos_reward/raw_geo/std": 0.10992351338458453, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11666666666666667, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1763 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1442.0, "completions/mean_length": 1210.125, "completions/mean_terminated_length": 1168.71435546875, "completions/min_length": 726.0, "completions/min_terminated_length": 726.0, "epoch": 0.352870574114823, "frac_reward_zero_std": 0.0, "grad_norm": 2.9190152704108017, "kl": 0.012481689453125, "learning_rate": 8.360458977358153e-07, "loss": -0.0071, "num_tokens": 76719610.0, "reward": 0.0, "reward_std": 0.9037257432937622, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.23087200788901413, "rewards/wordcountpos_reward/raw_geo/std": 0.08185484576012153, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026004, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1764 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1381.0, "completions/mean_length": 1132.0625, "completions/mean_terminated_length": 845.888916015625, "completions/min_length": 644.0, "completions/min_terminated_length": 644.0, "epoch": 0.3530706141228246, "frac_reward_zero_std": 0.0, "grad_norm": 2.996926037574471, "kl": 0.012237548828125, "learning_rate": 8.358032522173114e-07, "loss": 0.002, "num_tokens": 76763067.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0625553131103516, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10718183920250576, "rewards/wordcountpos_reward/raw_geo/std": 0.09454480939161107, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1765 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1448.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 1141.125, "completions/mean_terminated_length": 1141.125, "completions/min_length": 971.0, "completions/min_terminated_length": 971.0, "epoch": 0.35327065413082614, "frac_reward_zero_std": 0.0, "grad_norm": 3.4301525780618216, "kl": 0.017669677734375, "learning_rate": 8.35560467339844e-07, "loss": 0.0086, "num_tokens": 76816261.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6547678709030151, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09361613549072692, "rewards/wordcountpos_reward/raw_geo/std": 0.07929776414886555, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.1, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1766 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1310.0, "completions/max_terminated_length": 1310.0, "completions/mean_length": 1022.0, "completions/mean_terminated_length": 1022.0, "completions/min_length": 761.0, "completions/min_terminated_length": 761.0, "epoch": 0.35347069413882776, "frac_reward_zero_std": 0.0, "grad_norm": 3.5552046928640513, "kl": 0.019317626953125, "learning_rate": 8.353175432217959e-07, "loss": -0.0296, "num_tokens": 76857301.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8594515323638916, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09115010401761779, "rewards/wordcountpos_reward/raw_geo/std": 0.06978551704640325, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237134, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1767 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1137.8125, "completions/mean_terminated_length": 1113.666748046875, "completions/min_length": 801.0, "completions/min_terminated_length": 801.0, "epoch": 0.35367073414682937, "frac_reward_zero_std": 0.0, "grad_norm": 3.5134054589999293, "kl": 0.017913818359375, "learning_rate": 8.35074479981618e-07, "loss": 0.0267, "num_tokens": 76908578.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6233526468276978, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14070246879262074, "rewards/wordcountpos_reward/raw_geo/std": 0.13427832917434657, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1768 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1338.0, "completions/mean_length": 1143.3125, "completions/mean_terminated_length": 1119.533447265625, "completions/min_length": 839.0, "completions/min_terminated_length": 839.0, "epoch": 0.353870774154831, "frac_reward_zero_std": 0.0, "grad_norm": 3.4394738247277834, "kl": 0.0155487060546875, "learning_rate": 8.348312777378293e-07, "loss": 0.0055, "num_tokens": 76952119.0, "reward": 0.0, "reward_std": 0.3432086110115051, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.025926713601043035, "rewards/wordcountpos_reward/raw_geo/std": 0.18160445002803968, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409445, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1769 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1273.0, "completions/max_terminated_length": 1273.0, "completions/mean_length": 1018.5625, "completions/mean_terminated_length": 1018.5625, "completions/min_length": 710.0, "completions/min_terminated_length": 710.0, "epoch": 0.3540708141628326, "frac_reward_zero_std": 0.0, "grad_norm": 3.165086856832091, "kl": 0.0156707763671875, "learning_rate": 8.345879366090164e-07, "loss": 0.0321, "num_tokens": 76999672.0, "reward": 0.0, "reward_std": 1.0659531354904175, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02539357552940831, "rewards/wordcountpos_reward/raw_geo/std": 0.233092306338323, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1770 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1307.625, "completions/mean_terminated_length": 1158.0, "completions/min_length": 773.0, "completions/min_terminated_length": 773.0, "epoch": 0.35427085417083415, "frac_reward_zero_std": 0.0, "grad_norm": 3.1976807096972037, "kl": 0.0146484375, "learning_rate": 8.343444567138331e-07, "loss": -0.059, "num_tokens": 77053842.0, "reward": 2.9802322387695312e-08, "reward_std": 0.845935046672821, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03995322137849134, "rewards/wordcountpos_reward/raw_geo/std": 0.16344018957257903, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12405196043952266, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1771 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1249.0625, "completions/mean_terminated_length": 1191.1539306640625, "completions/min_length": 1057.0, "completions/min_terminated_length": 1057.0, "epoch": 0.35447089417883576, "frac_reward_zero_std": 0.0, "grad_norm": 3.0944915475823533, "kl": 0.0135498046875, "learning_rate": 8.341008381710015e-07, "loss": -0.0275, "num_tokens": 77107307.0, "reward": -7.450580596923828e-09, "reward_std": 1.0531097650527954, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.17963238129819156, "rewards/wordcountpos_reward/raw_geo/std": 0.08655035043001716, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.12141145226353542, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1772 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1060.0, "completions/mean_terminated_length": 1030.666748046875, "completions/min_length": 791.0, "completions/min_terminated_length": 791.0, "epoch": 0.35467093418683737, "frac_reward_zero_std": 0.0, "grad_norm": 2.244421068701371, "kl": 0.006877899169921875, "learning_rate": 8.338570810993111e-07, "loss": -0.0262, "num_tokens": 77146227.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0150959491729736, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.18955963082929525, "rewards/wordcountpos_reward/raw_geo/std": 0.13371337517066578, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1773 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1372.0, "completions/max_terminated_length": 1372.0, "completions/mean_length": 1044.375, "completions/mean_terminated_length": 1044.375, "completions/min_length": 620.0, "completions/min_terminated_length": 620.0, "epoch": 0.354870974194839, "frac_reward_zero_std": 0.0, "grad_norm": 3.2089717010263716, "kl": 0.0108642578125, "learning_rate": 8.336131856176192e-07, "loss": -0.0224, "num_tokens": 77179665.0, "reward": 0.0, "reward_std": 0.7413496971130371, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.17749997584678434, "rewards/wordcountpos_reward/raw_geo/std": 0.08729281042438872, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298362, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1774 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1251.75, "completions/mean_terminated_length": 1235.2000732421875, "completions/min_length": 969.0, "completions/min_terminated_length": 969.0, "epoch": 0.3550710142028406, "frac_reward_zero_std": 0.0, "grad_norm": 2.757737208480071, "kl": 0.0124053955078125, "learning_rate": 8.3336915184485e-07, "loss": -0.0491, "num_tokens": 77228437.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5085335373878479, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05196947074689537, "rewards/wordcountpos_reward/raw_geo/std": 0.12807978933516145, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.17888543819998318, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1775 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1325.0, "completions/max_terminated_length": 1325.0, "completions/mean_length": 1164.375, "completions/mean_terminated_length": 1164.375, "completions/min_length": 787.0, "completions/min_terminated_length": 787.0, "epoch": 0.35527105421084215, "frac_reward_zero_std": 0.0, "grad_norm": 2.1112946781110837, "kl": 0.0085906982421875, "learning_rate": 8.331249798999957e-07, "loss": -0.0377, "num_tokens": 77280243.0, "reward": 0.0, "reward_std": 0.8375033140182495, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08948196588720646, "rewards/wordcountpos_reward/raw_geo/std": 0.10307202962667876, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194864, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1776 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1435.0, "completions/mean_length": 1127.4375, "completions/mean_terminated_length": 903.9000244140625, "completions/min_length": 639.0, "completions/min_terminated_length": 639.0, "epoch": 0.35547109421884376, "frac_reward_zero_std": 0.0, "grad_norm": 3.0071592494705013, "kl": 0.011260986328125, "learning_rate": 8.328806699021155e-07, "loss": -0.039, "num_tokens": 77323906.0, "reward": 3.725290298461914e-09, "reward_std": 1.045759916305542, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.062323154744563236, "rewards/wordcountpos_reward/raw_geo/std": 0.07456624149110516, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066473, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1777 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1348.0, "completions/max_terminated_length": 1348.0, "completions/mean_length": 1102.9375, "completions/mean_terminated_length": 1102.9375, "completions/min_length": 903.0, "completions/min_terminated_length": 903.0, "epoch": 0.3556711342268454, "frac_reward_zero_std": 0.0, "grad_norm": 3.4477285194911587, "kl": 0.0173187255859375, "learning_rate": 8.32636221970336e-07, "loss": 0.0096, "num_tokens": 77362897.0, "reward": 0.0, "reward_std": 0.6526699066162109, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.18625682606729063, "rewards/wordcountpos_reward/raw_geo/std": 0.2808035713316073, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0697880388775209, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1778 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1291.0, "completions/max_terminated_length": 1291.0, "completions/mean_length": 954.8125, "completions/mean_terminated_length": 954.8125, "completions/min_length": 520.0, "completions/min_terminated_length": 520.0, "epoch": 0.355871174234847, "frac_reward_zero_std": 0.0, "grad_norm": 3.262044057617541, "kl": 0.0113677978515625, "learning_rate": 8.323916362238514e-07, "loss": -0.0185, "num_tokens": 77401254.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8943012356758118, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.014424672357033647, "rewards/wordcountpos_reward/raw_geo/std": 0.11751598132442093, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746354, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1779 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1317.0, "completions/max_terminated_length": 1317.0, "completions/mean_length": 995.5, "completions/mean_terminated_length": 995.5, "completions/min_length": 627.0, "completions/min_terminated_length": 627.0, "epoch": 0.35607121424284854, "frac_reward_zero_std": 0.0, "grad_norm": 3.1634840410757237, "kl": 0.0244293212890625, "learning_rate": 8.321469127819227e-07, "loss": -0.0083, "num_tokens": 77447302.0, "reward": 0.0, "reward_std": 0.6351709365844727, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.21175268534125855, "rewards/wordcountpos_reward/raw_geo/std": 0.225294773399291, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194862, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1780 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1356.0, "completions/mean_length": 1245.1875, "completions/mean_terminated_length": 1092.300048828125, "completions/min_length": 892.0, "completions/min_terminated_length": 892.0, "epoch": 0.35627125425085016, "frac_reward_zero_std": 0.0, "grad_norm": 3.1426916488304095, "kl": 0.0146331787109375, "learning_rate": 8.31902051763878e-07, "loss": -0.0137, "num_tokens": 77491857.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9163140058517456, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.015321193783817896, "rewards/wordcountpos_reward/raw_geo/std": 0.05993491944938508, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1781 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1405.0, "completions/max_terminated_length": 1405.0, "completions/mean_length": 1039.0625, "completions/mean_terminated_length": 1039.0625, "completions/min_length": 757.0, "completions/min_terminated_length": 757.0, "epoch": 0.35647129425885177, "frac_reward_zero_std": 0.0, "grad_norm": 3.3318399238376086, "kl": 0.01934814453125, "learning_rate": 8.316570532891128e-07, "loss": -0.0687, "num_tokens": 77528714.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9354727268218994, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08987492245923824, "rewards/wordcountpos_reward/raw_geo/std": 0.07136254523261959, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1782 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1268.0, "completions/max_terminated_length": 1268.0, "completions/mean_length": 1071.75, "completions/mean_terminated_length": 1071.75, "completions/min_length": 820.0, "completions/min_terminated_length": 820.0, "epoch": 0.3566713342668534, "frac_reward_zero_std": 0.0, "grad_norm": 3.455631229743058, "kl": 0.017608642578125, "learning_rate": 8.314119174770893e-07, "loss": -0.0208, "num_tokens": 77570486.0, "reward": 0.0, "reward_std": 0.8518043756484985, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04871386234198273, "rewards/wordcountpos_reward/raw_geo/std": 0.10190213861526806, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1783 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1414.0, "completions/max_terminated_length": 1414.0, "completions/mean_length": 1085.8125, "completions/mean_terminated_length": 1085.8125, "completions/min_length": 815.0, "completions/min_terminated_length": 815.0, "epoch": 0.356871374274855, "frac_reward_zero_std": 0.0, "grad_norm": 3.4100870022398997, "kl": 0.019775390625, "learning_rate": 8.311666444473372e-07, "loss": 0.0125, "num_tokens": 77614355.0, "reward": -4.0978193283081055e-08, "reward_std": 0.8546255826950073, "rewards/wordcountpos_reward/mean": -4.0978193283081055e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.059011478210088786, "rewards/wordcountpos_reward/raw_geo/std": 0.0808333892516301, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1784 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1243.0, "completions/max_terminated_length": 1243.0, "completions/mean_length": 971.8125, "completions/mean_terminated_length": 971.8125, "completions/min_length": 668.0, "completions/min_terminated_length": 668.0, "epoch": 0.35707141428285655, "frac_reward_zero_std": 0.0, "grad_norm": 3.3795069928788344, "kl": 0.0184173583984375, "learning_rate": 8.309212343194524e-07, "loss": 0.0101, "num_tokens": 77654272.0, "reward": 2.9802322387695312e-08, "reward_std": 0.41825148463249207, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.20585509560751594, "rewards/wordcountpos_reward/raw_geo/std": 0.31986154025481617, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1785 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1235.0, "completions/max_terminated_length": 1235.0, "completions/mean_length": 1041.4375, "completions/mean_terminated_length": 1041.4375, "completions/min_length": 949.0, "completions/min_terminated_length": 949.0, "epoch": 0.35727145429085816, "frac_reward_zero_std": 0.0, "grad_norm": 2.9447435996897733, "kl": 0.01152801513671875, "learning_rate": 8.306756872130981e-07, "loss": 0.0089, "num_tokens": 77695927.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0381940603256226, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03888574278295472, "rewards/wordcountpos_reward/raw_geo/std": 0.14458618401927192, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.022771001702132435, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1786 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1374.0, "completions/mean_length": 1344.9375, "completions/mean_terminated_length": 1189.875, "completions/min_length": 889.0, "completions/min_terminated_length": 889.0, "epoch": 0.35747149429885977, "frac_reward_zero_std": 0.0, "grad_norm": 2.8578058942876385, "kl": 0.0134735107421875, "learning_rate": 8.304300032480043e-07, "loss": -0.0646, "num_tokens": 77751822.0, "reward": 0.0, "reward_std": 0.8696860074996948, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.20234087789731361, "rewards/wordcountpos_reward/raw_geo/std": 0.2671582198031056, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078612, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1787 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1322.4375, "completions/mean_terminated_length": 1310.60009765625, "completions/min_length": 1192.0, "completions/min_terminated_length": 1192.0, "epoch": 0.3576715343068614, "frac_reward_zero_std": 0.0, "grad_norm": 3.0353406386246156, "kl": 0.0142364501953125, "learning_rate": 8.301841825439674e-07, "loss": 0.0295, "num_tokens": 77803645.0, "reward": 0.0, "reward_std": 0.786299467086792, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03207476653092414, "rewards/wordcountpos_reward/raw_geo/std": 0.14288910554864873, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.09574271077563382, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1788 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1347.0, "completions/mean_length": 1097.5625, "completions/mean_terminated_length": 1070.7333984375, "completions/min_length": 887.0, "completions/min_terminated_length": 887.0, "epoch": 0.357871574314863, "frac_reward_zero_std": 0.0, "grad_norm": 3.227660610138832, "kl": 0.0164642333984375, "learning_rate": 8.299382252208508e-07, "loss": 0.0033, "num_tokens": 77855214.0, "reward": -4.470348358154297e-08, "reward_std": 0.9033418297767639, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.036960299769436405, "rewards/wordcountpos_reward/raw_geo/std": 0.06914520166885942, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11279282877125757, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1789 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1308.0, "completions/max_terminated_length": 1308.0, "completions/mean_length": 1052.1875, "completions/mean_terminated_length": 1052.1875, "completions/min_length": 747.0, "completions/min_terminated_length": 747.0, "epoch": 0.35807161432286455, "frac_reward_zero_std": 0.0, "grad_norm": 3.5242901139041134, "kl": 0.0148773193359375, "learning_rate": 8.296921313985845e-07, "loss": -0.0139, "num_tokens": 77892489.0, "reward": 0.0, "reward_std": 0.9171948432922363, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06865091122028667, "rewards/wordcountpos_reward/raw_geo/std": 0.03947434604465728, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.0825518916489187, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1790 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1305.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 1130.3125, "completions/mean_terminated_length": 1130.3125, "completions/min_length": 977.0, "completions/min_terminated_length": 977.0, "epoch": 0.35827165433086616, "frac_reward_zero_std": 0.0, "grad_norm": 3.290439707470091, "kl": 0.0160369873046875, "learning_rate": 8.294459011971648e-07, "loss": -0.0106, "num_tokens": 77936766.0, "reward": 3.725290298461914e-08, "reward_std": 1.065662145614624, "rewards/wordcountpos_reward/mean": 3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.21022650979966945, "rewards/wordcountpos_reward/raw_geo/std": 0.06940481212345954, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042255, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1791 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1200.0, "completions/max_terminated_length": 1200.0, "completions/mean_length": 1016.0, "completions/mean_terminated_length": 1016.0, "completions/min_length": 829.0, "completions/min_terminated_length": 829.0, "epoch": 0.3584716943388678, "frac_reward_zero_std": 0.0, "grad_norm": 3.1474353577263376, "kl": 0.014129638671875, "learning_rate": 8.291995347366549e-07, "loss": -0.0008, "num_tokens": 77982502.0, "reward": 4.470348358154297e-08, "reward_std": 0.9738003611564636, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.026166755509749413, "rewards/wordcountpos_reward/raw_geo/std": 0.05185383738303213, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13709958532503408, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1792 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1438.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1147.8125, "completions/mean_terminated_length": 1147.8125, "completions/min_length": 890.0, "completions/min_terminated_length": 890.0, "epoch": 0.3586717343468694, "frac_reward_zero_std": 0.0, "grad_norm": 3.4636368111927873, "kl": 0.021484375, "learning_rate": 8.289530321371838e-07, "loss": -0.0086, "num_tokens": 78034843.0, "reward": 2.60770320892334e-08, "reward_std": 0.9589567184448242, "rewards/wordcountpos_reward/mean": 2.60770320892334e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.017555790945825253, "rewards/wordcountpos_reward/raw_geo/std": 0.0765506395894115, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1031898645611484, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1793 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1372.0, "completions/mean_length": 1136.5, "completions/mean_terminated_length": 1084.571533203125, "completions/min_length": 920.0, "completions/min_terminated_length": 920.0, "epoch": 0.358871774354871, "frac_reward_zero_std": 0.0, "grad_norm": 3.478633424768702, "kl": 0.01849365234375, "learning_rate": 8.287063935189477e-07, "loss": -0.0639, "num_tokens": 78086011.0, "reward": 0.0, "reward_std": 0.4624847173690796, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.025231276556521638, "rewards/wordcountpos_reward/raw_geo/std": 0.09838948488047025, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066473, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1794 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 1192.5, "completions/mean_terminated_length": 1172.0001220703125, "completions/min_length": 918.0, "completions/min_terminated_length": 918.0, "epoch": 0.35907181436287255, "frac_reward_zero_std": 0.0, "grad_norm": 3.1218685952297087, "kl": 0.0150299072265625, "learning_rate": 8.284596190022084e-07, "loss": -0.007, "num_tokens": 78128323.0, "reward": -3.725290298461914e-09, "reward_std": 0.9593905210494995, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.10021236115452767, "rewards/wordcountpos_reward/raw_geo/std": 0.24870041245817823, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.09953596037316065, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1795 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1357.0, "completions/mean_length": 1211.75, "completions/mean_terminated_length": 1192.533447265625, "completions/min_length": 1020.0, "completions/min_terminated_length": 1020.0, "epoch": 0.35927185437087417, "frac_reward_zero_std": 0.0, "grad_norm": 2.741819628650346, "kl": 0.0112457275390625, "learning_rate": 8.282127087072945e-07, "loss": 0.0092, "num_tokens": 78176863.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9650435447692871, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.016194140196833164, "rewards/wordcountpos_reward/raw_geo/std": 0.06246765301296056, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.033333333333333326, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1796 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1198.625, "completions/mean_terminated_length": 1098.166748046875, "completions/min_length": 968.0, "completions/min_terminated_length": 968.0, "epoch": 0.3594718943788758, "frac_reward_zero_std": 0.0, "grad_norm": 2.658873597014003, "kl": 0.0145416259765625, "learning_rate": 8.279656627546006e-07, "loss": 0.0426, "num_tokens": 78224145.0, "reward": 0.0, "reward_std": 0.9768966436386108, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11823837943433536, "rewards/wordcountpos_reward/raw_geo/std": 0.16374966888855674, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1797 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1127.0, "completions/max_terminated_length": 1127.0, "completions/mean_length": 879.4375, "completions/mean_terminated_length": 879.4375, "completions/min_length": 684.0, "completions/min_terminated_length": 684.0, "epoch": 0.3596719343868774, "frac_reward_zero_std": 0.0, "grad_norm": 3.4756289237640314, "kl": 0.0147857666015625, "learning_rate": 8.277184812645872e-07, "loss": -0.0306, "num_tokens": 78251392.0, "reward": 0.0, "reward_std": 0.8145972490310669, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07989463934851469, "rewards/wordcountpos_reward/raw_geo/std": 0.10843930532673866, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11147163731607215, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1798 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1439.0, "completions/max_terminated_length": 1439.0, "completions/mean_length": 1094.5625, "completions/mean_terminated_length": 1094.5625, "completions/min_length": 896.0, "completions/min_terminated_length": 896.0, "epoch": 0.359871974394879, "frac_reward_zero_std": 0.0, "grad_norm": 1.50562663949324, "kl": 0.005107879638671875, "learning_rate": 8.274711643577812e-07, "loss": -0.014, "num_tokens": 78286289.0, "reward": 0.0, "reward_std": 0.5736739039421082, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.004131526745284645, "rewards/wordcountpos_reward/raw_geo/std": 0.05687678880133583, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.13326387079497304, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1799 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1310.0, "completions/max_terminated_length": 1310.0, "completions/mean_length": 980.9375, "completions/mean_terminated_length": 980.9375, "completions/min_length": 734.0, "completions/min_terminated_length": 734.0, "epoch": 0.36007201440288056, "frac_reward_zero_std": 0.0, "grad_norm": 2.0084882948467606, "kl": 0.004909515380859375, "learning_rate": 8.272237121547755e-07, "loss": -0.0221, "num_tokens": 78319024.0, "reward": 0.0, "reward_std": 0.6539640426635742, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06237594292814795, "rewards/wordcountpos_reward/raw_geo/std": 0.09414410926475156, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.12524050936172845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1800 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1453.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1058.625, "completions/mean_terminated_length": 1058.625, "completions/min_length": 744.0, "completions/min_terminated_length": 744.0, "epoch": 0.36027205441088217, "frac_reward_zero_std": 0.0, "grad_norm": 3.7825485336227467, "kl": 0.01611328125, "learning_rate": 8.26976124776229e-07, "loss": -0.0002, "num_tokens": 78355426.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9959114789962769, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.014423863406516265, "rewards/wordcountpos_reward/raw_geo/std": 0.06680053221513364, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09737289911202955, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1801 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1365.0, "completions/max_terminated_length": 1365.0, "completions/mean_length": 986.5, "completions/mean_terminated_length": 986.5, "completions/min_length": 785.0, "completions/min_terminated_length": 785.0, "epoch": 0.3604720944188838, "frac_reward_zero_std": 0.0, "grad_norm": 3.48257815338035, "kl": 0.016845703125, "learning_rate": 8.267284023428663e-07, "loss": -0.0355, "num_tokens": 78398442.0, "reward": 0.0, "reward_std": 0.9134250283241272, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.016927365041500324, "rewards/wordcountpos_reward/raw_geo/std": 0.04207980325940796, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10174405069512348, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1802 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1384.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 1116.375, "completions/mean_terminated_length": 1116.375, "completions/min_length": 833.0, "completions/min_terminated_length": 833.0, "epoch": 0.3606721344268854, "frac_reward_zero_std": 0.0, "grad_norm": 2.2265140921951354, "kl": 0.0116119384765625, "learning_rate": 8.264805449754781e-07, "loss": -0.0177, "num_tokens": 78449560.0, "reward": -4.470348358154297e-08, "reward_std": 1.0288808345794678, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14095206227850182, "rewards/wordcountpos_reward/raw_geo/std": 0.09311703185119931, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715322, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1803 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1340.0, "completions/max_terminated_length": 1340.0, "completions/mean_length": 1177.0625, "completions/mean_terminated_length": 1177.0625, "completions/min_length": 929.0, "completions/min_terminated_length": 929.0, "epoch": 0.36087217443488695, "frac_reward_zero_std": 0.0, "grad_norm": 2.949699527487144, "kl": 0.01171112060546875, "learning_rate": 8.262325527949206e-07, "loss": 0.0258, "num_tokens": 78494049.0, "reward": 0.0, "reward_std": 0.7659751772880554, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.006762738395120067, "rewards/wordcountpos_reward/raw_geo/std": 0.08085406769676408, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08153617692869926, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1804 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1326.0, "completions/max_terminated_length": 1326.0, "completions/mean_length": 1038.875, "completions/mean_terminated_length": 1038.875, "completions/min_length": 896.0, "completions/min_terminated_length": 896.0, "epoch": 0.36107221444288856, "frac_reward_zero_std": 0.0, "grad_norm": 3.495448379744296, "kl": 0.01702880859375, "learning_rate": 8.25984425922116e-07, "loss": 0.012, "num_tokens": 78534271.0, "reward": 0.0, "reward_std": 0.8465592861175537, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05835433192742853, "rewards/wordcountpos_reward/raw_geo/std": 0.09148151953920727, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.03626037527129048, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1805 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1292.0, "completions/mean_terminated_length": 1167.2000732421875, "completions/min_length": 820.0, "completions/min_terminated_length": 820.0, "epoch": 0.3612722544508902, "frac_reward_zero_std": 0.0, "grad_norm": 3.21159157079306, "kl": 0.0166778564453125, "learning_rate": 8.257361644780519e-07, "loss": -0.0547, "num_tokens": 78577151.0, "reward": 3.725290298461914e-08, "reward_std": 0.9943972826004028, "rewards/wordcountpos_reward/mean": 3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.24471950128552003, "rewards/wordcountpos_reward/raw_geo/std": 0.2686332432030244, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1806 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1434.0, "completions/mean_length": 1212.5, "completions/mean_terminated_length": 1171.4285888671875, "completions/min_length": 941.0, "completions/min_terminated_length": 941.0, "epoch": 0.3614722944588918, "frac_reward_zero_std": 0.0, "grad_norm": 2.9356515905635083, "kl": 0.0156707763671875, "learning_rate": 8.254877685837824e-07, "loss": -0.0322, "num_tokens": 78622559.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0484960079193115, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.00981040337832653, "rewards/wordcountpos_reward/raw_geo/std": 0.05010981423305183, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.10027739304327551, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1807 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1090.75, "completions/mean_terminated_length": 1063.4666748046875, "completions/min_length": 808.0, "completions/min_terminated_length": 808.0, "epoch": 0.3616723344668934, "frac_reward_zero_std": 0.0, "grad_norm": 3.1252609672388645, "kl": 0.01495361328125, "learning_rate": 8.252392383604255e-07, "loss": 0.0216, "num_tokens": 78663371.0, "reward": -3.725290298461914e-08, "reward_std": 0.9002479314804077, "rewards/wordcountpos_reward/mean": -3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07777724975624825, "rewards/wordcountpos_reward/raw_geo/std": 0.10757979186213981, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593313, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1808 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1221.0, "completions/mean_length": 1252.8125, "completions/mean_terminated_length": 1060.5555419921875, "completions/min_length": 831.0, "completions/min_terminated_length": 831.0, "epoch": 0.36187237447489495, "frac_reward_zero_std": 0.0, "grad_norm": 2.9499258805514774, "kl": 0.0132904052734375, "learning_rate": 8.249905739291665e-07, "loss": -0.048, "num_tokens": 78721856.0, "reward": 0.0, "reward_std": 0.29172074794769287, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.16820539670743995, "rewards/wordcountpos_reward/raw_geo/std": 0.17688862008960812, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087681, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1809 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1286.0, "completions/max_terminated_length": 1286.0, "completions/mean_length": 1105.375, "completions/mean_terminated_length": 1105.375, "completions/min_length": 967.0, "completions/min_terminated_length": 967.0, "epoch": 0.36207241448289657, "frac_reward_zero_std": 0.0, "grad_norm": 3.302669162572897, "kl": 0.0185546875, "learning_rate": 8.247417754112548e-07, "loss": -0.0221, "num_tokens": 78764430.0, "reward": 0.0, "reward_std": 0.9743021726608276, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.057774432361472575, "rewards/wordcountpos_reward/raw_geo/std": 0.17914478841372436, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1810 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1316.0, "completions/max_terminated_length": 1316.0, "completions/mean_length": 1074.3125, "completions/mean_terminated_length": 1074.3125, "completions/min_length": 687.0, "completions/min_terminated_length": 687.0, "epoch": 0.3622724544908982, "frac_reward_zero_std": 0.0, "grad_norm": 2.8862356222855836, "kl": 0.0129852294921875, "learning_rate": 8.244928429280058e-07, "loss": -0.0118, "num_tokens": 78815435.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0328383445739746, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.19936709670928118, "rewards/wordcountpos_reward/raw_geo/std": 0.12374873256768339, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1811 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1270.5625, "completions/mean_terminated_length": 1255.2667236328125, "completions/min_length": 967.0, "completions/min_terminated_length": 967.0, "epoch": 0.3624724944988998, "frac_reward_zero_std": 0.0, "grad_norm": 3.362947019605936, "kl": 0.018341064453125, "learning_rate": 8.242437766008001e-07, "loss": -0.003, "num_tokens": 78864652.0, "reward": 0.0, "reward_std": 0.9848069548606873, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2910261059173036, "rewards/wordcountpos_reward/raw_geo/std": 0.4390551779593427, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.1641476300299351, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1812 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1404.0, "completions/mean_length": 1398.25, "completions/mean_terminated_length": 1267.4285888671875, "completions/min_length": 1193.0, "completions/min_terminated_length": 1193.0, "epoch": 0.3626725345069014, "frac_reward_zero_std": 0.0, "grad_norm": 2.9282462948694925, "kl": 0.016021728515625, "learning_rate": 8.239945765510837e-07, "loss": -0.0014, "num_tokens": 78921368.0, "reward": 0.0, "reward_std": 1.020251989364624, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10727308133884657, "rewards/wordcountpos_reward/raw_geo/std": 0.07128573848954754, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11122216672215288, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1813 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1496.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1007.25, "completions/mean_terminated_length": 1007.25, "completions/min_length": 744.0, "completions/min_terminated_length": 744.0, "epoch": 0.36287257451490296, "frac_reward_zero_std": 0.0, "grad_norm": 3.844677273517912, "kl": 0.021209716796875, "learning_rate": 8.237452429003676e-07, "loss": 0.0591, "num_tokens": 78960388.0, "reward": 0.0, "reward_std": 0.9148995876312256, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.021322126686447004, "rewards/wordcountpos_reward/raw_geo/std": 0.14209340342548074, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1814 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1385.0, "completions/max_terminated_length": 1385.0, "completions/mean_length": 1054.5625, "completions/mean_terminated_length": 1054.5625, "completions/min_length": 780.0, "completions/min_terminated_length": 780.0, "epoch": 0.36307261452290457, "frac_reward_zero_std": 0.0, "grad_norm": 3.047800812824658, "kl": 0.01251220703125, "learning_rate": 8.23495775770228e-07, "loss": -0.0189, "num_tokens": 78993869.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9362246990203857, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12353954082351107, "rewards/wordcountpos_reward/raw_geo/std": 0.24604683802449492, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1815 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1324.0, "completions/max_terminated_length": 1324.0, "completions/mean_length": 1059.25, "completions/mean_terminated_length": 1059.25, "completions/min_length": 795.0, "completions/min_terminated_length": 795.0, "epoch": 0.3632726545309062, "frac_reward_zero_std": 0.0, "grad_norm": 3.5556943495654374, "kl": 0.0140838623046875, "learning_rate": 8.232461752823062e-07, "loss": -0.0575, "num_tokens": 79049849.0, "reward": 0.0, "reward_std": 0.9359241724014282, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0049627015493997366, "rewards/wordcountpos_reward/raw_geo/std": 0.03136479253111927, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.0758897836290186, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1816 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 1325.75, "completions/mean_terminated_length": 1285.5384521484375, "completions/min_length": 1155.0, "completions/min_terminated_length": 1155.0, "epoch": 0.3634726945389078, "frac_reward_zero_std": 0.0, "grad_norm": 3.0159906307633295, "kl": 0.0191192626953125, "learning_rate": 8.229964415583086e-07, "loss": -0.0058, "num_tokens": 79092245.0, "reward": 5.960464477539063e-08, "reward_std": 0.7740247249603271, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.141742588716008, "rewards/wordcountpos_reward/raw_geo/std": 0.18053920611945395, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746353, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1817 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1418.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1168.8125, "completions/mean_terminated_length": 1168.8125, "completions/min_length": 824.0, "completions/min_terminated_length": 824.0, "epoch": 0.3636727345469094, "frac_reward_zero_std": 0.0, "grad_norm": 3.622809751475295, "kl": 0.01934814453125, "learning_rate": 8.227465747200064e-07, "loss": -0.031, "num_tokens": 79142082.0, "reward": 0.0, "reward_std": 0.9605069160461426, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08405482463646154, "rewards/wordcountpos_reward/raw_geo/std": 0.08096982774467543, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101766, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1818 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 1280.6875, "completions/mean_terminated_length": 1266.0667724609375, "completions/min_length": 998.0, "completions/min_terminated_length": 998.0, "epoch": 0.36387277455491096, "frac_reward_zero_std": 0.0, "grad_norm": 3.0529833266420776, "kl": 0.0132904052734375, "learning_rate": 8.224965748892358e-07, "loss": 0.0162, "num_tokens": 79180189.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0431344509124756, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.26369666091407545, "rewards/wordcountpos_reward/raw_geo/std": 0.3036814403641429, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1819 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1320.75, "completions/mean_terminated_length": 1279.3846435546875, "completions/min_length": 1045.0, "completions/min_terminated_length": 1045.0, "epoch": 0.3640728145629126, "frac_reward_zero_std": 0.0, "grad_norm": 3.2941304181565414, "kl": 0.017578125, "learning_rate": 8.222464421878981e-07, "loss": 0.0146, "num_tokens": 79228385.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7098006010055542, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.057948495883413026, "rewards/wordcountpos_reward/raw_geo/std": 0.06234528827688029, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07187952884282607, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1820 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1250.0, "completions/mean_length": 1274.75, "completions/mean_terminated_length": 1049.5, "completions/min_length": 936.0, "completions/min_terminated_length": 936.0, "epoch": 0.3642728545709142, "frac_reward_zero_std": 0.0, "grad_norm": 3.3409530432648826, "kl": 0.0156707763671875, "learning_rate": 8.219961767379586e-07, "loss": -0.0277, "num_tokens": 79274237.0, "reward": 0.0, "reward_std": 0.9899231791496277, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.057861876067617266, "rewards/wordcountpos_reward/raw_geo/std": 0.14477782639437073, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09098229375970787, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1821 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1147.0, "completions/max_terminated_length": 1147.0, "completions/mean_length": 1029.25, "completions/mean_terminated_length": 1029.25, "completions/min_length": 912.0, "completions/min_terminated_length": 912.0, "epoch": 0.3644728945789158, "frac_reward_zero_std": 0.0, "grad_norm": 3.0071070392314367, "kl": 0.019500732421875, "learning_rate": 8.217457786614486e-07, "loss": -0.006, "num_tokens": 79312465.0, "reward": -7.450580596923828e-09, "reward_std": 1.045750617980957, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.14147908353252175, "rewards/wordcountpos_reward/raw_geo/std": 0.09345848530321599, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823629, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1822 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 1204.75, "completions/mean_terminated_length": 1136.615478515625, "completions/min_length": 931.0, "completions/min_terminated_length": 931.0, "epoch": 0.3646729345869174, "frac_reward_zero_std": 0.0, "grad_norm": 3.144147743271278, "kl": 0.016754150390625, "learning_rate": 8.214952480804626e-07, "loss": -0.03, "num_tokens": 79355541.0, "reward": 0.0, "reward_std": 0.9408653974533081, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07703014828798162, "rewards/wordcountpos_reward/raw_geo/std": 0.30097141426609236, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.11021863793455329, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1823 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1435.0, "completions/mean_length": 1201.375, "completions/mean_terminated_length": 1065.6363525390625, "completions/min_length": 827.0, "completions/min_terminated_length": 827.0, "epoch": 0.36487297459491896, "frac_reward_zero_std": 0.0, "grad_norm": 2.976861528842559, "kl": 0.0150909423828125, "learning_rate": 8.212445851171611e-07, "loss": -0.0047, "num_tokens": 79409979.0, "reward": -2.9802322387695312e-08, "reward_std": 0.26856812834739685, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10642404463950794, "rewards/wordcountpos_reward/raw_geo/std": 0.24518872089468613, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.15371932093796678, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1824 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1362.0, "completions/mean_length": 1219.0625, "completions/mean_terminated_length": 1200.3333740234375, "completions/min_length": 893.0, "completions/min_terminated_length": 893.0, "epoch": 0.3650730146029206, "frac_reward_zero_std": 0.0, "grad_norm": 2.87775218369435, "kl": 0.0119781494140625, "learning_rate": 8.20993789893768e-07, "loss": -0.0394, "num_tokens": 79449748.0, "reward": 5.960464477539063e-08, "reward_std": 0.3918062746524811, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12539962576777897, "rewards/wordcountpos_reward/raw_geo/std": 0.1661852518675689, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06666666666666665, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1825 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1247.0, "completions/mean_length": 1033.1875, "completions/mean_terminated_length": 1002.0667114257812, "completions/min_length": 811.0, "completions/min_terminated_length": 811.0, "epoch": 0.3652730546109222, "frac_reward_zero_std": 0.0, "grad_norm": 3.0024437396519903, "kl": 0.00919342041015625, "learning_rate": 8.207428625325724e-07, "loss": 0.0113, "num_tokens": 79480151.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8506966829299927, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.013722265177571254, "rewards/wordcountpos_reward/raw_geo/std": 0.08267955611830852, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09737289911202955, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1826 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1226.0, "completions/max_terminated_length": 1226.0, "completions/mean_length": 1030.125, "completions/mean_terminated_length": 1030.125, "completions/min_length": 898.0, "completions/min_terminated_length": 898.0, "epoch": 0.3654730946189238, "frac_reward_zero_std": 0.0, "grad_norm": 3.0778783430712706, "kl": 0.0142364501953125, "learning_rate": 8.204918031559278e-07, "loss": -0.0072, "num_tokens": 79514409.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8732375502586365, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10469602008461053, "rewards/wordcountpos_reward/raw_geo/std": 0.17817002967232337, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298359, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1827 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1303.0, "completions/max_terminated_length": 1303.0, "completions/mean_length": 902.625, "completions/mean_terminated_length": 902.625, "completions/min_length": 690.0, "completions/min_terminated_length": 690.0, "epoch": 0.3656731346269254, "frac_reward_zero_std": 0.0, "grad_norm": 2.8507718348998043, "kl": 0.01122283935546875, "learning_rate": 8.202406118862515e-07, "loss": -0.049, "num_tokens": 79541803.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9573464393615723, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.014775020794371912, "rewards/wordcountpos_reward/raw_geo/std": 0.028827887039568713, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362766, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1828 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1406.0, "completions/mean_length": 1289.0, "completions/mean_terminated_length": 1240.3077392578125, "completions/min_length": 933.0, "completions/min_terminated_length": 933.0, "epoch": 0.36587317463492697, "frac_reward_zero_std": 0.0, "grad_norm": 3.0884261143976093, "kl": 0.0144500732421875, "learning_rate": 8.199892888460257e-07, "loss": 0.0072, "num_tokens": 79587339.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9736486673355103, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.008385610714534766, "rewards/wordcountpos_reward/raw_geo/std": 0.2659185418460851, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1829 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1332.0, "completions/mean_length": 1222.9375, "completions/mean_terminated_length": 1056.7000732421875, "completions/min_length": 925.0, "completions/min_terminated_length": 925.0, "epoch": 0.3660732146429286, "frac_reward_zero_std": 0.0, "grad_norm": 3.4423035411222758, "kl": 0.017242431640625, "learning_rate": 8.197378341577969e-07, "loss": 0.0079, "num_tokens": 79633778.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9648916721343994, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.21919891557510968, "rewards/wordcountpos_reward/raw_geo/std": 0.2835315590206603, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1830 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1385.0, "completions/max_terminated_length": 1385.0, "completions/mean_length": 1077.25, "completions/mean_terminated_length": 1077.25, "completions/min_length": 878.0, "completions/min_terminated_length": 878.0, "epoch": 0.3662732546509302, "frac_reward_zero_std": 0.0, "grad_norm": 3.7603464526984602, "kl": 0.018402099609375, "learning_rate": 8.194862479441751e-07, "loss": -0.0077, "num_tokens": 79673342.0, "reward": 0.0, "reward_std": 0.5395322442054749, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.13901132861790835, "rewards/wordcountpos_reward/raw_geo/std": 0.0927639189482725, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1831 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1440.0, "completions/mean_length": 1032.9375, "completions/mean_terminated_length": 1001.800048828125, "completions/min_length": 697.0, "completions/min_terminated_length": 697.0, "epoch": 0.3664732946589318, "frac_reward_zero_std": 0.0, "grad_norm": 3.772490719780789, "kl": 0.014617919921875, "learning_rate": 8.192345303278351e-07, "loss": 0.0321, "num_tokens": 79724645.0, "reward": 0.0, "reward_std": 1.0520801544189453, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07846940681682796, "rewards/wordcountpos_reward/raw_geo/std": 0.10355564138529583, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7708333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.131021626713557, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1832 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1402.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 1011.6875, "completions/mean_terminated_length": 1011.6875, "completions/min_length": 886.0, "completions/min_terminated_length": 886.0, "epoch": 0.36667333466693336, "frac_reward_zero_std": 0.0, "grad_norm": 2.3522727681567503, "kl": 0.00550079345703125, "learning_rate": 8.189826814315157e-07, "loss": -0.0556, "num_tokens": 79771856.0, "reward": 0.0, "reward_std": 0.9150753021240234, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03839500281230674, "rewards/wordcountpos_reward/raw_geo/std": 0.04427347112677234, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11417984514369006, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1833 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1365.375, "completions/mean_terminated_length": 1260.6666259765625, "completions/min_length": 1052.0, "completions/min_terminated_length": 1052.0, "epoch": 0.36687337467493497, "frac_reward_zero_std": 0.0, "grad_norm": 3.061998896130383, "kl": 0.0146942138671875, "learning_rate": 8.187307013780192e-07, "loss": -0.0257, "num_tokens": 79826558.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8393835425376892, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2577978184836686, "rewards/wordcountpos_reward/raw_geo/std": 0.19121356925209101, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.10036968702787746, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1834 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1366.0, "completions/max_terminated_length": 1366.0, "completions/mean_length": 1127.125, "completions/mean_terminated_length": 1127.125, "completions/min_length": 980.0, "completions/min_terminated_length": 980.0, "epoch": 0.3670734146829366, "frac_reward_zero_std": 0.0, "grad_norm": 3.4650944330297193, "kl": 0.017059326171875, "learning_rate": 8.184785902902125e-07, "loss": -0.0144, "num_tokens": 79868576.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9694485664367676, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01570201510441444, "rewards/wordcountpos_reward/raw_geo/std": 0.06974948777510649, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.1276569477008451, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1835 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1195.125, "completions/mean_terminated_length": 1174.800048828125, "completions/min_length": 937.0, "completions/min_terminated_length": 937.0, "epoch": 0.3672734546909382, "frac_reward_zero_std": 0.0, "grad_norm": 3.40036586796964, "kl": 0.018310546875, "learning_rate": 8.182263482910263e-07, "loss": 0.0458, "num_tokens": 79920738.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9649240970611572, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.19635494759081326, "rewards/wordcountpos_reward/raw_geo/std": 0.1508074744900027, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258098, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1836 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1410.0, "completions/max_terminated_length": 1410.0, "completions/mean_length": 1084.0625, "completions/mean_terminated_length": 1084.0625, "completions/min_length": 840.0, "completions/min_terminated_length": 840.0, "epoch": 0.3674734946989398, "frac_reward_zero_std": 0.0, "grad_norm": 3.375024906691013, "kl": 0.01593017578125, "learning_rate": 8.179739755034543e-07, "loss": -0.0244, "num_tokens": 79956307.0, "reward": -7.450580596923828e-09, "reward_std": 1.0107815265655518, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.05276250456531001, "rewards/wordcountpos_reward/raw_geo/std": 0.06400813602425351, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08062257748298551, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1837 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1441.0, "completions/max_terminated_length": 1441.0, "completions/mean_length": 1115.625, "completions/mean_terminated_length": 1115.625, "completions/min_length": 1031.0, "completions/min_terminated_length": 1031.0, "epoch": 0.36767353470694136, "frac_reward_zero_std": 0.0, "grad_norm": 2.6501237195453733, "kl": 0.0118560791015625, "learning_rate": 8.17721472050555e-07, "loss": 0.0298, "num_tokens": 79996685.0, "reward": 2.60770320892334e-08, "reward_std": 1.0687845945358276, "rewards/wordcountpos_reward/mean": 2.60770320892334e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.17483110523366335, "rewards/wordcountpos_reward/raw_geo/std": 0.09664237231048073, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06666666666666665, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1838 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1324.625, "completions/mean_terminated_length": 1284.1539306640625, "completions/min_length": 741.0, "completions/min_terminated_length": 741.0, "epoch": 0.367873574714943, "frac_reward_zero_std": 0.0, "grad_norm": 2.652782203586809, "kl": 0.0120697021484375, "learning_rate": 8.174688380554505e-07, "loss": -0.0672, "num_tokens": 80049727.0, "reward": 0.0, "reward_std": 0.9376136660575867, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.008059046860446482, "rewards/wordcountpos_reward/raw_geo/std": 0.19958827714430133, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.10532137766186214, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1839 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1168.0, "completions/mean_length": 1124.5, "completions/mean_terminated_length": 1070.857177734375, "completions/min_length": 993.0, "completions/min_terminated_length": 993.0, "epoch": 0.3680736147229446, "frac_reward_zero_std": 0.0, "grad_norm": 2.3860024329956464, "kl": 0.0117645263671875, "learning_rate": 8.17216073641326e-07, "loss": -0.0094, "num_tokens": 80094503.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9552346467971802, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07014013620540586, "rewards/wordcountpos_reward/raw_geo/std": 0.11240748096020631, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10183501544346314, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1840 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 1117.75, "completions/mean_terminated_length": 1063.1429443359375, "completions/min_length": 826.0, "completions/min_terminated_length": 826.0, "epoch": 0.3682736547309462, "frac_reward_zero_std": 0.0, "grad_norm": 2.798661927667315, "kl": 0.014251708984375, "learning_rate": 8.169631789314304e-07, "loss": -0.0037, "num_tokens": 80128987.0, "reward": 0.0, "reward_std": 0.5800498127937317, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.025800158249784544, "rewards/wordcountpos_reward/raw_geo/std": 0.05002555048519393, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1841 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 1326.375, "completions/mean_terminated_length": 1301.571533203125, "completions/min_length": 1019.0, "completions/min_terminated_length": 1019.0, "epoch": 0.3684736947389478, "frac_reward_zero_std": 0.0, "grad_norm": 2.7011703308118733, "kl": 0.01255035400390625, "learning_rate": 8.167101540490765e-07, "loss": 0.0015, "num_tokens": 80186137.0, "reward": 0.0, "reward_std": 0.4640793800354004, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2221206158744349, "rewards/wordcountpos_reward/raw_geo/std": 0.27423727957163163, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081411, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1842 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 1105.0, "completions/mean_terminated_length": 1078.666748046875, "completions/min_length": 940.0, "completions/min_terminated_length": 940.0, "epoch": 0.36867373474694937, "frac_reward_zero_std": 0.0, "grad_norm": 2.710687970510179, "kl": 0.012603759765625, "learning_rate": 8.164569991176405e-07, "loss": 0.0025, "num_tokens": 80229001.0, "reward": 5.960464477539063e-08, "reward_std": 0.8225865364074707, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12277039445562876, "rewards/wordcountpos_reward/raw_geo/std": 0.15826646799275648, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1843 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1136.0, "completions/mean_length": 987.5, "completions/mean_terminated_length": 953.3333740234375, "completions/min_length": 825.0, "completions/min_terminated_length": 825.0, "epoch": 0.368873774754951, "frac_reward_zero_std": 0.0, "grad_norm": 2.860698521947595, "kl": 0.0139617919921875, "learning_rate": 8.162037142605618e-07, "loss": 0.0304, "num_tokens": 80274153.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9617314338684082, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07883161230613318, "rewards/wordcountpos_reward/raw_geo/std": 0.061161064731399976, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.10945995377982526, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1844 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1329.0, "completions/max_terminated_length": 1329.0, "completions/mean_length": 1074.5, "completions/mean_terminated_length": 1074.5, "completions/min_length": 839.0, "completions/min_terminated_length": 839.0, "epoch": 0.3690738147629526, "frac_reward_zero_std": 0.0, "grad_norm": 3.749670466898167, "kl": 0.021209716796875, "learning_rate": 8.159502996013432e-07, "loss": 0.0282, "num_tokens": 80314889.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9347069263458252, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11362970597815501, "rewards/wordcountpos_reward/raw_geo/std": 0.08290296972841302, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1845 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1259.5625, "completions/mean_terminated_length": 1225.21435546875, "completions/min_length": 929.0, "completions/min_terminated_length": 929.0, "epoch": 0.3692738547709542, "frac_reward_zero_std": 0.0, "grad_norm": 2.4619994178552678, "kl": 0.01153564453125, "learning_rate": 8.156967552635507e-07, "loss": -0.0477, "num_tokens": 80349322.0, "reward": 0.0, "reward_std": 0.8978796005249023, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.21797604515259808, "rewards/wordcountpos_reward/raw_geo/std": 0.06584706291386985, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1846 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1124.5, "completions/mean_terminated_length": 1099.4666748046875, "completions/min_length": 743.0, "completions/min_terminated_length": 743.0, "epoch": 0.3694738947789558, "frac_reward_zero_std": 0.0, "grad_norm": 2.8384266924612613, "kl": 0.0146026611328125, "learning_rate": 8.154430813708139e-07, "loss": 0.0388, "num_tokens": 80390146.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9611221551895142, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01323351065717172, "rewards/wordcountpos_reward/raw_geo/std": 0.051494527922501664, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1847 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1277.0, "completions/max_terminated_length": 1277.0, "completions/mean_length": 973.8125, "completions/mean_terminated_length": 973.8125, "completions/min_length": 691.0, "completions/min_terminated_length": 691.0, "epoch": 0.36967393478695737, "frac_reward_zero_std": 0.0, "grad_norm": 3.5800364932268693, "kl": 0.0137939453125, "learning_rate": 8.151892780468255e-07, "loss": 0.0432, "num_tokens": 80439639.0, "reward": -1.4901161193847656e-08, "reward_std": 0.932483434677124, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.011671567667568904, "rewards/wordcountpos_reward/raw_geo/std": 0.11520379441484395, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10945995377982527, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1848 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1453.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1094.5625, "completions/mean_terminated_length": 1094.5625, "completions/min_length": 876.0, "completions/min_terminated_length": 876.0, "epoch": 0.369873974794959, "frac_reward_zero_std": 0.0, "grad_norm": 3.369332224108131, "kl": 0.01531982421875, "learning_rate": 8.149353454153407e-07, "loss": -0.0268, "num_tokens": 80483816.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7892588376998901, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10674324255055707, "rewards/wordcountpos_reward/raw_geo/std": 0.10267537527623709, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10741060020797315, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1849 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1369.0, "completions/mean_length": 1099.125, "completions/mean_terminated_length": 1041.857177734375, "completions/min_length": 745.0, "completions/min_terminated_length": 745.0, "epoch": 0.3700740148029606, "frac_reward_zero_std": 0.0, "grad_norm": 3.133686382733146, "kl": 0.013427734375, "learning_rate": 8.146812836001785e-07, "loss": 0.0355, "num_tokens": 80520610.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7959136962890625, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07482712627769707, "rewards/wordcountpos_reward/raw_geo/std": 0.1138851042842639, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1850 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1394.0, "completions/max_terminated_length": 1394.0, "completions/mean_length": 999.3125, "completions/mean_terminated_length": 999.3125, "completions/min_length": 743.0, "completions/min_terminated_length": 743.0, "epoch": 0.3702740548109622, "frac_reward_zero_std": 0.0, "grad_norm": 3.462261724574105, "kl": 0.016815185546875, "learning_rate": 8.144270927252204e-07, "loss": 0.0059, "num_tokens": 80563279.0, "reward": 0.0, "reward_std": 0.8265146017074585, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03689876859721495, "rewards/wordcountpos_reward/raw_geo/std": 0.13863521092532205, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1851 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1351.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 952.9375, "completions/mean_terminated_length": 952.9375, "completions/min_length": 749.0, "completions/min_terminated_length": 749.0, "epoch": 0.3704740948189638, "frac_reward_zero_std": 0.0, "grad_norm": 3.333794047937921, "kl": 0.0162200927734375, "learning_rate": 8.141727729144112e-07, "loss": -0.0025, "num_tokens": 80594014.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7653108835220337, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2791406663851336, "rewards/wordcountpos_reward/raw_geo/std": 0.16561056645250977, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1852 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 1268.875, "completions/mean_terminated_length": 1163.8182373046875, "completions/min_length": 990.0, "completions/min_terminated_length": 990.0, "epoch": 0.3706741348269654, "frac_reward_zero_std": 0.0, "grad_norm": 2.4426261727639056, "kl": 0.0106658935546875, "learning_rate": 8.139183242917584e-07, "loss": -0.0181, "num_tokens": 80639532.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8818885087966919, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03210156053205511, "rewards/wordcountpos_reward/raw_geo/std": 0.19276358220791479, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11147163731607214, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1853 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1195.0, "completions/mean_length": 1069.125, "completions/mean_terminated_length": 1040.4000244140625, "completions/min_length": 710.0, "completions/min_terminated_length": 710.0, "epoch": 0.370874174834967, "frac_reward_zero_std": 0.0, "grad_norm": 3.462912751322825, "kl": 0.016387939453125, "learning_rate": 8.136637469813322e-07, "loss": 0.023, "num_tokens": 80687206.0, "reward": 0.0, "reward_std": 0.8476862907409668, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0008727951135355152, "rewards/wordcountpos_reward/raw_geo/std": 0.10408340425693237, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1854 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1474.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1127.9375, "completions/mean_terminated_length": 1127.9375, "completions/min_length": 839.0, "completions/min_terminated_length": 839.0, "epoch": 0.3710742148429686, "frac_reward_zero_std": 0.0, "grad_norm": 3.660521321383793, "kl": 0.017608642578125, "learning_rate": 8.134090411072658e-07, "loss": 0.0341, "num_tokens": 80729037.0, "reward": 2.60770320892334e-08, "reward_std": 0.9623128175735474, "rewards/wordcountpos_reward/mean": 2.60770320892334e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.009920104988550593, "rewards/wordcountpos_reward/raw_geo/std": 0.018840685589641703, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11013459778666118, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1855 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1167.0, "completions/max_terminated_length": 1167.0, "completions/mean_length": 1025.1875, "completions/mean_terminated_length": 1025.1875, "completions/min_length": 861.0, "completions/min_terminated_length": 861.0, "epoch": 0.3712742548509702, "frac_reward_zero_std": 0.0, "grad_norm": 3.61923799356543, "kl": 0.01715087890625, "learning_rate": 8.131542067937548e-07, "loss": -0.025, "num_tokens": 80764208.0, "reward": 0.0, "reward_std": 1.0515928268432617, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1173617770513525, "rewards/wordcountpos_reward/raw_geo/std": 0.0839586337671737, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1856 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1382.125, "completions/mean_terminated_length": 1264.25, "completions/min_length": 1056.0, "completions/min_terminated_length": 1056.0, "epoch": 0.3714742948589718, "frac_reward_zero_std": 0.0, "grad_norm": 3.1547170307952532, "kl": 0.01800537109375, "learning_rate": 8.128992441650576e-07, "loss": 0.0075, "num_tokens": 80821066.0, "reward": 0.0, "reward_std": 0.5757333040237427, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06549123326091302, "rewards/wordcountpos_reward/raw_geo/std": 0.12149685215900144, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238706, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1857 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1401.0, "completions/mean_length": 1262.25, "completions/mean_terminated_length": 1183.0, "completions/min_length": 949.0, "completions/min_terminated_length": 949.0, "epoch": 0.3716743348669734, "frac_reward_zero_std": 0.0, "grad_norm": 3.3606634785890557, "kl": 0.0180816650390625, "learning_rate": 8.12644153345495e-07, "loss": 0.0378, "num_tokens": 80871526.0, "reward": 0.0, "reward_std": 0.9643378257751465, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2289128533987056, "rewards/wordcountpos_reward/raw_geo/std": 0.15675590382858848, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.10000000000000002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1858 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1268.6875, "completions/mean_terminated_length": 1253.2667236328125, "completions/min_length": 1119.0, "completions/min_terminated_length": 1119.0, "epoch": 0.371874374874975, "frac_reward_zero_std": 0.0, "grad_norm": 2.6221149231394136, "kl": 0.012451171875, "learning_rate": 8.123889344594509e-07, "loss": 0.0037, "num_tokens": 80915705.0, "reward": 0.0, "reward_std": 0.8176782131195068, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.16005347872427947, "rewards/wordcountpos_reward/raw_geo/std": 0.07748663831624279, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1859 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1412.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1082.625, "completions/mean_terminated_length": 1082.625, "completions/min_length": 787.0, "completions/min_terminated_length": 787.0, "epoch": 0.3720744148829766, "frac_reward_zero_std": 0.0, "grad_norm": 3.138903452248144, "kl": 0.0123443603515625, "learning_rate": 8.121335876313706e-07, "loss": -0.0106, "num_tokens": 80955875.0, "reward": 0.0, "reward_std": 0.8863394856452942, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0692910885253618, "rewards/wordcountpos_reward/raw_geo/std": 0.06053219819904118, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195305, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1860 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1024.375, "completions/mean_terminated_length": 992.666748046875, "completions/min_length": 807.0, "completions/min_terminated_length": 807.0, "epoch": 0.3722744548909782, "frac_reward_zero_std": 0.0, "grad_norm": 2.9354629314553975, "kl": 0.0108489990234375, "learning_rate": 8.118781129857628e-07, "loss": -0.0141, "num_tokens": 80999209.0, "reward": 0.0, "reward_std": 0.756900429725647, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08557220608108208, "rewards/wordcountpos_reward/raw_geo/std": 0.08448798990737123, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1861 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1368.0, "completions/max_terminated_length": 1368.0, "completions/mean_length": 1275.875, "completions/mean_terminated_length": 1275.875, "completions/min_length": 1108.0, "completions/min_terminated_length": 1108.0, "epoch": 0.37247449489897977, "frac_reward_zero_std": 0.0, "grad_norm": 2.4051441685279995, "kl": 0.009002685546875, "learning_rate": 8.116225106471978e-07, "loss": -0.0172, "num_tokens": 81037215.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9300907254219055, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05385873578666606, "rewards/wordcountpos_reward/raw_geo/std": 0.08224782029935941, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04367387557118565, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1862 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1296.0, "completions/max_terminated_length": 1296.0, "completions/mean_length": 961.8125, "completions/mean_terminated_length": 961.8125, "completions/min_length": 756.0, "completions/min_terminated_length": 756.0, "epoch": 0.3726745349069814, "frac_reward_zero_std": 0.0, "grad_norm": 3.518761209492957, "kl": 0.018524169921875, "learning_rate": 8.113667807403089e-07, "loss": -0.0051, "num_tokens": 81073156.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7832204103469849, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.012379012150703632, "rewards/wordcountpos_reward/raw_geo/std": 0.07674561883460404, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.09108400680852977, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1863 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1472.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1247.5, "completions/mean_terminated_length": 1247.5, "completions/min_length": 1000.0, "completions/min_terminated_length": 1000.0, "epoch": 0.372874574914983, "frac_reward_zero_std": 0.0, "grad_norm": 2.3484643951945947, "kl": 0.00971221923828125, "learning_rate": 8.111109233897906e-07, "loss": -0.0368, "num_tokens": 81112492.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6269381046295166, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.019791208422819635, "rewards/wordcountpos_reward/raw_geo/std": 0.1581129781355918, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.016666666666666663, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1864 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1204.9375, "completions/mean_terminated_length": 1162.7857666015625, "completions/min_length": 708.0, "completions/min_terminated_length": 708.0, "epoch": 0.3730746149229846, "frac_reward_zero_std": 0.0, "grad_norm": 2.981603826177926, "kl": 0.0142669677734375, "learning_rate": 8.108549387204003e-07, "loss": -0.0261, "num_tokens": 81159467.0, "reward": -5.960464477539063e-08, "reward_std": 0.3111901581287384, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.000691979650867508, "rewards/wordcountpos_reward/raw_geo/std": 0.11500327837435816, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1172998689652263, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1865 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1326.0, "completions/max_terminated_length": 1326.0, "completions/mean_length": 1127.375, "completions/mean_terminated_length": 1127.375, "completions/min_length": 961.0, "completions/min_terminated_length": 961.0, "epoch": 0.3732746549309862, "frac_reward_zero_std": 0.0, "grad_norm": 3.2453506066105158, "kl": 0.0151214599609375, "learning_rate": 8.105988268569574e-07, "loss": 0.0199, "num_tokens": 81203633.0, "reward": 0.0, "reward_std": 0.9021538496017456, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07556178352899041, "rewards/wordcountpos_reward/raw_geo/std": 0.17826632855308816, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1866 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1394.0, "completions/mean_length": 1315.625, "completions/mean_terminated_length": 1131.25, "completions/min_length": 955.0, "completions/min_terminated_length": 955.0, "epoch": 0.3734746949389878, "frac_reward_zero_std": 0.0, "grad_norm": 2.8131413864184305, "kl": 0.0121917724609375, "learning_rate": 8.103425879243434e-07, "loss": 0.0419, "num_tokens": 81255915.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7938408851623535, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.028796416093208238, "rewards/wordcountpos_reward/raw_geo/std": 0.09662106698154437, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1253144193766372, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1867 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1468.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1145.875, "completions/mean_terminated_length": 1145.875, "completions/min_length": 1016.0, "completions/min_terminated_length": 1016.0, "epoch": 0.3736747349469894, "frac_reward_zero_std": 0.0, "grad_norm": 3.0650565775564305, "kl": 0.01324462890625, "learning_rate": 8.100862220475012e-07, "loss": -0.0256, "num_tokens": 81290049.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0591579675674438, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.17444215583776151, "rewards/wordcountpos_reward/raw_geo/std": 0.11300093737760178, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1868 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1195.0, "completions/max_terminated_length": 1195.0, "completions/mean_length": 895.1875, "completions/mean_terminated_length": 895.1875, "completions/min_length": 705.0, "completions/min_terminated_length": 705.0, "epoch": 0.373874774954991, "frac_reward_zero_std": 0.0, "grad_norm": 4.220325569697498, "kl": 0.023956298828125, "learning_rate": 8.098297293514361e-07, "loss": 0.0157, "num_tokens": 81333420.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9887953996658325, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14887187830805024, "rewards/wordcountpos_reward/raw_geo/std": 0.06706316666557359, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.6333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.23975295927575427, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1869 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1241.0, "completions/max_terminated_length": 1241.0, "completions/mean_length": 1073.6875, "completions/mean_terminated_length": 1073.6875, "completions/min_length": 764.0, "completions/min_terminated_length": 764.0, "epoch": 0.3740748149629926, "frac_reward_zero_std": 0.0, "grad_norm": 3.763978521516728, "kl": 0.02178955078125, "learning_rate": 8.095731099612152e-07, "loss": 0.0547, "num_tokens": 81382175.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5390594005584717, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01783476792465052, "rewards/wordcountpos_reward/raw_geo/std": 0.10479206560531525, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10470416879457553, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1870 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1392.0, "completions/mean_length": 1051.625, "completions/mean_terminated_length": 1021.7333984375, "completions/min_length": 761.0, "completions/min_terminated_length": 761.0, "epoch": 0.3742748549709942, "frac_reward_zero_std": 0.0, "grad_norm": 3.8519201542175057, "kl": 0.0146636962890625, "learning_rate": 8.093163640019671e-07, "loss": -0.0041, "num_tokens": 81420329.0, "reward": -1.4901161193847656e-08, "reward_std": 1.068649411201477, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05412474011199657, "rewards/wordcountpos_reward/raw_geo/std": 0.05974750624523525, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.11155467020454343, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1871 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1385.0, "completions/mean_length": 1197.0, "completions/mean_terminated_length": 1176.800048828125, "completions/min_length": 872.0, "completions/min_terminated_length": 872.0, "epoch": 0.3744748949789958, "frac_reward_zero_std": 0.0, "grad_norm": 2.525121655366482, "kl": 0.0125274658203125, "learning_rate": 8.090594915988823e-07, "loss": -0.0214, "num_tokens": 81464977.0, "reward": -7.450580596923828e-09, "reward_std": 1.052506685256958, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.050312543323886884, "rewards/wordcountpos_reward/raw_geo/std": 0.08035050714461821, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818419, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1872 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1249.0, "completions/max_terminated_length": 1249.0, "completions/mean_length": 1069.375, "completions/mean_terminated_length": 1069.375, "completions/min_length": 909.0, "completions/min_terminated_length": 909.0, "epoch": 0.3746749349869974, "frac_reward_zero_std": 0.0, "grad_norm": 3.186200183313209, "kl": 0.0144805908203125, "learning_rate": 8.088024928772133e-07, "loss": -0.032, "num_tokens": 81514199.0, "reward": 0.0, "reward_std": 0.8151905536651611, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.006231681982518772, "rewards/wordcountpos_reward/raw_geo/std": 0.11669170906464493, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10886621079036349, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1873 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1237.0, "completions/mean_terminated_length": 1219.4666748046875, "completions/min_length": 959.0, "completions/min_terminated_length": 959.0, "epoch": 0.374874974994999, "frac_reward_zero_std": 0.0, "grad_norm": 3.1189958831984264, "kl": 0.0176544189453125, "learning_rate": 8.085453679622733e-07, "loss": -0.0213, "num_tokens": 81559367.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0618932247161865, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.019118860335724054, "rewards/wordcountpos_reward/raw_geo/std": 0.0851130398743582, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09699179041242309, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1874 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1307.8125, "completions/mean_terminated_length": 1243.75, "completions/min_length": 1031.0, "completions/min_terminated_length": 1031.0, "epoch": 0.3750750150030006, "frac_reward_zero_std": 0.0, "grad_norm": 3.2186249935994735, "kl": 0.01312255859375, "learning_rate": 8.08288116979438e-07, "loss": -0.0156, "num_tokens": 81612668.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0660264492034912, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08660032463575403, "rewards/wordcountpos_reward/raw_geo/std": 0.2848267154172795, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11279282877125756, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1875 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1448.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 1169.375, "completions/mean_terminated_length": 1169.375, "completions/min_length": 910.0, "completions/min_terminated_length": 910.0, "epoch": 0.3752750550110022, "frac_reward_zero_std": 0.0, "grad_norm": 2.522855626455326, "kl": 0.0129547119140625, "learning_rate": 8.080307400541438e-07, "loss": -0.0259, "num_tokens": 81663442.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9226261377334595, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03787609101500173, "rewards/wordcountpos_reward/raw_geo/std": 0.13690495238141798, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12285191326386659, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1876 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1090.375, "completions/mean_terminated_length": 1031.857177734375, "completions/min_length": 633.0, "completions/min_terminated_length": 633.0, "epoch": 0.3754750950190038, "frac_reward_zero_std": 0.0, "grad_norm": 2.481631576153375, "kl": 0.00853729248046875, "learning_rate": 8.077732373118892e-07, "loss": -0.039, "num_tokens": 81714144.0, "reward": 0.0, "reward_std": 0.6557045578956604, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1161541012946108, "rewards/wordcountpos_reward/raw_geo/std": 0.1707889175429663, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258099, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1877 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1122.9375, "completions/mean_terminated_length": 1069.071533203125, "completions/min_length": 752.0, "completions/min_terminated_length": 752.0, "epoch": 0.3756751350270054, "frac_reward_zero_std": 0.0, "grad_norm": 3.2440051727327015, "kl": 0.021240234375, "learning_rate": 8.075156088782336e-07, "loss": 0.0173, "num_tokens": 81756359.0, "reward": 0.0, "reward_std": 0.9885783195495605, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09043644444842076, "rewards/wordcountpos_reward/raw_geo/std": 0.17240085471737693, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.0877707451472511, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1878 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1406.0, "completions/mean_length": 1344.375, "completions/mean_terminated_length": 1223.3333740234375, "completions/min_length": 863.0, "completions/min_terminated_length": 863.0, "epoch": 0.375875175035007, "frac_reward_zero_std": 0.0, "grad_norm": 3.3864190807251497, "kl": 0.02215576171875, "learning_rate": 8.072578548787977e-07, "loss": -0.0006, "num_tokens": 81806677.0, "reward": 0.0, "reward_std": 0.8474569916725159, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08331627754897028, "rewards/wordcountpos_reward/raw_geo/std": 0.07406014793765668, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.161245154965971, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1879 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1395.1875, "completions/mean_terminated_length": 1290.375, "completions/min_length": 973.0, "completions/min_terminated_length": 973.0, "epoch": 0.3760752150430086, "frac_reward_zero_std": 0.0, "grad_norm": 2.969836669081187, "kl": 0.014556884765625, "learning_rate": 8.069999754392635e-07, "loss": -0.0172, "num_tokens": 81858560.0, "reward": 0.0, "reward_std": 0.6628202199935913, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.23797623216303446, "rewards/wordcountpos_reward/raw_geo/std": 0.2661596562254852, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1880 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1209.25, "completions/mean_terminated_length": 1189.86669921875, "completions/min_length": 995.0, "completions/min_terminated_length": 995.0, "epoch": 0.3762752550510102, "frac_reward_zero_std": 0.0, "grad_norm": 3.2050093578750936, "kl": 0.016571044921875, "learning_rate": 8.067419706853744e-07, "loss": -0.0001, "num_tokens": 81903196.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9129930734634399, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.027261719297969597, "rewards/wordcountpos_reward/raw_geo/std": 0.06716088882655795, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767716, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1881 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1311.0, "completions/max_terminated_length": 1311.0, "completions/mean_length": 1045.75, "completions/mean_terminated_length": 1045.75, "completions/min_length": 752.0, "completions/min_terminated_length": 752.0, "epoch": 0.3764752950590118, "frac_reward_zero_std": 0.0, "grad_norm": 3.578605344271528, "kl": 0.0166168212890625, "learning_rate": 8.064838407429346e-07, "loss": -0.0055, "num_tokens": 81938656.0, "reward": 0.0, "reward_std": 0.7521936893463135, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08213157758908353, "rewards/wordcountpos_reward/raw_geo/std": 0.10115363309653723, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.14950535726806533, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1882 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1479.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1220.875, "completions/mean_terminated_length": 1220.875, "completions/min_length": 1016.0, "completions/min_terminated_length": 1016.0, "epoch": 0.3766753350670134, "frac_reward_zero_std": 0.0, "grad_norm": 2.810146959634015, "kl": 0.0140838623046875, "learning_rate": 8.062255857378093e-07, "loss": 0.0195, "num_tokens": 81986622.0, "reward": 1.4901161193847656e-08, "reward_std": 1.032131314277649, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0015396827552956668, "rewards/wordcountpos_reward/raw_geo/std": 0.3054826164216197, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1883 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1236.875, "completions/mean_terminated_length": 1149.166748046875, "completions/min_length": 683.0, "completions/min_terminated_length": 683.0, "epoch": 0.376875375075015, "frac_reward_zero_std": 0.0, "grad_norm": 3.070952962145064, "kl": 0.0138702392578125, "learning_rate": 8.059672057959249e-07, "loss": -0.0011, "num_tokens": 82040596.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0383249521255493, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1995550056065262, "rewards/wordcountpos_reward/raw_geo/std": 0.21835264134876262, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07685966046898339, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1884 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1236.1875, "completions/mean_terminated_length": 1175.3077392578125, "completions/min_length": 971.0, "completions/min_terminated_length": 971.0, "epoch": 0.3770754150830166, "frac_reward_zero_std": 0.0, "grad_norm": 3.2556958644413214, "kl": 0.017822265625, "learning_rate": 8.057087010432686e-07, "loss": 0.0179, "num_tokens": 82093159.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9340291023254395, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0845777446405428, "rewards/wordcountpos_reward/raw_geo/std": 0.2351616726282149, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0909822937597079, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1885 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1417.0, "completions/max_terminated_length": 1417.0, "completions/mean_length": 1100.25, "completions/mean_terminated_length": 1100.25, "completions/min_length": 825.0, "completions/min_terminated_length": 825.0, "epoch": 0.37727545509101823, "frac_reward_zero_std": 0.0, "grad_norm": 3.7600843921522267, "kl": 0.0196533203125, "learning_rate": 8.054500716058886e-07, "loss": 0.0105, "num_tokens": 82145347.0, "reward": 0.0, "reward_std": 0.8025621175765991, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09784720800465033, "rewards/wordcountpos_reward/raw_geo/std": 0.2004384300014111, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.0909822937597079, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1886 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1134.75, "completions/mean_terminated_length": 1110.4000244140625, "completions/min_length": 807.0, "completions/min_terminated_length": 807.0, "epoch": 0.3774754950990198, "frac_reward_zero_std": 0.0, "grad_norm": 2.889865735440464, "kl": 0.01061248779296875, "learning_rate": 8.051913176098937e-07, "loss": -0.0038, "num_tokens": 82187375.0, "reward": 0.0, "reward_std": 0.9804905652999878, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0034500681463277188, "rewards/wordcountpos_reward/raw_geo/std": 0.07566302851856088, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05374838498865701, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1887 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1221.0, "completions/max_terminated_length": 1221.0, "completions/mean_length": 1086.3125, "completions/mean_terminated_length": 1086.3125, "completions/min_length": 799.0, "completions/min_terminated_length": 799.0, "epoch": 0.3776755351070214, "frac_reward_zero_std": 0.0, "grad_norm": 3.5220808448269145, "kl": 0.021697998046875, "learning_rate": 8.049324391814534e-07, "loss": -0.0521, "num_tokens": 82229972.0, "reward": 0.0, "reward_std": 0.9319818019866943, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.37637222217987093, "rewards/wordcountpos_reward/raw_geo/std": 0.15539419960637904, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1888 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1200.0, "completions/mean_length": 1065.3125, "completions/mean_terminated_length": 1036.3333740234375, "completions/min_length": 891.0, "completions/min_terminated_length": 891.0, "epoch": 0.377875575115023, "frac_reward_zero_std": 0.0, "grad_norm": 3.566475860277026, "kl": 0.01519775390625, "learning_rate": 8.046734364467983e-07, "loss": 0.0214, "num_tokens": 82281929.0, "reward": 2.9802322387695312e-08, "reward_std": 0.821873128414154, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01184074625030777, "rewards/wordcountpos_reward/raw_geo/std": 0.099791638864631, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.033333333333333326, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1889 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1237.0, "completions/max_terminated_length": 1237.0, "completions/mean_length": 970.9375, "completions/mean_terminated_length": 970.9375, "completions/min_length": 712.0, "completions/min_terminated_length": 712.0, "epoch": 0.3780756151230246, "frac_reward_zero_std": 0.0, "grad_norm": 3.644251450810969, "kl": 0.017333984375, "learning_rate": 8.044143095322191e-07, "loss": 0.0218, "num_tokens": 82322832.0, "reward": 0.0, "reward_std": 0.8006108403205872, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.12139100008409892, "rewards/wordcountpos_reward/raw_geo/std": 0.1563176934500072, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08062257748298551, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1890 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1329.875, "completions/mean_terminated_length": 1227.800048828125, "completions/min_length": 824.0, "completions/min_terminated_length": 824.0, "epoch": 0.3782756551310262, "frac_reward_zero_std": 0.0, "grad_norm": 3.0586865433023194, "kl": 0.0140228271484375, "learning_rate": 8.041550585640672e-07, "loss": -0.0168, "num_tokens": 82370886.0, "reward": -5.960464477539063e-08, "reward_std": 0.5831820368766785, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.19207673918592394, "rewards/wordcountpos_reward/raw_geo/std": 0.20628932244654444, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.15962919996504865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1891 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1239.75, "completions/mean_terminated_length": 1202.571533203125, "completions/min_length": 1053.0, "completions/min_terminated_length": 1053.0, "epoch": 0.3784756951390278, "frac_reward_zero_std": 0.0, "grad_norm": 2.9939547043824777, "kl": 0.0130462646484375, "learning_rate": 8.038956836687548e-07, "loss": 0.0198, "num_tokens": 82414578.0, "reward": 0.0, "reward_std": 0.9306091070175171, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14981470939782926, "rewards/wordcountpos_reward/raw_geo/std": 0.05266120876393239, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857661, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1892 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1171.5, "completions/mean_terminated_length": 1124.571533203125, "completions/min_length": 932.0, "completions/min_terminated_length": 932.0, "epoch": 0.3786757351470294, "frac_reward_zero_std": 0.0, "grad_norm": 2.711848169448823, "kl": 0.01030731201171875, "learning_rate": 8.03636184972754e-07, "loss": 0.0654, "num_tokens": 82457890.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0340769290924072, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.057395746879333985, "rewards/wordcountpos_reward/raw_geo/std": 0.10822074335298774, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1893 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1213.0, "completions/mean_terminated_length": 1082.5455322265625, "completions/min_length": 819.0, "completions/min_terminated_length": 819.0, "epoch": 0.378875775155031, "frac_reward_zero_std": 0.0, "grad_norm": 2.443336326365855, "kl": 0.00922393798828125, "learning_rate": 8.033765626025977e-07, "loss": -0.0137, "num_tokens": 82506202.0, "reward": 0.0, "reward_std": 0.4957790970802307, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10059129430785817, "rewards/wordcountpos_reward/raw_geo/std": 0.08658025150727795, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13877773329774218, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1894 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1312.0, "completions/max_terminated_length": 1312.0, "completions/mean_length": 1118.1875, "completions/mean_terminated_length": 1118.1875, "completions/min_length": 892.0, "completions/min_terminated_length": 892.0, "epoch": 0.3790758151630326, "frac_reward_zero_std": 0.0, "grad_norm": 2.910752801774246, "kl": 0.01043701171875, "learning_rate": 8.03116816684879e-07, "loss": -0.0203, "num_tokens": 82547973.0, "reward": 7.450580596923828e-09, "reward_std": 1.0637421607971191, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.07354794964001503, "rewards/wordcountpos_reward/raw_geo/std": 0.09675585555016318, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1895 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1341.0, "completions/mean_length": 1091.125, "completions/mean_terminated_length": 1063.86669921875, "completions/min_length": 801.0, "completions/min_terminated_length": 801.0, "epoch": 0.3792758551710342, "frac_reward_zero_std": 0.0, "grad_norm": 3.506860643230018, "kl": 0.019622802734375, "learning_rate": 8.028569473462509e-07, "loss": 0.0017, "num_tokens": 82580807.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0047712326049805, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.054303418941168954, "rewards/wordcountpos_reward/raw_geo/std": 0.034672807493010945, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194864, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1896 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1386.0, "completions/max_terminated_length": 1386.0, "completions/mean_length": 1012.875, "completions/mean_terminated_length": 1012.875, "completions/min_length": 624.0, "completions/min_terminated_length": 624.0, "epoch": 0.3794758951790358, "frac_reward_zero_std": 0.0, "grad_norm": 3.8299178980494855, "kl": 0.021484375, "learning_rate": 8.025969547134273e-07, "loss": 0.0149, "num_tokens": 82618805.0, "reward": 2.9802322387695312e-08, "reward_std": 0.831100344657898, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11396198132369267, "rewards/wordcountpos_reward/raw_geo/std": 0.10066201792974971, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09803627446568497, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1897 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1478.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1226.625, "completions/mean_terminated_length": 1226.625, "completions/min_length": 985.0, "completions/min_terminated_length": 985.0, "epoch": 0.3796759351870374, "frac_reward_zero_std": 0.0, "grad_norm": 3.2448259651169975, "kl": 0.0142669677734375, "learning_rate": 8.023368389131815e-07, "loss": 0.0322, "num_tokens": 82664807.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9694257974624634, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10596751817496983, "rewards/wordcountpos_reward/raw_geo/std": 0.34621208559483735, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1898 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1466.0, "completions/max_terminated_length": 1466.0, "completions/mean_length": 1071.0, "completions/mean_terminated_length": 1071.0, "completions/min_length": 886.0, "completions/min_terminated_length": 886.0, "epoch": 0.379875975195039, "frac_reward_zero_std": 0.0, "grad_norm": 3.2259274038463164, "kl": 0.016845703125, "learning_rate": 8.020766000723471e-07, "loss": -0.042, "num_tokens": 82717111.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7183211445808411, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.14948507660395502, "rewards/wordcountpos_reward/raw_geo/std": 0.29935519168844377, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1899 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1481.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1083.9375, "completions/mean_terminated_length": 1083.9375, "completions/min_length": 882.0, "completions/min_terminated_length": 882.0, "epoch": 0.38007601520304063, "frac_reward_zero_std": 0.0, "grad_norm": 3.747387611436037, "kl": 0.016876220703125, "learning_rate": 8.01816238317818e-07, "loss": 0.0468, "num_tokens": 82759254.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9764562845230103, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11053239657412174, "rewards/wordcountpos_reward/raw_geo/std": 0.15269617347572706, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820635, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1900 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1472.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1144.75, "completions/mean_terminated_length": 1144.75, "completions/min_length": 754.0, "completions/min_terminated_length": 754.0, "epoch": 0.3802760552110422, "frac_reward_zero_std": 0.0, "grad_norm": 3.377289431117488, "kl": 0.022613525390625, "learning_rate": 8.015557537765475e-07, "loss": -0.0154, "num_tokens": 82810106.0, "reward": 0.0, "reward_std": 0.7665531039237976, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0014095033058528997, "rewards/wordcountpos_reward/raw_geo/std": 0.22012106481303725, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194864, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1901 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1278.8125, "completions/mean_terminated_length": 1264.0667724609375, "completions/min_length": 894.0, "completions/min_terminated_length": 894.0, "epoch": 0.3804760952190438, "frac_reward_zero_std": 0.0, "grad_norm": 3.2903890421268502, "kl": 0.01605224609375, "learning_rate": 8.012951465755493e-07, "loss": 0.0512, "num_tokens": 82864639.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8410732746124268, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.015532918243747168, "rewards/wordcountpos_reward/raw_geo/std": 0.07347207971178882, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1902 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1303.0, "completions/mean_length": 956.375, "completions/mean_terminated_length": 920.1333618164062, "completions/min_length": 590.0, "completions/min_terminated_length": 590.0, "epoch": 0.3806761352270454, "frac_reward_zero_std": 0.0, "grad_norm": 4.096327591645464, "kl": 0.0208740234375, "learning_rate": 8.010344168418965e-07, "loss": -0.0183, "num_tokens": 82906685.0, "reward": 0.0, "reward_std": 0.8456340432167053, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12525115547120777, "rewards/wordcountpos_reward/raw_geo/std": 0.0786926357999723, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.08062257748298551, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1903 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1495.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1209.875, "completions/mean_terminated_length": 1209.875, "completions/min_length": 945.0, "completions/min_terminated_length": 945.0, "epoch": 0.380876175235047, "frac_reward_zero_std": 0.0, "grad_norm": 2.8117479246415122, "kl": 0.014495849609375, "learning_rate": 8.00773564702722e-07, "loss": 0.028, "num_tokens": 82948611.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8906652331352234, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.023887161527222603, "rewards/wordcountpos_reward/raw_geo/std": 0.08909066334246608, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07685966046898336, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1904 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1221.0, "completions/max_terminated_length": 1221.0, "completions/mean_length": 924.1875, "completions/mean_terminated_length": 924.1875, "completions/min_length": 817.0, "completions/min_terminated_length": 817.0, "epoch": 0.38107621524304863, "frac_reward_zero_std": 0.0, "grad_norm": 2.4799648897793007, "kl": 0.0122833251953125, "learning_rate": 8.005125902852187e-07, "loss": -0.0128, "num_tokens": 82988510.0, "reward": 0.0, "reward_std": 0.7398412823677063, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0534892601617431, "rewards/wordcountpos_reward/raw_geo/std": 0.053296167831901234, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12382783747337807, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1905 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1271.0, "completions/max_terminated_length": 1271.0, "completions/mean_length": 1029.1875, "completions/mean_terminated_length": 1029.1875, "completions/min_length": 871.0, "completions/min_terminated_length": 871.0, "epoch": 0.3812762552510502, "frac_reward_zero_std": 0.0, "grad_norm": 3.840358329587029, "kl": 0.020843505859375, "learning_rate": 8.002514937166387e-07, "loss": -0.0113, "num_tokens": 83038641.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0676360130310059, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2009288174816079, "rewards/wordcountpos_reward/raw_geo/std": 0.28535041117784893, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14194417264596723, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1906 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1335.0, "completions/max_terminated_length": 1335.0, "completions/mean_length": 1167.75, "completions/mean_terminated_length": 1167.75, "completions/min_length": 1052.0, "completions/min_terminated_length": 1052.0, "epoch": 0.3814762952590518, "frac_reward_zero_std": 0.0, "grad_norm": 3.6111888905532146, "kl": 0.018646240234375, "learning_rate": 7.999902751242942e-07, "loss": -0.0054, "num_tokens": 83077181.0, "reward": 0.0, "reward_std": 0.7873083353042603, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.010181010317690024, "rewards/wordcountpos_reward/raw_geo/std": 0.12025182911661554, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1907 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1425.0, "completions/mean_length": 1273.5625, "completions/mean_terminated_length": 1137.7000732421875, "completions/min_length": 987.0, "completions/min_terminated_length": 987.0, "epoch": 0.3816763352670534, "frac_reward_zero_std": 0.0, "grad_norm": 2.6335557711050406, "kl": 0.010528564453125, "learning_rate": 7.997289346355562e-07, "loss": 0.0049, "num_tokens": 83130646.0, "reward": 0.0, "reward_std": 0.7262375354766846, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2436238092014079, "rewards/wordcountpos_reward/raw_geo/std": 0.39267478350127427, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.0596284793999944, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1908 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1479.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1124.8125, "completions/mean_terminated_length": 1124.8125, "completions/min_length": 907.0, "completions/min_terminated_length": 907.0, "epoch": 0.381876375275055, "frac_reward_zero_std": 0.0, "grad_norm": 3.450853428056311, "kl": 0.01708984375, "learning_rate": 7.994674723778559e-07, "loss": 0.0041, "num_tokens": 83171731.0, "reward": 0.0, "reward_std": 0.7523471117019653, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0691691540357116, "rewards/wordcountpos_reward/raw_geo/std": 0.14948410592979197, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.11917929226045819, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1909 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1204.0, "completions/max_terminated_length": 1204.0, "completions/mean_length": 1008.0, "completions/mean_terminated_length": 1008.0, "completions/min_length": 821.0, "completions/min_terminated_length": 821.0, "epoch": 0.38207641528305664, "frac_reward_zero_std": 0.0, "grad_norm": 3.2514580396190254, "kl": 0.0127716064453125, "learning_rate": 7.99205888478683e-07, "loss": -0.0037, "num_tokens": 83224363.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9400133490562439, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11794539773957492, "rewards/wordcountpos_reward/raw_geo/std": 0.04181931987584059, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476838, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1910 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1343.0, "completions/mean_length": 1166.875, "completions/mean_terminated_length": 1144.666748046875, "completions/min_length": 980.0, "completions/min_terminated_length": 980.0, "epoch": 0.3822764552910582, "frac_reward_zero_std": 0.0, "grad_norm": 3.367947892053645, "kl": 0.015869140625, "learning_rate": 7.989441830655873e-07, "loss": 0.0071, "num_tokens": 83266737.0, "reward": -7.450580596923828e-09, "reward_std": 1.0459463596343994, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.0356829687626893, "rewards/wordcountpos_reward/raw_geo/std": 0.12232737379716122, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101763, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1911 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1142.375, "completions/mean_terminated_length": 1118.533447265625, "completions/min_length": 569.0, "completions/min_terminated_length": 569.0, "epoch": 0.3824764952990598, "frac_reward_zero_std": 0.0, "grad_norm": 1.8571663561070442, "kl": 0.00499725341796875, "learning_rate": 7.986823562661776e-07, "loss": -0.072, "num_tokens": 83321543.0, "reward": -2.9802322387695312e-08, "reward_std": 0.680799126625061, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09314868544358086, "rewards/wordcountpos_reward/raw_geo/std": 0.051863610414294346, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13709958532503408, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1912 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1414.0, "completions/mean_length": 1206.375, "completions/mean_terminated_length": 1186.800048828125, "completions/min_length": 1038.0, "completions/min_terminated_length": 1038.0, "epoch": 0.3826765353070614, "frac_reward_zero_std": 0.0, "grad_norm": 2.9906350833456017, "kl": 0.015838623046875, "learning_rate": 7.984204082081217e-07, "loss": 0.0018, "num_tokens": 83373909.0, "reward": 0.0, "reward_std": 0.6740381121635437, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.26119803744006964, "rewards/wordcountpos_reward/raw_geo/std": 0.4648392765067783, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460884, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1913 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1445.0, "completions/mean_length": 1218.8125, "completions/mean_terminated_length": 1178.6429443359375, "completions/min_length": 971.0, "completions/min_terminated_length": 971.0, "epoch": 0.38287657531506303, "frac_reward_zero_std": 0.0, "grad_norm": 3.423520409266596, "kl": 0.01885986328125, "learning_rate": 7.981583390191468e-07, "loss": -0.0093, "num_tokens": 83428282.0, "reward": 0.0, "reward_std": 0.7193698883056641, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05477221053843989, "rewards/wordcountpos_reward/raw_geo/std": 0.20083458612699479, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.1029203215725281, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1914 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1386.0, "completions/mean_length": 1210.625, "completions/mean_terminated_length": 1114.166748046875, "completions/min_length": 407.0, "completions/min_terminated_length": 407.0, "epoch": 0.38307661532306464, "frac_reward_zero_std": 0.0, "grad_norm": 3.2288554004054246, "kl": 0.0160675048828125, "learning_rate": 7.978961488270389e-07, "loss": -0.0348, "num_tokens": 83478780.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9395007491111755, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07076686328315875, "rewards/wordcountpos_reward/raw_geo/std": 0.13727354355278978, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04367387557118565, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1915 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1379.0, "completions/mean_length": 1140.3125, "completions/mean_terminated_length": 1116.3333740234375, "completions/min_length": 775.0, "completions/min_terminated_length": 775.0, "epoch": 0.3832766553310662, "frac_reward_zero_std": 0.0, "grad_norm": 3.6249742575345674, "kl": 0.017059326171875, "learning_rate": 7.97633837759643e-07, "loss": -0.0431, "num_tokens": 83516481.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0335273742675781, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.029078230617636154, "rewards/wordcountpos_reward/raw_geo/std": 0.0537862839793503, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258098, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1916 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1349.0, "completions/max_terminated_length": 1349.0, "completions/mean_length": 1094.875, "completions/mean_terminated_length": 1094.875, "completions/min_length": 920.0, "completions/min_terminated_length": 920.0, "epoch": 0.3834766953390678, "frac_reward_zero_std": 0.0, "grad_norm": 3.374077741996293, "kl": 0.0152587890625, "learning_rate": 7.973714059448634e-07, "loss": -0.0099, "num_tokens": 83566407.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9962517023086548, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05916642950267013, "rewards/wordcountpos_reward/raw_geo/std": 0.13146134105735469, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.070841502796867, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1917 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1384.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 1186.0625, "completions/mean_terminated_length": 1186.0625, "completions/min_length": 903.0, "completions/min_terminated_length": 903.0, "epoch": 0.3836767353470694, "frac_reward_zero_std": 0.0, "grad_norm": 2.0649748135730386, "kl": 0.0080718994140625, "learning_rate": 7.97108853510663e-07, "loss": -0.0155, "num_tokens": 83606872.0, "reward": -3.725290298461914e-09, "reward_std": 1.029812216758728, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.07721551483535441, "rewards/wordcountpos_reward/raw_geo/std": 0.08075121500835286, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9875, "rewards/wordcountpos_reward/raw_rule/std": 0.03626037527129048, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1918 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1432.0, "completions/mean_length": 1195.9375, "completions/mean_terminated_length": 1175.666748046875, "completions/min_length": 840.0, "completions/min_terminated_length": 840.0, "epoch": 0.38387677535507103, "frac_reward_zero_std": 0.0, "grad_norm": 3.1599189163991874, "kl": 0.015106201171875, "learning_rate": 7.968461805850635e-07, "loss": -0.0357, "num_tokens": 83649719.0, "reward": 1.862645149230957e-08, "reward_std": 1.005957841873169, "rewards/wordcountpos_reward/mean": 1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.16293989953374052, "rewards/wordcountpos_reward/raw_geo/std": 0.09783115286378856, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1919 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1143.5, "completions/mean_terminated_length": 1119.7333984375, "completions/min_length": 779.0, "completions/min_terminated_length": 779.0, "epoch": 0.3840768153630726, "frac_reward_zero_std": 0.0, "grad_norm": 3.201375345409664, "kl": 0.017822265625, "learning_rate": 7.965833872961455e-07, "loss": 0.0142, "num_tokens": 83691879.0, "reward": -2.9802322387695312e-08, "reward_std": 0.794905424118042, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.293349978442748, "rewards/wordcountpos_reward/raw_geo/std": 0.12941395563903416, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1920 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1462.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1307.9375, "completions/mean_terminated_length": 1307.9375, "completions/min_length": 1110.0, "completions/min_terminated_length": 1110.0, "epoch": 0.3842768553710742, "frac_reward_zero_std": 0.0, "grad_norm": 2.9169026222099967, "kl": 0.01287841796875, "learning_rate": 7.963204737720481e-07, "loss": 0.0266, "num_tokens": 83727846.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0219136476516724, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04929161266486752, "rewards/wordcountpos_reward/raw_geo/std": 0.08873550372034363, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.1029203215725281, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1921 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1348.0, "completions/max_terminated_length": 1348.0, "completions/mean_length": 1097.125, "completions/mean_terminated_length": 1097.125, "completions/min_length": 916.0, "completions/min_terminated_length": 916.0, "epoch": 0.3844768953790758, "frac_reward_zero_std": 0.0, "grad_norm": 3.3889067133813082, "kl": 0.0196990966796875, "learning_rate": 7.960574401409693e-07, "loss": -0.0179, "num_tokens": 83771176.0, "reward": 4.470348358154297e-08, "reward_std": 1.0158910751342773, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1506740780249705, "rewards/wordcountpos_reward/raw_geo/std": 0.2791675303028947, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1922 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1296.0, "completions/mean_length": 1087.375, "completions/mean_terminated_length": 1059.86669921875, "completions/min_length": 763.0, "completions/min_terminated_length": 763.0, "epoch": 0.3846769353870774, "frac_reward_zero_std": 0.0, "grad_norm": 3.4753802954920094, "kl": 0.019073486328125, "learning_rate": 7.957942865311652e-07, "loss": 0.024, "num_tokens": 83813846.0, "reward": 0.0, "reward_std": 0.81082683801651, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.11667962115645698, "rewards/wordcountpos_reward/raw_geo/std": 0.17090711079478307, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133134, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1923 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1214.0, "completions/max_terminated_length": 1214.0, "completions/mean_length": 1023.25, "completions/mean_terminated_length": 1023.25, "completions/min_length": 702.0, "completions/min_terminated_length": 702.0, "epoch": 0.38487697539507904, "frac_reward_zero_std": 0.0, "grad_norm": 3.6915590412072863, "kl": 0.023468017578125, "learning_rate": 7.95531013070951e-07, "loss": -0.0138, "num_tokens": 83863090.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7343430519104004, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06907121137048625, "rewards/wordcountpos_reward/raw_geo/std": 0.09371167337310808, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.09259629622222519, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1924 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1250.0, "completions/mean_length": 1222.0, "completions/mean_terminated_length": 1055.2000732421875, "completions/min_length": 891.0, "completions/min_terminated_length": 891.0, "epoch": 0.3850770154030806, "frac_reward_zero_std": 0.0, "grad_norm": 3.1274737177814584, "kl": 0.0158233642578125, "learning_rate": 7.952676198886997e-07, "loss": 0.0192, "num_tokens": 83912826.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0447074174880981, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15561142380306806, "rewards/wordcountpos_reward/raw_geo/std": 0.07617022697144082, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1925 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1351.875, "completions/mean_terminated_length": 1284.5455322265625, "completions/min_length": 1088.0, "completions/min_terminated_length": 1088.0, "epoch": 0.3852770554110822, "frac_reward_zero_std": 0.0, "grad_norm": 2.840321222105165, "kl": 0.0164947509765625, "learning_rate": 7.950041071128433e-07, "loss": -0.0318, "num_tokens": 83968232.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0447741746902466, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.088066999987892, "rewards/wordcountpos_reward/raw_geo/std": 0.0254594803666307, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238704, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1926 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1385.0, "completions/max_terminated_length": 1385.0, "completions/mean_length": 1140.9375, "completions/mean_terminated_length": 1140.9375, "completions/min_length": 934.0, "completions/min_terminated_length": 934.0, "epoch": 0.3854770954190838, "frac_reward_zero_std": 0.0, "grad_norm": 3.7069250903539834, "kl": 0.01629638671875, "learning_rate": 7.947404748718717e-07, "loss": 0.0183, "num_tokens": 84004031.0, "reward": 0.0, "reward_std": 0.6612793803215027, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07883841972233559, "rewards/wordcountpos_reward/raw_geo/std": 0.09162495810246825, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1927 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1298.0, "completions/max_terminated_length": 1298.0, "completions/mean_length": 1033.9375, "completions/mean_terminated_length": 1033.9375, "completions/min_length": 837.0, "completions/min_terminated_length": 837.0, "epoch": 0.3856771354270854, "frac_reward_zero_std": 0.0, "grad_norm": 2.9460353264656107, "kl": 0.0142059326171875, "learning_rate": 7.944767232943333e-07, "loss": 0.0391, "num_tokens": 84052886.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8299098610877991, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0011428826439418742, "rewards/wordcountpos_reward/raw_geo/std": 0.062149762707538206, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066474, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1928 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1377.0, "completions/max_terminated_length": 1377.0, "completions/mean_length": 1083.25, "completions/mean_terminated_length": 1083.25, "completions/min_length": 911.0, "completions/min_terminated_length": 911.0, "epoch": 0.38587717543508704, "frac_reward_zero_std": 0.0, "grad_norm": 3.5411879222662117, "kl": 0.023040771484375, "learning_rate": 7.942128525088344e-07, "loss": 0.034, "num_tokens": 84095522.0, "reward": 4.470348358154297e-08, "reward_std": 0.9512640237808228, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01015780803818505, "rewards/wordcountpos_reward/raw_geo/std": 0.12137534592592701, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1261979632400061, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1929 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1324.0, "completions/mean_length": 1154.875, "completions/mean_terminated_length": 1105.571533203125, "completions/min_length": 861.0, "completions/min_terminated_length": 861.0, "epoch": 0.3860772154430886, "frac_reward_zero_std": 0.0, "grad_norm": 3.647945775186784, "kl": 0.0185394287109375, "learning_rate": 7.939488626440398e-07, "loss": -0.0045, "num_tokens": 84137736.0, "reward": 0.0, "reward_std": 0.7156025767326355, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.22226411035292845, "rewards/wordcountpos_reward/raw_geo/std": 0.21882135942235223, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10610965676722953, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1930 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1332.0, "completions/max_terminated_length": 1332.0, "completions/mean_length": 1157.3125, "completions/mean_terminated_length": 1157.3125, "completions/min_length": 973.0, "completions/min_terminated_length": 973.0, "epoch": 0.3862772554510902, "frac_reward_zero_std": 0.0, "grad_norm": 2.975455875513827, "kl": 0.014312744140625, "learning_rate": 7.936847538286718e-07, "loss": -0.025, "num_tokens": 84185613.0, "reward": 0.0, "reward_std": 0.9196183681488037, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0779048186616783, "rewards/wordcountpos_reward/raw_geo/std": 0.13039687629287663, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409445, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1931 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1345.0, "completions/max_terminated_length": 1345.0, "completions/mean_length": 1078.8125, "completions/mean_terminated_length": 1078.8125, "completions/min_length": 887.0, "completions/min_terminated_length": 887.0, "epoch": 0.3864772954590918, "frac_reward_zero_std": 0.0, "grad_norm": 3.07950053345988, "kl": 0.0143280029296875, "learning_rate": 7.934205261915114e-07, "loss": 0.0185, "num_tokens": 84225122.0, "reward": 0.0, "reward_std": 1.0661547183990479, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05891146031039509, "rewards/wordcountpos_reward/raw_geo/std": 0.07466751504177721, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1932 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1210.125, "completions/mean_terminated_length": 1143.2308349609375, "completions/min_length": 1008.0, "completions/min_terminated_length": 1008.0, "epoch": 0.38667733546709343, "frac_reward_zero_std": 0.0, "grad_norm": 2.7950355832783096, "kl": 0.0130462646484375, "learning_rate": 7.931561798613972e-07, "loss": 0.0076, "num_tokens": 84277316.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0157172679901123, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02867748336574919, "rewards/wordcountpos_reward/raw_geo/std": 0.14086856255566693, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087679, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1933 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1185.5, "completions/mean_terminated_length": 1080.666748046875, "completions/min_length": 891.0, "completions/min_terminated_length": 891.0, "epoch": 0.38687737547509504, "frac_reward_zero_std": 0.0, "grad_norm": 3.3679682694342388, "kl": 0.01898193359375, "learning_rate": 7.928917149672254e-07, "loss": 0.0517, "num_tokens": 84328588.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9801058173179626, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11899428164145383, "rewards/wordcountpos_reward/raw_geo/std": 0.07336294543278063, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1934 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1344.0, "completions/max_terminated_length": 1344.0, "completions/mean_length": 1092.8125, "completions/mean_terminated_length": 1092.8125, "completions/min_length": 948.0, "completions/min_terminated_length": 948.0, "epoch": 0.3870774154830966, "frac_reward_zero_std": 0.0, "grad_norm": 3.2581363135207675, "kl": 0.020538330078125, "learning_rate": 7.926271316379505e-07, "loss": -0.0258, "num_tokens": 84377953.0, "reward": 0.0, "reward_std": 0.5870635509490967, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07762638823999722, "rewards/wordcountpos_reward/raw_geo/std": 0.14303668763566904, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505422, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1935 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1344.0, "completions/max_terminated_length": 1344.0, "completions/mean_length": 1117.8125, "completions/mean_terminated_length": 1117.8125, "completions/min_length": 862.0, "completions/min_terminated_length": 862.0, "epoch": 0.3872774554910982, "frac_reward_zero_std": 0.0, "grad_norm": 3.259911931430322, "kl": 0.014923095703125, "learning_rate": 7.923624300025844e-07, "loss": -0.0218, "num_tokens": 84414310.0, "reward": 0.0, "reward_std": 0.8105592727661133, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.000733722517796763, "rewards/wordcountpos_reward/raw_geo/std": 0.06051314692325833, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1936 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1327.0, "completions/max_terminated_length": 1327.0, "completions/mean_length": 1194.875, "completions/mean_terminated_length": 1194.875, "completions/min_length": 945.0, "completions/min_terminated_length": 945.0, "epoch": 0.3874774954990998, "frac_reward_zero_std": 0.0, "grad_norm": 2.6484300185103353, "kl": 0.0118560791015625, "learning_rate": 7.920976101901968e-07, "loss": -0.056, "num_tokens": 84463524.0, "reward": 0.0, "reward_std": 0.8934857845306396, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04211384414680419, "rewards/wordcountpos_reward/raw_geo/std": 0.2333173234249423, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036262, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1937 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1479.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1299.5625, "completions/mean_terminated_length": 1299.5625, "completions/min_length": 952.0, "completions/min_terminated_length": 952.0, "epoch": 0.38767753550710143, "frac_reward_zero_std": 0.0, "grad_norm": 2.786133816929239, "kl": 0.0147857666015625, "learning_rate": 7.918326723299154e-07, "loss": 0.0176, "num_tokens": 84513917.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0175589323043823, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14600860897973067, "rewards/wordcountpos_reward/raw_geo/std": 0.11890078592203776, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1938 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1426.0, "completions/max_terminated_length": 1426.0, "completions/mean_length": 1096.6875, "completions/mean_terminated_length": 1096.6875, "completions/min_length": 897.0, "completions/min_terminated_length": 897.0, "epoch": 0.38787757551510305, "frac_reward_zero_std": 0.0, "grad_norm": 2.8142218958907725, "kl": 0.0144500732421875, "learning_rate": 7.915676165509248e-07, "loss": 0.0167, "num_tokens": 84553056.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4527949094772339, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1546822520687281, "rewards/wordcountpos_reward/raw_geo/std": 0.18175917391667706, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09108400680852977, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1939 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1322.0, "completions/mean_length": 1204.1875, "completions/mean_terminated_length": 1161.9285888671875, "completions/min_length": 898.0, "completions/min_terminated_length": 898.0, "epoch": 0.3880776155231046, "frac_reward_zero_std": 0.0, "grad_norm": 3.47851492965573, "kl": 0.01708984375, "learning_rate": 7.913024429824672e-07, "loss": -0.0271, "num_tokens": 84600171.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7945734262466431, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03318640885596952, "rewards/wordcountpos_reward/raw_geo/std": 0.04347737846261447, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13492110177323527, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1940 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1348.0, "completions/mean_length": 1123.75, "completions/mean_terminated_length": 1070.0, "completions/min_length": 770.0, "completions/min_terminated_length": 770.0, "epoch": 0.3882776555311062, "frac_reward_zero_std": 0.0, "grad_norm": 3.533612689886028, "kl": 0.019317626953125, "learning_rate": 7.910371517538428e-07, "loss": 0.0015, "num_tokens": 84643039.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9811118841171265, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09676382869600902, "rewards/wordcountpos_reward/raw_geo/std": 0.10918625189944568, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13957607775504183, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1941 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1114.0, "completions/max_terminated_length": 1114.0, "completions/mean_length": 1023.6875, "completions/mean_terminated_length": 1023.6875, "completions/min_length": 902.0, "completions/min_terminated_length": 902.0, "epoch": 0.3884776955391078, "frac_reward_zero_std": 0.0, "grad_norm": 1.2882336552583928, "kl": 0.0042209625244140625, "learning_rate": 7.907717429944086e-07, "loss": 0.0001, "num_tokens": 84684970.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9430273771286011, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03151630712683913, "rewards/wordcountpos_reward/raw_geo/std": 0.0977104276587023, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.10602235962635781, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1942 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1287.0, "completions/max_terminated_length": 1287.0, "completions/mean_length": 1018.75, "completions/mean_terminated_length": 1018.75, "completions/min_length": 708.0, "completions/min_terminated_length": 708.0, "epoch": 0.38867773554710944, "frac_reward_zero_std": 0.0, "grad_norm": 3.6331558978513856, "kl": 0.0198974609375, "learning_rate": 7.905062168335794e-07, "loss": -0.0376, "num_tokens": 84716766.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6383984088897705, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.14574582130289615, "rewards/wordcountpos_reward/raw_geo/std": 0.15150392951019787, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10945995377982527, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1943 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1173.0, "completions/max_terminated_length": 1173.0, "completions/mean_length": 954.75, "completions/mean_terminated_length": 954.75, "completions/min_length": 787.0, "completions/min_terminated_length": 787.0, "epoch": 0.38887777555511105, "frac_reward_zero_std": 0.0, "grad_norm": 3.208395548761791, "kl": 0.0136566162109375, "learning_rate": 7.902405734008267e-07, "loss": 0.0035, "num_tokens": 84766058.0, "reward": -2.9802322387695312e-08, "reward_std": 0.645635724067688, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.17979327969462922, "rewards/wordcountpos_reward/raw_geo/std": 0.21916599585048865, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1944 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1461.0, "completions/mean_length": 1265.3125, "completions/mean_terminated_length": 1187.0833740234375, "completions/min_length": 887.0, "completions/min_terminated_length": 887.0, "epoch": 0.3890778155631126, "frac_reward_zero_std": 0.0, "grad_norm": 2.712189161298553, "kl": 0.0118865966796875, "learning_rate": 7.899748128256793e-07, "loss": 0.0129, "num_tokens": 84819799.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8285987973213196, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02933622415489038, "rewards/wordcountpos_reward/raw_geo/std": 0.060910642294256884, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1945 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1448.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 1351.125, "completions/mean_terminated_length": 1351.125, "completions/min_length": 1206.0, "completions/min_terminated_length": 1206.0, "epoch": 0.3892778555711142, "frac_reward_zero_std": 0.0, "grad_norm": 2.769769274498029, "kl": 0.0123291015625, "learning_rate": 7.897089352377237e-07, "loss": 0.006, "num_tokens": 84866361.0, "reward": 0.0, "reward_std": 0.8065032958984375, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.027039899476719222, "rewards/wordcountpos_reward/raw_geo/std": 0.07483926024067868, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.1029203215725281, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1946 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1292.0, "completions/max_terminated_length": 1292.0, "completions/mean_length": 1019.75, "completions/mean_terminated_length": 1019.75, "completions/min_length": 732.0, "completions/min_terminated_length": 732.0, "epoch": 0.38947789557911583, "frac_reward_zero_std": 0.0, "grad_norm": 3.3684930118765517, "kl": 0.0181732177734375, "learning_rate": 7.894429407666024e-07, "loss": -0.0144, "num_tokens": 84907085.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6613297462463379, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06654591786236366, "rewards/wordcountpos_reward/raw_geo/std": 0.06413234359363085, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.10532137766186214, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1947 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 1286.0625, "completions/mean_terminated_length": 1236.6923828125, "completions/min_length": 1041.0, "completions/min_terminated_length": 1041.0, "epoch": 0.38967793558711744, "frac_reward_zero_std": 0.0, "grad_norm": 2.0636617240426474, "kl": 0.00885009765625, "learning_rate": 7.891768295420164e-07, "loss": -0.0116, "num_tokens": 84940470.0, "reward": -1.1175870895385742e-08, "reward_std": 0.9511070847511292, "rewards/wordcountpos_reward/mean": -1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10621054853011609, "rewards/wordcountpos_reward/raw_geo/std": 0.08070195674325308, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1948 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 1204.25, "completions/mean_terminated_length": 1162.0, "completions/min_length": 799.0, "completions/min_terminated_length": 799.0, "epoch": 0.389877975595119, "frac_reward_zero_std": 0.0, "grad_norm": 3.4070034881137334, "kl": 0.0177459716796875, "learning_rate": 7.889106016937219e-07, "loss": 0.0022, "num_tokens": 84974690.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0560686588287354, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.016030748797630112, "rewards/wordcountpos_reward/raw_geo/std": 0.058505419223668136, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1949 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1281.0, "completions/max_terminated_length": 1281.0, "completions/mean_length": 973.4375, "completions/mean_terminated_length": 973.4375, "completions/min_length": 754.0, "completions/min_terminated_length": 754.0, "epoch": 0.3900780156031206, "frac_reward_zero_std": 0.0, "grad_norm": 3.62722112152376, "kl": 0.0143280029296875, "learning_rate": 7.886442573515333e-07, "loss": -0.0332, "num_tokens": 85017705.0, "reward": 0.0, "reward_std": 0.9516506195068359, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.01808504943915968, "rewards/wordcountpos_reward/raw_geo/std": 0.1288500030165194, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.6875, "rewards/wordcountpos_reward/raw_rule/std": 0.15533714826025882, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1950 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1304.0, "completions/max_terminated_length": 1304.0, "completions/mean_length": 1035.75, "completions/mean_terminated_length": 1035.75, "completions/min_length": 867.0, "completions/min_terminated_length": 867.0, "epoch": 0.3902780556111222, "frac_reward_zero_std": 0.0, "grad_norm": 3.3223812489862357, "kl": 0.013580322265625, "learning_rate": 7.88377796645321e-07, "loss": -0.0466, "num_tokens": 85057565.0, "reward": 0.0, "reward_std": 0.9915995597839355, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0885074400646694, "rewards/wordcountpos_reward/raw_geo/std": 0.07319635640649398, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1951 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1340.0, "completions/mean_length": 1110.25, "completions/mean_terminated_length": 1084.2667236328125, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.39047809561912383, "frac_reward_zero_std": 0.0, "grad_norm": 3.7246271712308556, "kl": 0.0206298828125, "learning_rate": 7.881112197050128e-07, "loss": -0.0254, "num_tokens": 85102025.0, "reward": 0.0, "reward_std": 0.6427106857299805, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.010441763367497837, "rewards/wordcountpos_reward/raw_geo/std": 0.11485688084397902, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.11409872268574492, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1952 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1410.0, "completions/mean_length": 1129.5625, "completions/mean_terminated_length": 1104.86669921875, "completions/min_length": 938.0, "completions/min_terminated_length": 938.0, "epoch": 0.39067813562712544, "frac_reward_zero_std": 0.0, "grad_norm": 2.3488824988896155, "kl": 0.0091094970703125, "learning_rate": 7.878445266605926e-07, "loss": 0.0282, "num_tokens": 85146762.0, "reward": 0.0, "reward_std": 0.7387911677360535, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.015499180445664451, "rewards/wordcountpos_reward/raw_geo/std": 0.05106921887306669, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1253144193766372, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1953 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 1234.1875, "completions/mean_terminated_length": 1196.21435546875, "completions/min_length": 1020.0, "completions/min_terminated_length": 1020.0, "epoch": 0.390878175635127, "frac_reward_zero_std": 0.0, "grad_norm": 3.2971555362237144, "kl": 0.015350341796875, "learning_rate": 7.87577717642101e-07, "loss": 0.0105, "num_tokens": 85187941.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8785548210144043, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.13525414674498734, "rewards/wordcountpos_reward/raw_geo/std": 0.3648798263242208, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05443310539518172, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1954 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1249.0, "completions/mean_length": 1032.375, "completions/mean_terminated_length": 965.5714721679688, "completions/min_length": 654.0, "completions/min_terminated_length": 654.0, "epoch": 0.3910782156431286, "frac_reward_zero_std": 0.0, "grad_norm": 3.2838820027219744, "kl": 0.015777587890625, "learning_rate": 7.873107927796356e-07, "loss": 0.0561, "num_tokens": 85233507.0, "reward": -2.9802322387695312e-08, "reward_std": 0.46401447057724, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.18496761020927677, "rewards/wordcountpos_reward/raw_geo/std": 0.132386932920565, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11147163731607214, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1955 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1259.875, "completions/mean_terminated_length": 1225.571533203125, "completions/min_length": 970.0, "completions/min_terminated_length": 970.0, "epoch": 0.3912782556511302, "frac_reward_zero_std": 0.0, "grad_norm": 2.905842591462001, "kl": 0.0129852294921875, "learning_rate": 7.8704375220335e-07, "loss": -0.0372, "num_tokens": 85287577.0, "reward": 0.0, "reward_std": 0.5848679542541504, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1104370115588768, "rewards/wordcountpos_reward/raw_geo/std": 0.08239590932674329, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1956 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 935.0, "completions/max_terminated_length": 935.0, "completions/mean_length": 773.625, "completions/mean_terminated_length": 773.625, "completions/min_length": 653.0, "completions/min_terminated_length": 653.0, "epoch": 0.39147829565913184, "frac_reward_zero_std": 0.0, "grad_norm": 3.331683825317068, "kl": 0.0155181884765625, "learning_rate": 7.867765960434543e-07, "loss": -0.0004, "num_tokens": 85326395.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6733416318893433, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.004082887098857441, "rewards/wordcountpos_reward/raw_geo/std": 0.16360180557639162, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1112221667221529, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1957 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1456.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 1173.75, "completions/mean_terminated_length": 1173.75, "completions/min_length": 805.0, "completions/min_terminated_length": 805.0, "epoch": 0.39167833566713345, "frac_reward_zero_std": 0.0, "grad_norm": 3.3701315077128413, "kl": 0.017791748046875, "learning_rate": 7.865093244302153e-07, "loss": 0.0313, "num_tokens": 85363535.0, "reward": 0.0, "reward_std": 0.8786790370941162, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04053263159152689, "rewards/wordcountpos_reward/raw_geo/std": 0.14550624995661623, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590962, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1958 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1181.0, "completions/max_terminated_length": 1181.0, "completions/mean_length": 1004.1875, "completions/mean_terminated_length": 1004.1875, "completions/min_length": 761.0, "completions/min_terminated_length": 761.0, "epoch": 0.391878375675135, "frac_reward_zero_std": 0.0, "grad_norm": 3.2847657188945036, "kl": 0.0160064697265625, "learning_rate": 7.862419374939559e-07, "loss": -0.0249, "num_tokens": 85392338.0, "reward": 0.0, "reward_std": 0.8219481706619263, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.060644614748349764, "rewards/wordcountpos_reward/raw_geo/std": 0.07700475023673792, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1959 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1369.0, "completions/mean_terminated_length": 1290.4000244140625, "completions/min_length": 1147.0, "completions/min_terminated_length": 1147.0, "epoch": 0.3920784156831366, "frac_reward_zero_std": 0.0, "grad_norm": 2.591051203650811, "kl": 0.0161285400390625, "learning_rate": 7.859744353650548e-07, "loss": -0.0008, "num_tokens": 85447922.0, "reward": 5.960464477539063e-08, "reward_std": 0.8923698663711548, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12203102061812558, "rewards/wordcountpos_reward/raw_geo/std": 0.04566537138325073, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1960 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1092.0, "completions/max_terminated_length": 1092.0, "completions/mean_length": 999.375, "completions/mean_terminated_length": 999.375, "completions/min_length": 833.0, "completions/min_terminated_length": 833.0, "epoch": 0.39227845569113823, "frac_reward_zero_std": 0.0, "grad_norm": 3.27906364790894, "kl": 0.013641357421875, "learning_rate": 7.857068181739476e-07, "loss": -0.0111, "num_tokens": 85491416.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9789931774139404, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03943299986504064, "rewards/wordcountpos_reward/raw_geo/std": 0.09417395391686141, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08153617692869926, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1961 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1400.0, "completions/max_terminated_length": 1400.0, "completions/mean_length": 1031.0625, "completions/mean_terminated_length": 1031.0625, "completions/min_length": 815.0, "completions/min_terminated_length": 815.0, "epoch": 0.39247849569913984, "frac_reward_zero_std": 0.0, "grad_norm": 3.126149456054329, "kl": 0.0142669677734375, "learning_rate": 7.854390860511255e-07, "loss": -0.0124, "num_tokens": 85530617.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0072274208068848, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08151374350387246, "rewards/wordcountpos_reward/raw_geo/std": 0.1187681007615068, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1962 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1146.375, "completions/mean_terminated_length": 1064.769287109375, "completions/min_length": 753.0, "completions/min_terminated_length": 753.0, "epoch": 0.39267853570714145, "frac_reward_zero_std": 0.0, "grad_norm": 3.178697301529944, "kl": 0.0169219970703125, "learning_rate": 7.851712391271359e-07, "loss": 0.0214, "num_tokens": 85582503.0, "reward": 2.2351741790771484e-08, "reward_std": 0.9279727339744568, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04226148927376202, "rewards/wordcountpos_reward/raw_geo/std": 0.05063945303439396, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08850612031567838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1963 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1301.0, "completions/max_terminated_length": 1301.0, "completions/mean_length": 1148.1875, "completions/mean_terminated_length": 1148.1875, "completions/min_length": 918.0, "completions/min_terminated_length": 918.0, "epoch": 0.392878575715143, "frac_reward_zero_std": 0.0, "grad_norm": 3.165283861827048, "kl": 0.01641845703125, "learning_rate": 7.849032775325824e-07, "loss": -0.0184, "num_tokens": 85627618.0, "reward": -3.725290298461914e-09, "reward_std": 0.9582778215408325, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.10531646638070305, "rewards/wordcountpos_reward/raw_geo/std": 0.05513195879744111, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1964 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1438.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 972.9375, "completions/mean_terminated_length": 972.9375, "completions/min_length": 701.0, "completions/min_terminated_length": 701.0, "epoch": 0.3930786157231446, "frac_reward_zero_std": 0.0, "grad_norm": 3.6533349241015753, "kl": 0.0177764892578125, "learning_rate": 7.846352013981239e-07, "loss": -0.0008, "num_tokens": 85654105.0, "reward": -7.450580596923828e-09, "reward_std": 0.9160459637641907, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.041281719986851734, "rewards/wordcountpos_reward/raw_geo/std": 0.07520402981820258, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1965 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1215.0, "completions/max_terminated_length": 1215.0, "completions/mean_length": 1053.25, "completions/mean_terminated_length": 1053.25, "completions/min_length": 742.0, "completions/min_terminated_length": 742.0, "epoch": 0.39327865573114623, "frac_reward_zero_std": 0.0, "grad_norm": 3.0187150971238434, "kl": 0.0170135498046875, "learning_rate": 7.843670108544756e-07, "loss": -0.0331, "num_tokens": 85697989.0, "reward": 0.0, "reward_std": 0.8365430235862732, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.13436313206221195, "rewards/wordcountpos_reward/raw_geo/std": 0.046601355610636476, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1966 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1221.0, "completions/max_terminated_length": 1221.0, "completions/mean_length": 1015.3125, "completions/mean_terminated_length": 1015.3125, "completions/min_length": 844.0, "completions/min_terminated_length": 844.0, "epoch": 0.39347869573914784, "frac_reward_zero_std": 0.0, "grad_norm": 3.529498743530727, "kl": 0.0175933837890625, "learning_rate": 7.840987060324089e-07, "loss": -0.0032, "num_tokens": 85729602.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9387333393096924, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02500254813345758, "rewards/wordcountpos_reward/raw_geo/std": 0.051729833395051696, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1967 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1294.75, "completions/mean_terminated_length": 1281.0667724609375, "completions/min_length": 1011.0, "completions/min_terminated_length": 1011.0, "epoch": 0.39367873574714946, "frac_reward_zero_std": 0.0, "grad_norm": 2.8931413053652353, "kl": 0.015594482421875, "learning_rate": 7.8383028706275e-07, "loss": -0.0115, "num_tokens": 85782862.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8478749990463257, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.039036036904426565, "rewards/wordcountpos_reward/raw_geo/std": 0.12657714601589817, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258098, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1968 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1363.0, "completions/max_terminated_length": 1363.0, "completions/mean_length": 1043.75, "completions/mean_terminated_length": 1043.75, "completions/min_length": 695.0, "completions/min_terminated_length": 695.0, "epoch": 0.393878775755151, "frac_reward_zero_std": 0.0, "grad_norm": 3.4661177599841713, "kl": 0.0149688720703125, "learning_rate": 7.835617540763813e-07, "loss": 0.0093, "num_tokens": 85826954.0, "reward": -1.4901161193847656e-08, "reward_std": 1.000534176826477, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.004058456368077229, "rewards/wordcountpos_reward/raw_geo/std": 0.047596678625114505, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818418, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1969 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1373.0, "completions/mean_length": 1172.625, "completions/mean_terminated_length": 976.2000122070312, "completions/min_length": 778.0, "completions/min_terminated_length": 778.0, "epoch": 0.3940788157631526, "frac_reward_zero_std": 0.0, "grad_norm": 3.0060367334951774, "kl": 0.0099029541015625, "learning_rate": 7.832931072042408e-07, "loss": -0.0551, "num_tokens": 85873068.0, "reward": 0.0, "reward_std": 0.5061690807342529, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/std": 0.0, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237134, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1970 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1147.4375, "completions/mean_terminated_length": 987.1818237304688, "completions/min_length": 803.0, "completions/min_terminated_length": 803.0, "epoch": 0.39427885577115424, "frac_reward_zero_std": 0.0, "grad_norm": 2.6694401207468745, "kl": 0.013132095336914062, "learning_rate": 7.830243465773218e-07, "loss": 0.0079, "num_tokens": 85918627.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5675990581512451, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07759245990465403, "rewards/wordcountpos_reward/raw_geo/std": 0.09095469690428253, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13305526559931294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1971 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1131.625, "completions/mean_terminated_length": 1079.0, "completions/min_length": 802.0, "completions/min_terminated_length": 802.0, "epoch": 0.39447889577915585, "frac_reward_zero_std": 0.0, "grad_norm": 3.762735668511541, "kl": 0.018341064453125, "learning_rate": 7.827554723266733e-07, "loss": 0.0094, "num_tokens": 85960429.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0607939958572388, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.14659792675934105, "rewards/wordcountpos_reward/raw_geo/std": 0.2826092775064619, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11279282877125754, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1972 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1175.8125, "completions/mean_terminated_length": 1154.2000732421875, "completions/min_length": 905.0, "completions/min_terminated_length": 905.0, "epoch": 0.39467893578715746, "frac_reward_zero_std": 0.0, "grad_norm": 3.0543527230721215, "kl": 0.0151824951171875, "learning_rate": 7.824864845833995e-07, "loss": -0.0117, "num_tokens": 86004066.0, "reward": -7.450580596923828e-09, "reward_std": 0.9978142976760864, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.002552956104634882, "rewards/wordcountpos_reward/raw_geo/std": 0.16578180091000935, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402216, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1973 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1324.0, "completions/max_terminated_length": 1324.0, "completions/mean_length": 959.1875, "completions/mean_terminated_length": 959.1875, "completions/min_length": 755.0, "completions/min_terminated_length": 755.0, "epoch": 0.394878975795159, "frac_reward_zero_std": 0.0, "grad_norm": 3.4221980911747063, "kl": 0.0134429931640625, "learning_rate": 7.822173834786602e-07, "loss": -0.0622, "num_tokens": 86042933.0, "reward": 2.9802322387695312e-08, "reward_std": 0.722364604473114, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.24464280770314523, "rewards/wordcountpos_reward/raw_geo/std": 0.2696868176330769, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1974 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1409.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 1141.125, "completions/mean_terminated_length": 1141.125, "completions/min_length": 815.0, "completions/min_terminated_length": 815.0, "epoch": 0.3950790158031606, "frac_reward_zero_std": 0.0, "grad_norm": 2.825545300387118, "kl": 0.0166778564453125, "learning_rate": 7.819481691436702e-07, "loss": -0.0465, "num_tokens": 86090775.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0217972993850708, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.037414088196032355, "rewards/wordcountpos_reward/raw_geo/std": 0.04124504159552026, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1975 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1184.3125, "completions/mean_terminated_length": 1163.2667236328125, "completions/min_length": 1002.0, "completions/min_terminated_length": 1002.0, "epoch": 0.39527905581116224, "frac_reward_zero_std": 0.0, "grad_norm": 3.006456730337618, "kl": 0.0124969482421875, "learning_rate": 7.816788417096997e-07, "loss": -0.0109, "num_tokens": 86132172.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7910186052322388, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09900677930101995, "rewards/wordcountpos_reward/raw_geo/std": 0.07687970713092088, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402215, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1976 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1296.0, "completions/mean_length": 1006.0, "completions/mean_terminated_length": 935.4285888671875, "completions/min_length": 731.0, "completions/min_terminated_length": 731.0, "epoch": 0.39547909581916385, "frac_reward_zero_std": 0.0, "grad_norm": 3.000391794135069, "kl": 0.01032257080078125, "learning_rate": 7.814094013080739e-07, "loss": -0.017, "num_tokens": 86176556.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7196230888366699, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07898987385200562, "rewards/wordcountpos_reward/raw_geo/std": 0.0698906690964471, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1977 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1082.625, "completions/mean_terminated_length": 1054.800048828125, "completions/min_length": 686.0, "completions/min_terminated_length": 686.0, "epoch": 0.3956791358271654, "frac_reward_zero_std": 0.0, "grad_norm": 3.321515029077363, "kl": 0.015411376953125, "learning_rate": 7.811398480701733e-07, "loss": 0.04, "num_tokens": 86215646.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0556150674819946, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08417111795794797, "rewards/wordcountpos_reward/raw_geo/std": 0.031393966120172964, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078612, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1978 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1296.0, "completions/mean_length": 1138.9375, "completions/mean_terminated_length": 1114.86669921875, "completions/min_length": 772.0, "completions/min_terminated_length": 772.0, "epoch": 0.395879175835167, "frac_reward_zero_std": 0.0, "grad_norm": 3.274345722627407, "kl": 0.017059326171875, "learning_rate": 7.80870182127433e-07, "loss": -0.0402, "num_tokens": 86267293.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9667720794677734, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10490092387904479, "rewards/wordcountpos_reward/raw_geo/std": 0.0997550169597763, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12524050936172842, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1979 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1203.0, "completions/max_terminated_length": 1203.0, "completions/mean_length": 975.9375, "completions/mean_terminated_length": 975.9375, "completions/min_length": 854.0, "completions/min_terminated_length": 854.0, "epoch": 0.39607921584316863, "frac_reward_zero_std": 0.0, "grad_norm": 2.9415402233272454, "kl": 0.014892578125, "learning_rate": 7.806004036113436e-07, "loss": -0.0419, "num_tokens": 86306220.0, "reward": 7.450580596923828e-09, "reward_std": 1.068274736404419, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.11791751891020814, "rewards/wordcountpos_reward/raw_geo/std": 0.2901043462468264, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078612, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1980 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1150.0, "completions/max_terminated_length": 1150.0, "completions/mean_length": 1030.5, "completions/mean_terminated_length": 1030.5, "completions/min_length": 908.0, "completions/min_terminated_length": 908.0, "epoch": 0.39627925585117024, "frac_reward_zero_std": 0.0, "grad_norm": 3.6772877641302166, "kl": 0.020477294921875, "learning_rate": 7.803305126534505e-07, "loss": -0.006, "num_tokens": 86338148.0, "reward": -2.9802322387695312e-08, "reward_std": 0.875225305557251, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1484228056040741, "rewards/wordcountpos_reward/raw_geo/std": 0.1545569673816577, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503961, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1981 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1302.0, "completions/max_terminated_length": 1302.0, "completions/mean_length": 1069.125, "completions/mean_terminated_length": 1069.125, "completions/min_length": 815.0, "completions/min_terminated_length": 815.0, "epoch": 0.39647929585917185, "frac_reward_zero_std": 0.0, "grad_norm": 2.8270967489767704, "kl": 0.0222625732421875, "learning_rate": 7.800605093853533e-07, "loss": 0.032, "num_tokens": 86388526.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8502848744392395, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.033677224899846564, "rewards/wordcountpos_reward/raw_geo/std": 0.18292180713889253, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.103905227473387, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1982 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1350.0, "completions/mean_length": 1077.6875, "completions/mean_terminated_length": 1049.533447265625, "completions/min_length": 793.0, "completions/min_terminated_length": 793.0, "epoch": 0.3966793358671734, "frac_reward_zero_std": 0.0, "grad_norm": 2.621222912724427, "kl": 0.017578125, "learning_rate": 7.797903939387071e-07, "loss": -0.0161, "num_tokens": 86431577.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0643887519836426, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0584300029922047, "rewards/wordcountpos_reward/raw_geo/std": 0.12150300693234975, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1287403358472941, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1983 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1359.0, "completions/max_terminated_length": 1359.0, "completions/mean_length": 1120.6875, "completions/mean_terminated_length": 1120.6875, "completions/min_length": 701.0, "completions/min_terminated_length": 701.0, "epoch": 0.396879375875175, "frac_reward_zero_std": 0.0, "grad_norm": 3.316812044703328, "kl": 0.0160369873046875, "learning_rate": 7.795201664452215e-07, "loss": 0.0026, "num_tokens": 86474628.0, "reward": 0.0, "reward_std": 0.7597593069076538, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.17380992919918895, "rewards/wordcountpos_reward/raw_geo/std": 0.13096476613901295, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0969917904124231, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1984 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1391.75, "completions/mean_terminated_length": 1342.5455322265625, "completions/min_length": 1217.0, "completions/min_terminated_length": 1217.0, "epoch": 0.39707941588317663, "frac_reward_zero_std": 0.0, "grad_norm": 3.0245341290333974, "kl": 0.016387939453125, "learning_rate": 7.792498270366603e-07, "loss": 0.0032, "num_tokens": 86532848.0, "reward": 0.0, "reward_std": 0.938235878944397, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.02542469493238682, "rewards/wordcountpos_reward/raw_geo/std": 0.07577014948746762, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13333333333333333, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1985 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1256.25, "completions/mean_terminated_length": 1240.0001220703125, "completions/min_length": 1011.0, "completions/min_terminated_length": 1011.0, "epoch": 0.39727945589117825, "frac_reward_zero_std": 0.0, "grad_norm": 2.874932738678797, "kl": 0.0143890380859375, "learning_rate": 7.789793758448425e-07, "loss": -0.0165, "num_tokens": 86578980.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8410855531692505, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03504135166237747, "rewards/wordcountpos_reward/raw_geo/std": 0.04493807432453479, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.05947299418254506, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1986 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1167.75, "completions/mean_terminated_length": 1120.2857666015625, "completions/min_length": 859.0, "completions/min_terminated_length": 859.0, "epoch": 0.39747949589917986, "frac_reward_zero_std": 0.0, "grad_norm": 3.4508761642315715, "kl": 0.01910400390625, "learning_rate": 7.787088130016413e-07, "loss": -0.0205, "num_tokens": 86630328.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8712863922119141, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2989933843555322, "rewards/wordcountpos_reward/raw_geo/std": 0.061886168635224344, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1987 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1373.0, "completions/max_terminated_length": 1373.0, "completions/mean_length": 1167.25, "completions/mean_terminated_length": 1167.25, "completions/min_length": 825.0, "completions/min_terminated_length": 825.0, "epoch": 0.3976795359071814, "frac_reward_zero_std": 0.0, "grad_norm": 3.386146320953828, "kl": 0.0154876708984375, "learning_rate": 7.784381386389842e-07, "loss": 0.0064, "num_tokens": 86672260.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6728423833847046, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08227600392823269, "rewards/wordcountpos_reward/raw_geo/std": 0.05541545232427812, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.11080513425729775, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1988 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1440.0, "completions/mean_length": 1161.3125, "completions/mean_terminated_length": 1138.7333984375, "completions/min_length": 956.0, "completions/min_terminated_length": 956.0, "epoch": 0.397879575915183, "frac_reward_zero_std": 0.0, "grad_norm": 2.546803036867779, "kl": 0.01361083984375, "learning_rate": 7.781673528888536e-07, "loss": -0.0202, "num_tokens": 86717457.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0329099893569946, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0887698562143239, "rewards/wordcountpos_reward/raw_geo/std": 0.20679781615560397, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1989 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1214.375, "completions/mean_terminated_length": 1148.4615478515625, "completions/min_length": 829.0, "completions/min_terminated_length": 829.0, "epoch": 0.39807961592318464, "frac_reward_zero_std": 0.0, "grad_norm": 3.2231077577448985, "kl": 0.0168304443359375, "learning_rate": 7.778964558832855e-07, "loss": -0.0624, "num_tokens": 86771127.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7980071306228638, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.033462326091415195, "rewards/wordcountpos_reward/raw_geo/std": 0.3019643714403689, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.12141145226353543, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1990 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1356.0, "completions/max_terminated_length": 1356.0, "completions/mean_length": 966.0625, "completions/mean_terminated_length": 966.0625, "completions/min_length": 771.0, "completions/min_terminated_length": 771.0, "epoch": 0.39827965593118625, "frac_reward_zero_std": 0.0, "grad_norm": 2.9597762319378003, "kl": 0.015228271484375, "learning_rate": 7.776254477543706e-07, "loss": -0.0345, "num_tokens": 86812520.0, "reward": 0.0, "reward_std": 0.8864554166793823, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.15404831626704482, "rewards/wordcountpos_reward/raw_geo/std": 0.2305913883800116, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1991 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1146.5625, "completions/mean_terminated_length": 1123.0, "completions/min_length": 862.0, "completions/min_terminated_length": 862.0, "epoch": 0.39847969593918786, "frac_reward_zero_std": 0.0, "grad_norm": 3.539974949529452, "kl": 0.01519775390625, "learning_rate": 7.77354328634254e-07, "loss": 0.0054, "num_tokens": 86845473.0, "reward": 0.0, "reward_std": 0.9918215274810791, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/std": 0.0, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11547005383792518, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1992 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1194.4375, "completions/mean_terminated_length": 1150.7857666015625, "completions/min_length": 928.0, "completions/min_terminated_length": 928.0, "epoch": 0.3986797359471894, "frac_reward_zero_std": 0.0, "grad_norm": 3.158571610173031, "kl": 0.014923095703125, "learning_rate": 7.770830986551341e-07, "loss": 0.0446, "num_tokens": 86888088.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9001962542533875, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1166857819423355, "rewards/wordcountpos_reward/raw_geo/std": 0.07054107705439644, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.13984117975602023, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1993 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1174.8125, "completions/mean_terminated_length": 1066.416748046875, "completions/min_length": 746.0, "completions/min_terminated_length": 746.0, "epoch": 0.39887977595519103, "frac_reward_zero_std": 0.0, "grad_norm": 3.142714087899282, "kl": 0.0154571533203125, "learning_rate": 7.768117579492643e-07, "loss": -0.0181, "num_tokens": 86943621.0, "reward": 3.725290298461914e-09, "reward_std": 1.0066262483596802, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.01813017647922831, "rewards/wordcountpos_reward/raw_geo/std": 0.11261853532734675, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1994 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1377.0, "completions/max_terminated_length": 1377.0, "completions/mean_length": 1160.625, "completions/mean_terminated_length": 1160.625, "completions/min_length": 952.0, "completions/min_terminated_length": 952.0, "epoch": 0.39907981596319264, "frac_reward_zero_std": 0.0, "grad_norm": 3.673707740666206, "kl": 0.0228271484375, "learning_rate": 7.765403066489513e-07, "loss": 0.0398, "num_tokens": 86991311.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9432878494262695, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08486769472371088, "rewards/wordcountpos_reward/raw_geo/std": 0.14940668347893782, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818419, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1995 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 1175.75, "completions/mean_terminated_length": 1154.1334228515625, "completions/min_length": 854.0, "completions/min_terminated_length": 854.0, "epoch": 0.39927985597119425, "frac_reward_zero_std": 0.0, "grad_norm": 2.5720892885089075, "kl": 0.01214599609375, "learning_rate": 7.762687448865561e-07, "loss": -0.0178, "num_tokens": 87030875.0, "reward": -2.2351741790771484e-08, "reward_std": 1.053713083267212, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.045502518534684094, "rewards/wordcountpos_reward/raw_geo/std": 0.058434464374287634, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1996 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1436.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1179.125, "completions/mean_terminated_length": 1179.125, "completions/min_length": 661.0, "completions/min_terminated_length": 661.0, "epoch": 0.39947989597919586, "frac_reward_zero_std": 0.0, "grad_norm": 3.076459365274049, "kl": 0.0154571533203125, "learning_rate": 7.759970727944935e-07, "loss": -0.006, "num_tokens": 87077357.0, "reward": 0.0, "reward_std": 0.9475910663604736, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0480510984401983, "rewards/wordcountpos_reward/raw_geo/std": 0.09420258603292415, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362766, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1997 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1267.0, "completions/max_terminated_length": 1267.0, "completions/mean_length": 1071.875, "completions/mean_terminated_length": 1071.875, "completions/min_length": 933.0, "completions/min_terminated_length": 933.0, "epoch": 0.3996799359871974, "frac_reward_zero_std": 0.0, "grad_norm": 3.272286767549367, "kl": 0.018524169921875, "learning_rate": 7.757252905052318e-07, "loss": -0.0004, "num_tokens": 87118083.0, "reward": 0.0, "reward_std": 0.4661349654197693, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04574095976598698, "rewards/wordcountpos_reward/raw_geo/std": 0.13979497245116046, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.10000000000000002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1998 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1307.0, "completions/max_terminated_length": 1307.0, "completions/mean_length": 1169.125, "completions/mean_terminated_length": 1169.125, "completions/min_length": 1000.0, "completions/min_terminated_length": 1000.0, "epoch": 0.39987997599519903, "frac_reward_zero_std": 0.0, "grad_norm": 1.9610670701878443, "kl": 0.00823974609375, "learning_rate": 7.754533981512936e-07, "loss": -0.0044, "num_tokens": 87163789.0, "reward": 0.0, "reward_std": 0.8742091655731201, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07824254678663017, "rewards/wordcountpos_reward/raw_geo/std": 0.08426140337045179, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1999 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1395.0, "completions/max_terminated_length": 1395.0, "completions/mean_length": 1190.375, "completions/mean_terminated_length": 1190.375, "completions/min_length": 894.0, "completions/min_terminated_length": 894.0, "epoch": 0.40008001600320064, "frac_reward_zero_std": 0.0, "grad_norm": 2.771438603318052, "kl": 0.0152435302734375, "learning_rate": 7.751813958652548e-07, "loss": -0.0311, "num_tokens": 87205835.0, "reward": 7.450580596923828e-09, "reward_std": 1.0564095973968506, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.01487273727736594, "rewards/wordcountpos_reward/raw_geo/std": 0.11694311570860827, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0769800358919501, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2000 } ], "logging_steps": 1, "max_steps": 4999, "num_input_tokens_seen": 87205835, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }