10275 lines
454 KiB
JSON
10275 lines
454 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.4007514088916719,
|
|
"eval_steps": 500,
|
|
"global_step": 320,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1427.0,
|
|
"completions/mean_length": 1310.375,
|
|
"completions/mean_terminated_length": 1120.75,
|
|
"completions/min_length": 941.0,
|
|
"completions/min_terminated_length": 941.0,
|
|
"epoch": 0.0012523481527864746,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.4675665797659936,
|
|
"kl": 0.0014476776123046875,
|
|
"learning_rate": 0.0,
|
|
"loss": -0.0042,
|
|
"num_tokens": 47606.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 1.0425715446472168,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.020242706942291286,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08320206610241015,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8166666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1128748897706693,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 1
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.25,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1493.0,
|
|
"completions/mean_length": 1215.625,
|
|
"completions/mean_terminated_length": 1120.8333740234375,
|
|
"completions/min_length": 920.0,
|
|
"completions/min_terminated_length": 920.0,
|
|
"epoch": 0.002504696305572949,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.5220841352987073,
|
|
"kl": 0.002323150634765625,
|
|
"learning_rate": 1.25e-08,
|
|
"loss": -0.0365,
|
|
"num_tokens": 78984.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.9615500569343567,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.019240361081273367,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0375240418925418,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09699179041242309,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 2
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1488.0,
|
|
"completions/mean_length": 1430.8125,
|
|
"completions/mean_terminated_length": 1341.857177734375,
|
|
"completions/min_length": 1171.0,
|
|
"completions/min_terminated_length": 1171.0,
|
|
"epoch": 0.003757044458359424,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.7257956401904653,
|
|
"kl": 0.0018787384033203125,
|
|
"learning_rate": 2.5e-08,
|
|
"loss": -0.014,
|
|
"num_tokens": 126437.0,
|
|
"reward": 7.450580596923828e-09,
|
|
"reward_std": 1.0492231845855713,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09708628067006185,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16724793667635054,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6541666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09179284245476838,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 3
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.75,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1490.0,
|
|
"completions/mean_length": 1463.4375,
|
|
"completions/mean_terminated_length": 1353.75,
|
|
"completions/min_length": 1084.0,
|
|
"completions/min_terminated_length": 1084.0,
|
|
"epoch": 0.005009392611145898,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.0068456094040337,
|
|
"kl": 0.00238037109375,
|
|
"learning_rate": 3.75e-08,
|
|
"loss": -0.0103,
|
|
"num_tokens": 192900.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.4076952338218689,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.42554686388976987,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.3748667411110748,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7083333333333334,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14580555290954889,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 4
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 1.0,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 0.0,
|
|
"completions/mean_length": 1500.0,
|
|
"completions/mean_terminated_length": 0.0,
|
|
"completions/min_length": 1500.0,
|
|
"completions/min_terminated_length": 0.0,
|
|
"epoch": 0.006261740763932373,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.0250923226839315,
|
|
"kl": 0.002262115478515625,
|
|
"learning_rate": 5e-08,
|
|
"loss": 0.0001,
|
|
"num_tokens": 257452.0,
|
|
"reward": 1.4901161193847656e-08,
|
|
"reward_std": 0.9494391083717346,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0021633155301854353,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04003332867073718,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6166666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09583937179043475,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 5
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1443.0,
|
|
"completions/mean_length": 1213.4375,
|
|
"completions/mean_terminated_length": 1041.5,
|
|
"completions/min_length": 749.0,
|
|
"completions/min_terminated_length": 749.0,
|
|
"epoch": 0.007514088916718848,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.520673181444066,
|
|
"kl": 0.002166748046875,
|
|
"learning_rate": 6.25e-08,
|
|
"loss": -0.0047,
|
|
"num_tokens": 300227.0,
|
|
"reward": 1.4901161193847656e-08,
|
|
"reward_std": 1.000030517578125,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.1494053837623106,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.21650138601325905,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6291666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08766518798921942,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 6
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.75,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1475.0,
|
|
"completions/mean_length": 1441.5,
|
|
"completions/mean_terminated_length": 1266.0,
|
|
"completions/min_length": 868.0,
|
|
"completions/min_terminated_length": 868.0,
|
|
"epoch": 0.008766437069505322,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.131914910533151,
|
|
"kl": 0.002285003662109375,
|
|
"learning_rate": 7.5e-08,
|
|
"loss": -0.0115,
|
|
"num_tokens": 365811.0,
|
|
"reward": 0.0,
|
|
"reward_std": 1.021754264831543,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.20434821411964987,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.13055976557133547,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6541666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08509254221575907,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 7
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1037.0,
|
|
"completions/mean_length": 1197.6875,
|
|
"completions/mean_terminated_length": 895.375,
|
|
"completions/min_length": 718.0,
|
|
"completions/min_terminated_length": 718.0,
|
|
"epoch": 0.010018785222291797,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.1709545933290264,
|
|
"kl": 0.001911163330078125,
|
|
"learning_rate": 8.75e-08,
|
|
"loss": 0.0124,
|
|
"num_tokens": 406590.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.7096362113952637,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.011512278889933215,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.017023573988747046,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07588978362901863,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 8
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1368.0,
|
|
"completions/mean_length": 1424.8125,
|
|
"completions/mean_terminated_length": 1299.5,
|
|
"completions/min_length": 1222.0,
|
|
"completions/min_terminated_length": 1222.0,
|
|
"epoch": 0.011271133375078271,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.4985696146916663,
|
|
"kl": 0.0014972686767578125,
|
|
"learning_rate": 1e-07,
|
|
"loss": 0.0017,
|
|
"num_tokens": 449955.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.592147946357727,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.09093222668860702,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16366647482965233,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10741060020797315,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 9
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.9375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1452.0,
|
|
"completions/mean_length": 1497.0,
|
|
"completions/mean_terminated_length": 1452.0,
|
|
"completions/min_length": 1452.0,
|
|
"completions/min_terminated_length": 1452.0,
|
|
"epoch": 0.012523481527864746,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.994231395858393,
|
|
"kl": 0.002552032470703125,
|
|
"learning_rate": 1.125e-07,
|
|
"loss": 0.0008,
|
|
"num_tokens": 512611.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.7100945115089417,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.39335439512941156,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.44383620756924225,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7374999999999999,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11013459778666118,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 10
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.25,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1436.0,
|
|
"completions/mean_length": 1319.0,
|
|
"completions/mean_terminated_length": 1258.666748046875,
|
|
"completions/min_length": 1147.0,
|
|
"completions/min_terminated_length": 1147.0,
|
|
"epoch": 0.013775829680651221,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.29188099989823,
|
|
"kl": 0.002773284912109375,
|
|
"learning_rate": 1.25e-07,
|
|
"loss": -0.0193,
|
|
"num_tokens": 578363.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.7537417411804199,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.006668674614171876,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06272286484055771,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.49583333333333335,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.15581327856693655,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 11
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1362.0,
|
|
"completions/max_terminated_length": 1362.0,
|
|
"completions/mean_length": 856.4375,
|
|
"completions/mean_terminated_length": 856.4375,
|
|
"completions/min_length": 689.0,
|
|
"completions/min_terminated_length": 689.0,
|
|
"epoch": 0.015028177833437696,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.02451725178761,
|
|
"kl": 0.0014524459838867188,
|
|
"learning_rate": 1.375e-07,
|
|
"loss": -0.0034,
|
|
"num_tokens": 624626.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.25382307171821594,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.16171540881469407,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04512392405527899,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.17293758240303758,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 12
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.3125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1386.0,
|
|
"completions/mean_length": 1088.5625,
|
|
"completions/mean_terminated_length": 901.5454711914062,
|
|
"completions/min_length": 674.0,
|
|
"completions/min_terminated_length": 674.0,
|
|
"epoch": 0.01628052598622417,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 4.015402694119637,
|
|
"kl": 0.0021724700927734375,
|
|
"learning_rate": 1.5e-07,
|
|
"loss": -0.0824,
|
|
"num_tokens": 681059.0,
|
|
"reward": 1.4901161193847656e-08,
|
|
"reward_std": 0.9276120662689209,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.041562779715464626,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1909826248378845,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7041666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11409872268574492,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 13
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.9375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1445.0,
|
|
"completions/mean_length": 1496.5625,
|
|
"completions/mean_terminated_length": 1445.0,
|
|
"completions/min_length": 1445.0,
|
|
"completions/min_terminated_length": 1445.0,
|
|
"epoch": 0.017532874139010644,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.995346934747371,
|
|
"kl": 0.002460479736328125,
|
|
"learning_rate": 1.625e-07,
|
|
"loss": -0.001,
|
|
"num_tokens": 745196.0,
|
|
"reward": 1.4901161193847656e-08,
|
|
"reward_std": 1.008323073387146,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.057098024958501865,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10812840498160957,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6666666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1398411797560202,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 14
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1481.0,
|
|
"completions/mean_length": 1267.8125,
|
|
"completions/mean_terminated_length": 1128.5,
|
|
"completions/min_length": 842.0,
|
|
"completions/min_terminated_length": 842.0,
|
|
"epoch": 0.01878522229179712,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.681468707345247,
|
|
"kl": 0.002552032470703125,
|
|
"learning_rate": 1.75e-07,
|
|
"loss": -0.0258,
|
|
"num_tokens": 802777.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.5409140586853027,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.025752634294563932,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1190717918627845,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10318986456114838,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 15
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.8125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1424.0,
|
|
"completions/mean_length": 1465.75,
|
|
"completions/mean_terminated_length": 1317.3333740234375,
|
|
"completions/min_length": 1240.0,
|
|
"completions/min_terminated_length": 1240.0,
|
|
"epoch": 0.020037570444583593,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.952391425850165,
|
|
"kl": 0.00229644775390625,
|
|
"learning_rate": 1.875e-07,
|
|
"loss": 0.0085,
|
|
"num_tokens": 852397.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.9532216191291809,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.009914003172755002,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14279656209744931,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6916666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1085254706406647,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 16
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.75,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1415.0,
|
|
"completions/mean_length": 1464.1875,
|
|
"completions/mean_terminated_length": 1356.75,
|
|
"completions/min_length": 1308.0,
|
|
"completions/min_terminated_length": 1308.0,
|
|
"epoch": 0.021289918597370068,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.6835952376597447,
|
|
"kl": 0.0016937255859375,
|
|
"learning_rate": 2e-07,
|
|
"loss": -0.0182,
|
|
"num_tokens": 905544.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.670647144317627,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0385939635652747,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11140246797780545,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14950535726806533,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 17
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.1875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1456.0,
|
|
"completions/mean_length": 1226.6875,
|
|
"completions/mean_terminated_length": 1163.615478515625,
|
|
"completions/min_length": 833.0,
|
|
"completions/min_terminated_length": 833.0,
|
|
"epoch": 0.022542266750156543,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 4.118592346302386,
|
|
"kl": 0.0031585693359375,
|
|
"learning_rate": 2.1249999999999998e-07,
|
|
"loss": -0.014,
|
|
"num_tokens": 958635.0,
|
|
"reward": -3.725290298461914e-09,
|
|
"reward_std": 1.0170769691467285,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -3.725290298461914e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.004864839675281578,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0373192839130601,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1192569587999888,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 18
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.9375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1374.0,
|
|
"completions/mean_length": 1492.125,
|
|
"completions/mean_terminated_length": 1374.0,
|
|
"completions/min_length": 1374.0,
|
|
"completions/min_terminated_length": 1374.0,
|
|
"epoch": 0.023794614902943018,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.9483970425927475,
|
|
"kl": 0.0020732879638671875,
|
|
"learning_rate": 2.25e-07,
|
|
"loss": 0.0003,
|
|
"num_tokens": 1017653.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.8760651350021362,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.014058396075366015,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03110460490345673,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7416666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10292032157252812,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 19
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1462.0,
|
|
"completions/mean_length": 1290.125,
|
|
"completions/mean_terminated_length": 1164.2000732421875,
|
|
"completions/min_length": 987.0,
|
|
"completions/min_terminated_length": 987.0,
|
|
"epoch": 0.025046963055729492,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.3136168175643763,
|
|
"kl": 0.002140045166015625,
|
|
"learning_rate": 2.3749999999999998e-07,
|
|
"loss": -0.0256,
|
|
"num_tokens": 1065663.0,
|
|
"reward": -1.4901161193847656e-08,
|
|
"reward_std": 0.9537639617919922,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.038097750035485885,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1082295867822669,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7208333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0850925422157591,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 20
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1263.0,
|
|
"completions/mean_length": 1341.5,
|
|
"completions/mean_terminated_length": 1183.0,
|
|
"completions/min_length": 1034.0,
|
|
"completions/min_terminated_length": 1034.0,
|
|
"epoch": 0.026299311208515967,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.306897182610288,
|
|
"kl": 0.0024871826171875,
|
|
"learning_rate": 2.5e-07,
|
|
"loss": -0.0094,
|
|
"num_tokens": 1117263.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.990053117275238,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.011397748892334698,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.046758634855771405,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6916666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13743685418725538,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 21
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1342.0,
|
|
"completions/mean_length": 1266.625,
|
|
"completions/mean_terminated_length": 1033.25,
|
|
"completions/min_length": 774.0,
|
|
"completions/min_terminated_length": 774.0,
|
|
"epoch": 0.027551659361302442,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.3250640108747564,
|
|
"kl": 0.002407073974609375,
|
|
"learning_rate": 2.625e-07,
|
|
"loss": -0.0385,
|
|
"num_tokens": 1172489.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.7966146469116211,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.020326344256082304,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14616918176802837,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6708333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0787635937708768,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 22
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1236.0,
|
|
"completions/mean_length": 1460.875,
|
|
"completions/mean_terminated_length": 1187.0,
|
|
"completions/min_length": 1138.0,
|
|
"completions/min_terminated_length": 1138.0,
|
|
"epoch": 0.028804007514088917,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.4267377669243926,
|
|
"kl": 0.0071010589599609375,
|
|
"learning_rate": 2.75e-07,
|
|
"loss": -0.0111,
|
|
"num_tokens": 1234527.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.5723245143890381,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.02122072131733574,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.157410051166117,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11642832797715322,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 23
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1489.0,
|
|
"completions/mean_length": 1325.625,
|
|
"completions/mean_terminated_length": 1151.25,
|
|
"completions/min_length": 1018.0,
|
|
"completions/min_terminated_length": 1018.0,
|
|
"epoch": 0.03005635566687539,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.24473326800092,
|
|
"kl": 0.0023651123046875,
|
|
"learning_rate": 2.8749999999999995e-07,
|
|
"loss": -0.0069,
|
|
"num_tokens": 1269905.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 1.0385103225708008,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.04647400767345873,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09647557054247557,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.675,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.15371932093796678,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 24
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.3125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1429.0,
|
|
"completions/mean_length": 1377.0625,
|
|
"completions/mean_terminated_length": 1321.181884765625,
|
|
"completions/min_length": 1206.0,
|
|
"completions/min_terminated_length": 1206.0,
|
|
"epoch": 0.031308703819661866,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.6845626042385518,
|
|
"kl": 0.001850128173828125,
|
|
"learning_rate": 3e-07,
|
|
"loss": 0.0174,
|
|
"num_tokens": 1328778.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.7787291407585144,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.09266568639996468,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0822707712414604,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12382783747337808,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 25
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.1875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1352.0,
|
|
"completions/mean_length": 1169.5625,
|
|
"completions/mean_terminated_length": 1093.3077392578125,
|
|
"completions/min_length": 721.0,
|
|
"completions/min_terminated_length": 721.0,
|
|
"epoch": 0.03256105197244834,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.712859953207578,
|
|
"kl": 0.00261688232421875,
|
|
"learning_rate": 3.1249999999999997e-07,
|
|
"loss": 0.0167,
|
|
"num_tokens": 1363011.0,
|
|
"reward": 1.4901161193847656e-08,
|
|
"reward_std": 1.0016117095947266,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.1709176314049482,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1600211117254044,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11800816042090449,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 26
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1413.0,
|
|
"completions/mean_length": 1215.9375,
|
|
"completions/mean_terminated_length": 1197.0001220703125,
|
|
"completions/min_length": 950.0,
|
|
"completions/min_terminated_length": 950.0,
|
|
"epoch": 0.033813400125234816,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.399525494329125,
|
|
"kl": 0.002574920654296875,
|
|
"learning_rate": 3.25e-07,
|
|
"loss": 0.0013,
|
|
"num_tokens": 1407434.0,
|
|
"reward": -1.4901161193847656e-08,
|
|
"reward_std": 0.9290227890014648,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.03979791227452069,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.13950243126020834,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7333333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08073734277593314,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 27
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1368.0,
|
|
"completions/mean_length": 1340.4375,
|
|
"completions/mean_terminated_length": 1180.875,
|
|
"completions/min_length": 1034.0,
|
|
"completions/min_terminated_length": 1034.0,
|
|
"epoch": 0.03506574827802129,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.9566553373980344,
|
|
"kl": 0.0021228790283203125,
|
|
"learning_rate": 3.375e-07,
|
|
"loss": -0.0056,
|
|
"num_tokens": 1458241.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.7809990644454956,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.028119487654073606,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1198837710832071,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6083333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0873477511423713,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 28
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1488.0,
|
|
"completions/mean_length": 1401.625,
|
|
"completions/mean_terminated_length": 1275.1429443359375,
|
|
"completions/min_length": 1054.0,
|
|
"completions/min_terminated_length": 1054.0,
|
|
"epoch": 0.036318096430807766,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.9381632846830548,
|
|
"kl": 0.002338409423828125,
|
|
"learning_rate": 3.5e-07,
|
|
"loss": 0.0125,
|
|
"num_tokens": 1523987.0,
|
|
"reward": 1.4901161193847656e-08,
|
|
"reward_std": 0.9863969087600708,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06145046632658874,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08502220502724643,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6041666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11538983843829063,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 29
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.4375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1394.0,
|
|
"completions/mean_length": 1257.625,
|
|
"completions/mean_terminated_length": 1069.111083984375,
|
|
"completions/min_length": 922.0,
|
|
"completions/min_terminated_length": 922.0,
|
|
"epoch": 0.03757044458359424,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.4028418647289094,
|
|
"kl": 0.0042324066162109375,
|
|
"learning_rate": 3.6249999999999997e-07,
|
|
"loss": -0.0044,
|
|
"num_tokens": 1582341.0,
|
|
"reward": 1.4901161193847656e-08,
|
|
"reward_std": 0.9369316697120667,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.058010557784549034,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06029435215775259,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08606629658238704,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 30
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.9375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1184.0,
|
|
"completions/mean_length": 1480.25,
|
|
"completions/mean_terminated_length": 1184.0,
|
|
"completions/min_length": 1184.0,
|
|
"completions/min_terminated_length": 1184.0,
|
|
"epoch": 0.038822792736380715,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.842249981197029,
|
|
"kl": 0.0021514892578125,
|
|
"learning_rate": 3.75e-07,
|
|
"loss": -0.0106,
|
|
"num_tokens": 1629017.0,
|
|
"reward": -2.2351741790771484e-08,
|
|
"reward_std": 1.0243444442749023,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.2351741790771484e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.010824625533504566,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.02884739427994731,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11474609652039004,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 31
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.4375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1479.0,
|
|
"completions/mean_length": 1032.0,
|
|
"completions/mean_terminated_length": 668.0,
|
|
"completions/min_length": 294.0,
|
|
"completions/min_terminated_length": 294.0,
|
|
"epoch": 0.040075140889167186,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.8067119735652115,
|
|
"kl": 0.0024871826171875,
|
|
"learning_rate": 3.875e-07,
|
|
"loss": 0.0413,
|
|
"num_tokens": 1666305.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.8114193677902222,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.05789475536948171,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04045242685812858,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14707015206910487,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 32
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.25,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1495.0,
|
|
"completions/mean_length": 1271.25,
|
|
"completions/mean_terminated_length": 1195.0,
|
|
"completions/min_length": 1030.0,
|
|
"completions/min_terminated_length": 1030.0,
|
|
"epoch": 0.041327489041953665,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.1322197700429,
|
|
"kl": 0.00167083740234375,
|
|
"learning_rate": 4e-07,
|
|
"loss": -0.0393,
|
|
"num_tokens": 1727629.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.4495465159416199,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.05764457052515048,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11640629412276694,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0906764700582363,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 33
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1436.0,
|
|
"completions/mean_length": 1438.0625,
|
|
"completions/mean_terminated_length": 1334.8333740234375,
|
|
"completions/min_length": 1171.0,
|
|
"completions/min_terminated_length": 1171.0,
|
|
"epoch": 0.042579837194740136,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.291671899271785,
|
|
"kl": 0.0019550323486328125,
|
|
"learning_rate": 4.1249999999999997e-07,
|
|
"loss": 0.0187,
|
|
"num_tokens": 1794062.0,
|
|
"reward": 2.60770320892334e-08,
|
|
"reward_std": 1.0634629726409912,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.60770320892334e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.05489684988302594,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.2423673289052158,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7958333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11538983843829065,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 34
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.4375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1106.0,
|
|
"completions/mean_length": 1253.1875,
|
|
"completions/mean_terminated_length": 1061.2222900390625,
|
|
"completions/min_length": 977.0,
|
|
"completions/min_terminated_length": 977.0,
|
|
"epoch": 0.043832185347526614,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.3253132789681,
|
|
"kl": 0.00135040283203125,
|
|
"learning_rate": 4.2499999999999995e-07,
|
|
"loss": 0.0016,
|
|
"num_tokens": 1847393.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.6563782691955566,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.05072262342914357,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.195641332904443,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07969850595746356,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 35
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1479.0,
|
|
"completions/mean_length": 1399.3125,
|
|
"completions/mean_terminated_length": 1298.625,
|
|
"completions/min_length": 1031.0,
|
|
"completions/min_terminated_length": 1031.0,
|
|
"epoch": 0.045084533500313086,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.7548692610807795,
|
|
"kl": 0.0021877288818359375,
|
|
"learning_rate": 4.375e-07,
|
|
"loss": -0.0061,
|
|
"num_tokens": 1892206.0,
|
|
"reward": 1.4901161193847656e-08,
|
|
"reward_std": 0.9828654527664185,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.06116004436340469,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10276980263780594,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.65,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09888264649460884,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 36
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.1875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1436.0,
|
|
"completions/mean_length": 1352.6875,
|
|
"completions/mean_terminated_length": 1318.6923828125,
|
|
"completions/min_length": 1218.0,
|
|
"completions/min_terminated_length": 1218.0,
|
|
"epoch": 0.046336881653099564,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.47319096015074,
|
|
"kl": 0.0014925003051757812,
|
|
"learning_rate": 4.5e-07,
|
|
"loss": -0.0109,
|
|
"num_tokens": 1940945.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.955802857875824,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.041245323817924374,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.19292307241869963,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08027729719194865,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 37
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1468.0,
|
|
"completions/mean_length": 1375.25,
|
|
"completions/mean_terminated_length": 1250.5,
|
|
"completions/min_length": 959.0,
|
|
"completions/min_terminated_length": 959.0,
|
|
"epoch": 0.047589229805886035,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.1878681605362496,
|
|
"kl": 0.002475738525390625,
|
|
"learning_rate": 4.625e-07,
|
|
"loss": -0.0118,
|
|
"num_tokens": 1985181.0,
|
|
"reward": -7.450580596923828e-09,
|
|
"reward_std": 1.054539442062378,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -7.450580596923828e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.019033803394582376,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10927050985901436,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06831300510639736,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 38
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.75,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1421.0,
|
|
"completions/mean_length": 1447.0,
|
|
"completions/mean_terminated_length": 1288.0,
|
|
"completions/min_length": 1065.0,
|
|
"completions/min_terminated_length": 1065.0,
|
|
"epoch": 0.048841577958672514,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.188392715000756,
|
|
"kl": 0.00237274169921875,
|
|
"learning_rate": 4.7499999999999995e-07,
|
|
"loss": 0.0406,
|
|
"num_tokens": 2034525.0,
|
|
"reward": -7.450580596923828e-09,
|
|
"reward_std": 1.0613259077072144,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -7.450580596923828e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.019229174460983274,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03385821534786073,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09651328828101764,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 39
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1135.0,
|
|
"completions/mean_length": 1148.75,
|
|
"completions/mean_terminated_length": 797.5,
|
|
"completions/min_length": 735.0,
|
|
"completions/min_terminated_length": 735.0,
|
|
"epoch": 0.050093926111458985,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.6930283084099047,
|
|
"kl": 0.0015668869018554688,
|
|
"learning_rate": 4.875e-07,
|
|
"loss": -0.0288,
|
|
"num_tokens": 2080937.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.7898622751235962,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.05072289975795826,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.19078379794323846,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7666666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10036968702787749,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 40
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1433.0,
|
|
"completions/mean_length": 1416.9375,
|
|
"completions/mean_terminated_length": 1333.875,
|
|
"completions/min_length": 1244.0,
|
|
"completions/min_terminated_length": 1244.0,
|
|
"epoch": 0.05134627426424546,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.177282619828498,
|
|
"kl": 0.0012989044189453125,
|
|
"learning_rate": 5e-07,
|
|
"loss": 0.0091,
|
|
"num_tokens": 2136744.0,
|
|
"reward": -1.4901161193847656e-08,
|
|
"reward_std": 1.0509533882141113,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.031295483862865174,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11149225377383207,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07084150279686706,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 41
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.25,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1442.0,
|
|
"completions/mean_length": 1273.4375,
|
|
"completions/mean_terminated_length": 1197.916748046875,
|
|
"completions/min_length": 943.0,
|
|
"completions/min_terminated_length": 943.0,
|
|
"epoch": 0.052598622417031934,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.2241520761115305,
|
|
"kl": 0.002368927001953125,
|
|
"learning_rate": 5.125e-07,
|
|
"loss": 0.004,
|
|
"num_tokens": 2171271.0,
|
|
"reward": -3.725290298461914e-09,
|
|
"reward_std": 1.0308477878570557,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -3.725290298461914e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.12167064883765863,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12965137595029042,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5416666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.15177956725803718,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 42
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.75,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1497.0,
|
|
"completions/mean_length": 1466.6875,
|
|
"completions/mean_terminated_length": 1366.75,
|
|
"completions/min_length": 1132.0,
|
|
"completions/min_terminated_length": 1132.0,
|
|
"epoch": 0.05385097056981841,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.9471462447348635,
|
|
"kl": 0.0021038055419921875,
|
|
"learning_rate": 5.25e-07,
|
|
"loss": -0.0164,
|
|
"num_tokens": 2231986.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.23251324892044067,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04063102604876061,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.22066858001488113,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7708333333333334,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.205074512203627,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 43
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1492.0,
|
|
"completions/mean_length": 1422.3125,
|
|
"completions/mean_terminated_length": 1322.4285888671875,
|
|
"completions/min_length": 1074.0,
|
|
"completions/min_terminated_length": 1074.0,
|
|
"epoch": 0.055103318722604884,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.4268519840677527,
|
|
"kl": 0.0010042190551757812,
|
|
"learning_rate": 5.374999999999999e-07,
|
|
"loss": -0.0099,
|
|
"num_tokens": 2288223.0,
|
|
"reward": -1.4901161193847656e-08,
|
|
"reward_std": 0.9692014455795288,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.070492449256456,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.20832413138507544,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7666666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06440611887195309,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 44
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1448.0,
|
|
"completions/mean_length": 1376.0,
|
|
"completions/mean_terminated_length": 1301.5999755859375,
|
|
"completions/min_length": 954.0,
|
|
"completions/min_terminated_length": 954.0,
|
|
"epoch": 0.056355666875391355,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.03273915452992,
|
|
"kl": 0.002422332763671875,
|
|
"learning_rate": 5.5e-07,
|
|
"loss": 0.0123,
|
|
"num_tokens": 2354343.0,
|
|
"reward": 1.862645149230957e-08,
|
|
"reward_std": 1.067973256111145,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 1.862645149230957e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.1362560230689488,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16718884747044185,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.058214163988576643,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 45
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1460.0,
|
|
"completions/mean_length": 1453.0625,
|
|
"completions/mean_terminated_length": 1392.71435546875,
|
|
"completions/min_length": 1326.0,
|
|
"completions/min_terminated_length": 1326.0,
|
|
"epoch": 0.057608015028177834,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.7636896151627517,
|
|
"kl": 0.001781463623046875,
|
|
"learning_rate": 5.625e-07,
|
|
"loss": -0.0161,
|
|
"num_tokens": 2410776.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.722027599811554,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.175370955230916,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.15404241260320187,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7208333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14446581038560777,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 46
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.75,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1185.0,
|
|
"completions/mean_length": 1384.8125,
|
|
"completions/mean_terminated_length": 1039.25,
|
|
"completions/min_length": 797.0,
|
|
"completions/min_terminated_length": 797.0,
|
|
"epoch": 0.058860363180964305,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.218383433006556,
|
|
"kl": 0.002429962158203125,
|
|
"learning_rate": 5.749999999999999e-07,
|
|
"loss": -0.0616,
|
|
"num_tokens": 2460181.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.8904982209205627,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.020496850203242982,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.13226975013047063,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7041666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08062257748298553,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 47
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.8125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1438.0,
|
|
"completions/mean_length": 1472.0,
|
|
"completions/mean_terminated_length": 1350.666748046875,
|
|
"completions/min_length": 1240.0,
|
|
"completions/min_terminated_length": 1240.0,
|
|
"epoch": 0.06011271133375078,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.5256786742208663,
|
|
"kl": 0.002872467041015625,
|
|
"learning_rate": 5.875e-07,
|
|
"loss": -0.0096,
|
|
"num_tokens": 2524269.0,
|
|
"reward": -1.4901161193847656e-08,
|
|
"reward_std": 1.001219630241394,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0854065639247727,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0950921206250912,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10327955589886446,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 48
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1449.0,
|
|
"completions/mean_length": 1430.9375,
|
|
"completions/mean_terminated_length": 1315.8333740234375,
|
|
"completions/min_length": 1131.0,
|
|
"completions/min_terminated_length": 1131.0,
|
|
"epoch": 0.061365059486537255,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.961325619079936,
|
|
"kl": 0.002254486083984375,
|
|
"learning_rate": 6e-07,
|
|
"loss": 0.0013,
|
|
"num_tokens": 2584356.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.9873535633087158,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.029950137491573797,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16218750528728998,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7708333333333334,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08421753138505425,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 49
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1466.0,
|
|
"completions/mean_length": 1491.125,
|
|
"completions/mean_terminated_length": 1429.0,
|
|
"completions/min_length": 1392.0,
|
|
"completions/min_terminated_length": 1392.0,
|
|
"epoch": 0.06261740763932373,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.6822700139828397,
|
|
"kl": 0.0020389556884765625,
|
|
"learning_rate": 6.125000000000001e-07,
|
|
"loss": -0.0009,
|
|
"num_tokens": 2648270.0,
|
|
"reward": -5.960464477539063e-08,
|
|
"reward_std": 0.7698144912719727,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -5.960464477539063e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.032020807081585716,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.053695035371207615,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7333333333333334,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11417984514369005,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 50
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.4375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1457.0,
|
|
"completions/mean_length": 1282.125,
|
|
"completions/mean_terminated_length": 1112.6666259765625,
|
|
"completions/min_length": 802.0,
|
|
"completions/min_terminated_length": 802.0,
|
|
"epoch": 0.06386975579211021,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.07804473575342,
|
|
"kl": 0.002071380615234375,
|
|
"learning_rate": 6.249999999999999e-07,
|
|
"loss": -0.0074,
|
|
"num_tokens": 2693776.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.5634655952453613,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.013292184885055576,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12085541345993306,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08333333333333336,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 51
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.8125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1489.0,
|
|
"completions/mean_length": 1462.75,
|
|
"completions/mean_terminated_length": 1301.3333740234375,
|
|
"completions/min_length": 998.0,
|
|
"completions/min_terminated_length": 998.0,
|
|
"epoch": 0.06512210394489668,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.194495117066712,
|
|
"kl": 0.002658843994140625,
|
|
"learning_rate": 6.374999999999999e-07,
|
|
"loss": 0.0226,
|
|
"num_tokens": 2758980.0,
|
|
"reward": -1.4901161193847656e-08,
|
|
"reward_std": 0.985281229019165,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.02226574155778713,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05167870819779757,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6208333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07969850595746353,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 52
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1426.0,
|
|
"completions/mean_length": 1321.3125,
|
|
"completions/mean_terminated_length": 1295.7857666015625,
|
|
"completions/min_length": 1123.0,
|
|
"completions/min_terminated_length": 1123.0,
|
|
"epoch": 0.06637445209768315,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.085117182590426,
|
|
"kl": 0.0022735595703125,
|
|
"learning_rate": 6.5e-07,
|
|
"loss": 0.0196,
|
|
"num_tokens": 2825249.0,
|
|
"reward": 4.470348358154297e-08,
|
|
"reward_std": 0.9839984774589539,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 4.470348358154297e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.08735912330077701,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14559155866011037,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06978803887752093,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 53
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.9375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1477.0,
|
|
"completions/mean_length": 1498.5625,
|
|
"completions/mean_terminated_length": 1477.0,
|
|
"completions/min_length": 1477.0,
|
|
"completions/min_terminated_length": 1477.0,
|
|
"epoch": 0.06762680025046963,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.9348811206443304,
|
|
"kl": 0.00191497802734375,
|
|
"learning_rate": 6.624999999999999e-07,
|
|
"loss": 0.0001,
|
|
"num_tokens": 2889498.0,
|
|
"reward": -1.4901161193847656e-08,
|
|
"reward_std": 1.0318164825439453,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04941181253574712,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06836218150195612,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7916666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.05900408021045227,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 54
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1472.0,
|
|
"completions/mean_length": 1384.875,
|
|
"completions/mean_terminated_length": 1236.857177734375,
|
|
"completions/min_length": 913.0,
|
|
"completions/min_terminated_length": 913.0,
|
|
"epoch": 0.06887914840325611,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.0200180369762695,
|
|
"kl": 0.0021152496337890625,
|
|
"learning_rate": 6.75e-07,
|
|
"loss": -0.0343,
|
|
"num_tokens": 2950200.0,
|
|
"reward": 7.450580596923828e-09,
|
|
"reward_std": 1.0550494194030762,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.038887574815180403,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06912072840442107,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07252075054258099,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 55
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1450.0,
|
|
"completions/mean_length": 1435.0625,
|
|
"completions/mean_terminated_length": 1370.125,
|
|
"completions/min_length": 1161.0,
|
|
"completions/min_terminated_length": 1161.0,
|
|
"epoch": 0.07013149655604257,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.9170742885205607,
|
|
"kl": 0.0019893646240234375,
|
|
"learning_rate": 6.875e-07,
|
|
"loss": 0.0029,
|
|
"num_tokens": 3019673.0,
|
|
"reward": -1.4901161193847656e-08,
|
|
"reward_std": 0.9821785688400269,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.016992912073662925,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.105336871629235,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11385500851066223,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 56
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1499.0,
|
|
"completions/mean_length": 1360.75,
|
|
"completions/mean_terminated_length": 1221.5,
|
|
"completions/min_length": 1081.0,
|
|
"completions/min_terminated_length": 1081.0,
|
|
"epoch": 0.07138384470882905,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.3821452004991794,
|
|
"kl": 0.0014314651489257812,
|
|
"learning_rate": 7e-07,
|
|
"loss": 0.029,
|
|
"num_tokens": 3075477.0,
|
|
"reward": 2.60770320892334e-08,
|
|
"reward_std": 1.0472596883773804,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.60770320892334e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.010678083797130186,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11394385265661125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7374999999999999,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.045338235029118164,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 57
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1392.0,
|
|
"completions/max_terminated_length": 1392.0,
|
|
"completions/mean_length": 970.625,
|
|
"completions/mean_terminated_length": 970.625,
|
|
"completions/min_length": 715.0,
|
|
"completions/min_terminated_length": 715.0,
|
|
"epoch": 0.07263619286161553,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.5943049280036243,
|
|
"kl": 0.0017871856689453125,
|
|
"learning_rate": 7.125e-07,
|
|
"loss": -0.0489,
|
|
"num_tokens": 3103423.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.8579948544502258,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.11955284309699343,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1294140259487627,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7708333333333334,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0909822937597079,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 58
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1451.0,
|
|
"completions/mean_length": 1484.75,
|
|
"completions/mean_terminated_length": 1378.0,
|
|
"completions/min_length": 1305.0,
|
|
"completions/min_terminated_length": 1305.0,
|
|
"epoch": 0.07388854101440201,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.914915162479294,
|
|
"kl": 0.0023479461669921875,
|
|
"learning_rate": 7.249999999999999e-07,
|
|
"loss": -0.0133,
|
|
"num_tokens": 3170979.0,
|
|
"reward": 1.4901161193847656e-08,
|
|
"reward_std": 1.0570372343063354,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.004701879619984315,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0950367185128266,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09016445879408157,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 59
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1500.0,
|
|
"completions/mean_length": 1359.9375,
|
|
"completions/mean_terminated_length": 1179.857177734375,
|
|
"completions/min_length": 406.0,
|
|
"completions/min_terminated_length": 406.0,
|
|
"epoch": 0.07514088916718847,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.283089786479577,
|
|
"kl": 0.002674102783203125,
|
|
"learning_rate": 7.375e-07,
|
|
"loss": -0.0525,
|
|
"num_tokens": 3233802.0,
|
|
"reward": -1.6763806343078613e-08,
|
|
"reward_std": 1.050881028175354,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -1.6763806343078613e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.02996931362982372,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07266154836265915,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08606629658238706,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 60
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 1.0,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 0.0,
|
|
"completions/mean_length": 1500.0,
|
|
"completions/mean_terminated_length": 0.0,
|
|
"completions/min_length": 1500.0,
|
|
"completions/min_terminated_length": 0.0,
|
|
"epoch": 0.07639323731997495,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.311519312396822,
|
|
"kl": 0.00279998779296875,
|
|
"learning_rate": 7.5e-07,
|
|
"loss": 0.0001,
|
|
"num_tokens": 3293354.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 1.0101943016052246,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.04793344228148064,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12274932480508612,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13655822255780922,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 61
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1454.0,
|
|
"completions/mean_length": 1410.1875,
|
|
"completions/mean_terminated_length": 1294.71435546875,
|
|
"completions/min_length": 1137.0,
|
|
"completions/min_terminated_length": 1137.0,
|
|
"epoch": 0.07764558547276143,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.0789417256546874,
|
|
"kl": 0.0022792816162109375,
|
|
"learning_rate": 7.624999999999999e-07,
|
|
"loss": -0.0109,
|
|
"num_tokens": 3334909.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.9900147914886475,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.004903461451645089,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03771048515625185,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.15581327856693658,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 62
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1316.0,
|
|
"completions/mean_length": 1133.0,
|
|
"completions/mean_terminated_length": 1108.533447265625,
|
|
"completions/min_length": 957.0,
|
|
"completions/min_terminated_length": 957.0,
|
|
"epoch": 0.07889793362554791,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.259053190740638,
|
|
"kl": 0.0018634796142578125,
|
|
"learning_rate": 7.75e-07,
|
|
"loss": -0.0194,
|
|
"num_tokens": 3383333.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.671829104423523,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09142372399409204,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09598955648379433,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6541666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08509254221575907,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 63
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.75,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1484.0,
|
|
"completions/mean_length": 1463.0,
|
|
"completions/mean_terminated_length": 1352.0,
|
|
"completions/min_length": 1206.0,
|
|
"completions/min_terminated_length": 1206.0,
|
|
"epoch": 0.08015028177833437,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.187716368550353,
|
|
"kl": 0.002471923828125,
|
|
"learning_rate": 7.875e-07,
|
|
"loss": 0.0106,
|
|
"num_tokens": 3442269.0,
|
|
"reward": -1.4901161193847656e-08,
|
|
"reward_std": 1.0351850986480713,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.12370484162737726,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1619343847332339,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10470416879457553,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 64
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1483.0,
|
|
"completions/mean_length": 1410.625,
|
|
"completions/mean_terminated_length": 1261.666748046875,
|
|
"completions/min_length": 995.0,
|
|
"completions/min_terminated_length": 995.0,
|
|
"epoch": 0.08140262993112085,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.218117740066407,
|
|
"kl": 0.002559661865234375,
|
|
"learning_rate": 8e-07,
|
|
"loss": -0.0443,
|
|
"num_tokens": 3489911.0,
|
|
"reward": 5.960464477539063e-08,
|
|
"reward_std": 0.5395079851150513,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 5.960464477539063e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.061171909778282046,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06618755934392026,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09888264649460886,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 65
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1491.0,
|
|
"completions/mean_length": 1316.6875,
|
|
"completions/mean_terminated_length": 1206.7000732421875,
|
|
"completions/min_length": 869.0,
|
|
"completions/min_terminated_length": 869.0,
|
|
"epoch": 0.08265497808390733,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.223646819331395,
|
|
"kl": 0.002506256103515625,
|
|
"learning_rate": 8.125e-07,
|
|
"loss": -0.0004,
|
|
"num_tokens": 3531330.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.9571313858032227,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.027972706586888517,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1908156027057365,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08734775114237132,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 66
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1330.0,
|
|
"completions/mean_length": 1478.5625,
|
|
"completions/mean_terminated_length": 1328.5,
|
|
"completions/min_length": 1327.0,
|
|
"completions/min_terminated_length": 1327.0,
|
|
"epoch": 0.08390732623669381,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.411248138087788,
|
|
"kl": 0.00255584716796875,
|
|
"learning_rate": 8.249999999999999e-07,
|
|
"loss": 0.0085,
|
|
"num_tokens": 3591331.0,
|
|
"reward": -5.960464477539063e-08,
|
|
"reward_std": 0.6705090403556824,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -5.960464477539063e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.3499282464198203,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.3060898603663511,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7041666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08766518798921946,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 67
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.9375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1422.0,
|
|
"completions/mean_length": 1495.125,
|
|
"completions/mean_terminated_length": 1422.0,
|
|
"completions/min_length": 1422.0,
|
|
"completions/min_terminated_length": 1422.0,
|
|
"epoch": 0.08515967438948027,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.025267713589772,
|
|
"kl": 0.002880096435546875,
|
|
"learning_rate": 8.375e-07,
|
|
"loss": -0.0014,
|
|
"num_tokens": 3658421.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.9633276462554932,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.07580010422442789,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.17700501480681413,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.05692750425533113,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 68
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1309.0,
|
|
"completions/mean_length": 1469.875,
|
|
"completions/mean_terminated_length": 1259.0,
|
|
"completions/min_length": 1209.0,
|
|
"completions/min_terminated_length": 1209.0,
|
|
"epoch": 0.08641202254226675,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.0466734222423546,
|
|
"kl": 0.002544403076171875,
|
|
"learning_rate": 8.499999999999999e-07,
|
|
"loss": 0.0044,
|
|
"num_tokens": 3724899.0,
|
|
"reward": 0.0,
|
|
"reward_std": 1.0227458477020264,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.002677645774302454,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11990711113827299,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.65,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10470416879457552,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 69
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1499.0,
|
|
"completions/mean_length": 1410.8125,
|
|
"completions/mean_terminated_length": 1321.625,
|
|
"completions/min_length": 1070.0,
|
|
"completions/min_terminated_length": 1070.0,
|
|
"epoch": 0.08766437069505323,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.318934314260283,
|
|
"kl": 0.002834320068359375,
|
|
"learning_rate": 8.625e-07,
|
|
"loss": 0.0072,
|
|
"num_tokens": 3777184.0,
|
|
"reward": 7.450580596923828e-09,
|
|
"reward_std": 1.0494259595870972,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.024827264373621732,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.036366284403351476,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0859586463881842,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 70
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1479.0,
|
|
"completions/mean_length": 1399.25,
|
|
"completions/mean_terminated_length": 1231.3333740234375,
|
|
"completions/min_length": 1009.0,
|
|
"completions/min_terminated_length": 1009.0,
|
|
"epoch": 0.08891671884783969,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.8509264779724033,
|
|
"kl": 0.002223968505859375,
|
|
"learning_rate": 8.75e-07,
|
|
"loss": 0.0037,
|
|
"num_tokens": 3836428.0,
|
|
"reward": 0.0,
|
|
"reward_std": 1.0668516159057617,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.053166199498163626,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12647299276011556,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09699179041242309,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 71
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.1875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1447.0,
|
|
"completions/mean_length": 1199.625,
|
|
"completions/mean_terminated_length": 1130.3077392578125,
|
|
"completions/min_length": 968.0,
|
|
"completions/min_terminated_length": 968.0,
|
|
"epoch": 0.09016906700062617,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.886201838693034,
|
|
"kl": 0.001605987548828125,
|
|
"learning_rate": 8.874999999999999e-07,
|
|
"loss": -0.0027,
|
|
"num_tokens": 3881094.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.8761758804321289,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.02026371657719268,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04408943383486411,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14548768561863465,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 72
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 1.0,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 0.0,
|
|
"completions/mean_length": 1500.0,
|
|
"completions/mean_terminated_length": 0.0,
|
|
"completions/min_length": 1500.0,
|
|
"completions/min_terminated_length": 0.0,
|
|
"epoch": 0.09142141515341265,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.60284832797847,
|
|
"kl": 0.00217437744140625,
|
|
"learning_rate": 9e-07,
|
|
"loss": 0.0001,
|
|
"num_tokens": 3940518.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.5877071619033813,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.027393406712592036,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0844493241747004,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.067631901304592,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 73
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.25,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1482.0,
|
|
"completions/mean_length": 1269.3125,
|
|
"completions/mean_terminated_length": 1192.416748046875,
|
|
"completions/min_length": 959.0,
|
|
"completions/min_terminated_length": 959.0,
|
|
"epoch": 0.09267376330619913,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.2612995556206354,
|
|
"kl": 0.002330780029296875,
|
|
"learning_rate": 9.124999999999999e-07,
|
|
"loss": -0.0066,
|
|
"num_tokens": 3982827.0,
|
|
"reward": -1.4901161193847656e-08,
|
|
"reward_std": 0.924209713935852,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.021465279786927867,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03289535545475229,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6541666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11979921473804345,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 74
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.4375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1419.0,
|
|
"completions/mean_length": 1346.4375,
|
|
"completions/mean_terminated_length": 1227.0,
|
|
"completions/min_length": 1081.0,
|
|
"completions/min_terminated_length": 1081.0,
|
|
"epoch": 0.09392611145898559,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.6195723075826596,
|
|
"kl": 0.00183868408203125,
|
|
"learning_rate": 9.25e-07,
|
|
"loss": -0.0361,
|
|
"num_tokens": 4041194.0,
|
|
"reward": 1.1175870895385742e-08,
|
|
"reward_std": 1.0540246963500977,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 1.1175870895385742e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.03654417489517675,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.055054088822312976,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6458333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07588978362901858,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327954292297363,
|
|
"step": 75
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1492.0,
|
|
"completions/mean_length": 1492.0625,
|
|
"completions/mean_terminated_length": 1436.5,
|
|
"completions/min_length": 1381.0,
|
|
"completions/min_terminated_length": 1381.0,
|
|
"epoch": 0.09517845961177207,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.1883337396909632,
|
|
"kl": 0.0028228759765625,
|
|
"learning_rate": 9.374999999999999e-07,
|
|
"loss": -0.0004,
|
|
"num_tokens": 4102531.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.7272332906723022,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.06657008296291109,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08174957503379145,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7458333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11213417888437976,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 76
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1428.0,
|
|
"completions/mean_length": 1347.875,
|
|
"completions/mean_terminated_length": 1152.2857666015625,
|
|
"completions/min_length": 807.0,
|
|
"completions/min_terminated_length": 807.0,
|
|
"epoch": 0.09643080776455855,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.323504050782515,
|
|
"kl": 0.002685546875,
|
|
"learning_rate": 9.499999999999999e-07,
|
|
"loss": -0.012,
|
|
"num_tokens": 4154537.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.9932632446289062,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.12575056940966298,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.15133213208857665,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6916666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14782371884055634,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 77
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.6875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1477.0,
|
|
"completions/mean_length": 1434.4375,
|
|
"completions/mean_terminated_length": 1290.2000732421875,
|
|
"completions/min_length": 1178.0,
|
|
"completions/min_terminated_length": 1178.0,
|
|
"epoch": 0.09768315591734503,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.4382048596496553,
|
|
"kl": 0.002773284912109375,
|
|
"learning_rate": 9.624999999999999e-07,
|
|
"loss": -0.0322,
|
|
"num_tokens": 4221464.0,
|
|
"reward": -2.60770320892334e-08,
|
|
"reward_std": 1.0265973806381226,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.60770320892334e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.08170559900334663,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10185399685140464,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.161245154965971,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 78
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1500.0,
|
|
"completions/mean_length": 1248.3125,
|
|
"completions/mean_terminated_length": 1097.300048828125,
|
|
"completions/min_length": 870.0,
|
|
"completions/min_terminated_length": 870.0,
|
|
"epoch": 0.09893550407013149,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.648370216175046,
|
|
"kl": 0.0019168853759765625,
|
|
"learning_rate": 9.75e-07,
|
|
"loss": -0.027,
|
|
"num_tokens": 4267669.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.9588196873664856,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.07500714246624458,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06993198507995109,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.05288001793018134,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 79
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.75,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1494.0,
|
|
"completions/mean_length": 1436.75,
|
|
"completions/mean_terminated_length": 1247.0,
|
|
"completions/min_length": 1132.0,
|
|
"completions/min_terminated_length": 1132.0,
|
|
"epoch": 0.10018785222291797,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.879418147641467,
|
|
"kl": 0.0019855499267578125,
|
|
"learning_rate": 9.875e-07,
|
|
"loss": -0.0127,
|
|
"num_tokens": 4328465.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.9200654029846191,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.1453335125370645,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1827536027247548,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09496588081262934,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 80
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1488.0,
|
|
"completions/mean_length": 1422.1875,
|
|
"completions/mean_terminated_length": 1344.375,
|
|
"completions/min_length": 1237.0,
|
|
"completions/min_terminated_length": 1237.0,
|
|
"epoch": 0.10144020037570445,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.635617733673008,
|
|
"kl": 0.0017528533935546875,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0046,
|
|
"num_tokens": 4373324.0,
|
|
"reward": -3.725290298461914e-09,
|
|
"reward_std": 1.0682477951049805,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -3.725290298461914e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.053201182409366166,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.044798463974146746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07876359377087683,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 81
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.3125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1459.0,
|
|
"completions/mean_length": 1170.5,
|
|
"completions/mean_terminated_length": 1020.727294921875,
|
|
"completions/min_length": 844.0,
|
|
"completions/min_terminated_length": 844.0,
|
|
"epoch": 0.10269254852849093,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.8578219249339107,
|
|
"kl": 0.0019054412841796875,
|
|
"learning_rate": 9.999957044004145e-07,
|
|
"loss": -0.0353,
|
|
"num_tokens": 4419844.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.4868781566619873,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.1691690312178033,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1856850439917278,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08509254221575908,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 82
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1413.0,
|
|
"completions/mean_length": 1202.4375,
|
|
"completions/mean_terminated_length": 1182.60009765625,
|
|
"completions/min_length": 943.0,
|
|
"completions/min_terminated_length": 943.0,
|
|
"epoch": 0.10394489668127739,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.206094248867889,
|
|
"kl": 0.0022640228271484375,
|
|
"learning_rate": 9.999828176836682e-07,
|
|
"loss": -0.0042,
|
|
"num_tokens": 4464763.0,
|
|
"reward": 7.450580596923828e-09,
|
|
"reward_std": 0.9854896068572998,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.11969234946420118,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.3068885267137289,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7083333333333334,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08388704928078614,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 83
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.4375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1490.0,
|
|
"completions/mean_length": 1347.25,
|
|
"completions/mean_terminated_length": 1228.4444580078125,
|
|
"completions/min_length": 872.0,
|
|
"completions/min_terminated_length": 872.0,
|
|
"epoch": 0.10519724483406387,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.3502401935196273,
|
|
"kl": 0.0025177001953125,
|
|
"learning_rate": 9.99961340095788e-07,
|
|
"loss": -0.0232,
|
|
"num_tokens": 4520295.0,
|
|
"reward": -7.450580596923828e-09,
|
|
"reward_std": 1.0421638488769531,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -7.450580596923828e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.04940475583906399,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10190244243958202,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12102953419784838,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 84
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.1875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1485.0,
|
|
"completions/mean_length": 1309.6875,
|
|
"completions/mean_terminated_length": 1265.769287109375,
|
|
"completions/min_length": 859.0,
|
|
"completions/min_terminated_length": 859.0,
|
|
"epoch": 0.10644959298685035,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.728411933718049,
|
|
"kl": 0.001689910888671875,
|
|
"learning_rate": 9.99931272046815e-07,
|
|
"loss": -0.0142,
|
|
"num_tokens": 4576338.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.8622345924377441,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.016984465370970727,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.040579939841277814,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6708333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08595864638818418,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 85
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.3125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1489.0,
|
|
"completions/mean_length": 1341.5625,
|
|
"completions/mean_terminated_length": 1269.5455322265625,
|
|
"completions/min_length": 982.0,
|
|
"completions/min_terminated_length": 982.0,
|
|
"epoch": 0.10770194113963683,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.200552135647029,
|
|
"kl": 0.002315521240234375,
|
|
"learning_rate": 9.998926141107945e-07,
|
|
"loss": 0.0351,
|
|
"num_tokens": 4618667.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.8471476435661316,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.22087111411084098,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.24091025740898386,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08153617692869927,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 86
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1489.0,
|
|
"completions/mean_length": 1394.6875,
|
|
"completions/mean_terminated_length": 1259.2857666015625,
|
|
"completions/min_length": 1069.0,
|
|
"completions/min_terminated_length": 1069.0,
|
|
"epoch": 0.10895428929242329,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.177667386837352,
|
|
"kl": 0.002468109130859375,
|
|
"learning_rate": 9.998453670257666e-07,
|
|
"loss": 0.0024,
|
|
"num_tokens": 4675550.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.3878336548805237,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06448512648276508,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0842294519714606,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12405196043952266,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 87
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.8125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1363.0,
|
|
"completions/mean_length": 1469.6875,
|
|
"completions/mean_terminated_length": 1338.3333740234375,
|
|
"completions/min_length": 1296.0,
|
|
"completions/min_terminated_length": 1296.0,
|
|
"epoch": 0.11020663744520977,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.074898456526424,
|
|
"kl": 0.00238037109375,
|
|
"learning_rate": 9.997895316937517e-07,
|
|
"loss": 0.0066,
|
|
"num_tokens": 4734649.0,
|
|
"reward": -4.470348358154297e-08,
|
|
"reward_std": 0.9637711048126221,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -4.470348358154297e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09676546074924117,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06959776462437538,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7666666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10886621079036349,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 88
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.25,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1454.0,
|
|
"completions/mean_length": 1303.1875,
|
|
"completions/mean_terminated_length": 1237.5833740234375,
|
|
"completions/min_length": 1039.0,
|
|
"completions/min_terminated_length": 1039.0,
|
|
"epoch": 0.11145898559799625,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.3833185192510284,
|
|
"kl": 0.001354217529296875,
|
|
"learning_rate": 9.997251091807332e-07,
|
|
"loss": 0.0171,
|
|
"num_tokens": 4789676.0,
|
|
"reward": 0.0,
|
|
"reward_std": 1.016492486000061,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.12777237426683458,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.21498123308224262,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6458333333333334,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11080513425729775,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 89
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.8125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1348.0,
|
|
"completions/mean_length": 1453.0625,
|
|
"completions/mean_terminated_length": 1249.666748046875,
|
|
"completions/min_length": 1149.0,
|
|
"completions/min_terminated_length": 1149.0,
|
|
"epoch": 0.11271133375078271,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.1689450227648854,
|
|
"kl": 0.002933502197265625,
|
|
"learning_rate": 9.99652100716637e-07,
|
|
"loss": -0.0062,
|
|
"num_tokens": 4847781.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.64935302734375,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.16229754855451553,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.20151739444607794,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6291666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.18373692949230228,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 90
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.75,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1492.0,
|
|
"completions/mean_length": 1450.0,
|
|
"completions/mean_terminated_length": 1300.0,
|
|
"completions/min_length": 1049.0,
|
|
"completions/min_terminated_length": 1049.0,
|
|
"epoch": 0.11396368190356919,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.0146226006623476,
|
|
"kl": 0.002593994140625,
|
|
"learning_rate": 9.995705076953075e-07,
|
|
"loss": -0.0291,
|
|
"num_tokens": 4905421.0,
|
|
"reward": 0.0,
|
|
"reward_std": 1.0383461713790894,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.06052119205813296,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12160618129006116,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6666666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09108400680852977,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 91
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 910.0,
|
|
"completions/mean_length": 1144.0625,
|
|
"completions/mean_terminated_length": 788.125,
|
|
"completions/min_length": 610.0,
|
|
"completions/min_terminated_length": 610.0,
|
|
"epoch": 0.11521603005635567,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.228817256723133,
|
|
"kl": 0.0014410018920898438,
|
|
"learning_rate": 9.994803316744828e-07,
|
|
"loss": 0.0105,
|
|
"num_tokens": 4950462.0,
|
|
"reward": -4.470348358154297e-08,
|
|
"reward_std": 0.9390549659729004,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -4.470348358154297e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.07564319510568883,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1514996148617109,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.15770342536029575,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 92
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1471.0,
|
|
"completions/max_terminated_length": 1471.0,
|
|
"completions/mean_length": 1093.8125,
|
|
"completions/mean_terminated_length": 1093.8125,
|
|
"completions/min_length": 638.0,
|
|
"completions/min_terminated_length": 638.0,
|
|
"epoch": 0.11646837820914215,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.6363277397384617,
|
|
"kl": 0.002498626708984375,
|
|
"learning_rate": 9.993815743757633e-07,
|
|
"loss": -0.0484,
|
|
"num_tokens": 4983835.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.8996579647064209,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0037569304970198007,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07736656048737343,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6166666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.2014760347847669,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 93
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1494.0,
|
|
"completions/mean_length": 1417.75,
|
|
"completions/mean_terminated_length": 1335.5,
|
|
"completions/min_length": 1111.0,
|
|
"completions/min_terminated_length": 1111.0,
|
|
"epoch": 0.11772072636192861,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.20990587817039,
|
|
"kl": 0.002735137939453125,
|
|
"learning_rate": 9.99274237684579e-07,
|
|
"loss": 0.004,
|
|
"num_tokens": 5030407.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.6368776559829712,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.029770016601004534,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0349532410691535,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7958333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10461569884316813,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 94
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1347.0,
|
|
"completions/max_terminated_length": 1347.0,
|
|
"completions/mean_length": 926.8125,
|
|
"completions/mean_terminated_length": 926.8125,
|
|
"completions/min_length": 631.0,
|
|
"completions/min_terminated_length": 631.0,
|
|
"epoch": 0.11897307451471509,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.7699870834508333,
|
|
"kl": 0.0008082389831542969,
|
|
"learning_rate": 9.99158323650154e-07,
|
|
"loss": -0.0527,
|
|
"num_tokens": 5074556.0,
|
|
"reward": -3.725290298461914e-09,
|
|
"reward_std": 1.0668668746948242,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -3.725290298461914e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.01722883909028131,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.19517428674960768,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6666666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0843274042711568,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 95
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.4375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1393.0,
|
|
"completions/mean_length": 1283.5625,
|
|
"completions/mean_terminated_length": 1115.2222900390625,
|
|
"completions/min_length": 942.0,
|
|
"completions/min_terminated_length": 942.0,
|
|
"epoch": 0.12022542266750157,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.2021886227663034,
|
|
"kl": 0.002685546875,
|
|
"learning_rate": 9.990338344854676e-07,
|
|
"loss": -0.0074,
|
|
"num_tokens": 5120597.0,
|
|
"reward": 1.4901161193847656e-08,
|
|
"reward_std": 0.9720104336738586,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.024841432663237503,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.17561297504079998,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6541666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08850612031567837,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 96
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1486.0,
|
|
"completions/mean_length": 1415.3125,
|
|
"completions/mean_terminated_length": 1364.5,
|
|
"completions/min_length": 1206.0,
|
|
"completions/min_terminated_length": 1206.0,
|
|
"epoch": 0.12147777082028804,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.2246665277704185,
|
|
"kl": 0.002559661865234375,
|
|
"learning_rate": 9.989007725672113e-07,
|
|
"loss": 0.0063,
|
|
"num_tokens": 5158170.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.7684129476547241,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.020625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0825,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5958333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1586400537905439,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 97
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.4375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1334.0,
|
|
"completions/mean_length": 1175.3125,
|
|
"completions/mean_terminated_length": 922.7777709960938,
|
|
"completions/min_length": 596.0,
|
|
"completions/min_terminated_length": 596.0,
|
|
"epoch": 0.12273011897307451,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.169149502657088,
|
|
"kl": 0.00231170654296875,
|
|
"learning_rate": 9.987591404357437e-07,
|
|
"loss": -0.0811,
|
|
"num_tokens": 5215647.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.9120274782180786,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.005036444545787546,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10234315753446507,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1387777332977422,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 98
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1366.0,
|
|
"completions/mean_length": 1264.5,
|
|
"completions/mean_terminated_length": 1123.2000732421875,
|
|
"completions/min_length": 983.0,
|
|
"completions/min_terminated_length": 983.0,
|
|
"epoch": 0.12398246712586099,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.9409598040312765,
|
|
"kl": 0.002063751220703125,
|
|
"learning_rate": 9.986089407950426e-07,
|
|
"loss": -0.0453,
|
|
"num_tokens": 5250879.0,
|
|
"reward": 7.450580596923828e-09,
|
|
"reward_std": 1.0199556350708008,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.11830339701018143,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.25916185560707883,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 1.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09016445879408157,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 99
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1437.0,
|
|
"completions/mean_length": 1406.9375,
|
|
"completions/mean_terminated_length": 1251.8333740234375,
|
|
"completions/min_length": 906.0,
|
|
"completions/min_terminated_length": 906.0,
|
|
"epoch": 0.12523481527864747,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.236489502184281,
|
|
"kl": 0.0029754638671875,
|
|
"learning_rate": 9.98450176512652e-07,
|
|
"loss": 0.0261,
|
|
"num_tokens": 5303030.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.8868198990821838,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.14501472660672157,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.15004116932595393,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1172998689652263,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 100
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.25,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1468.0,
|
|
"completions/mean_length": 1374.0625,
|
|
"completions/mean_terminated_length": 1332.0833740234375,
|
|
"completions/min_length": 1208.0,
|
|
"completions/min_terminated_length": 1208.0,
|
|
"epoch": 0.12648716343143393,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.7393115321098898,
|
|
"kl": 0.0021686553955078125,
|
|
"learning_rate": 9.982828506196295e-07,
|
|
"loss": 0.0475,
|
|
"num_tokens": 5348991.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.744665265083313,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.16115596269847898,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.19475646493041288,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07649739768026005,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 101
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1472.0,
|
|
"completions/mean_length": 1392.25,
|
|
"completions/mean_terminated_length": 1284.5,
|
|
"completions/min_length": 957.0,
|
|
"completions/min_terminated_length": 957.0,
|
|
"epoch": 0.12773951158422042,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.465229932517055,
|
|
"kl": 0.00170135498046875,
|
|
"learning_rate": 9.981069663104853e-07,
|
|
"loss": -0.0292,
|
|
"num_tokens": 5393291.0,
|
|
"reward": 1.4901161193847656e-08,
|
|
"reward_std": 0.9994131326675415,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.010671914654693294,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.027094219261353553,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7416666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08027729719194866,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 102
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1500.0,
|
|
"completions/mean_length": 1388.5625,
|
|
"completions/mean_terminated_length": 1277.125,
|
|
"completions/min_length": 1062.0,
|
|
"completions/min_terminated_length": 1062.0,
|
|
"epoch": 0.1289918597370069,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.97886098445916,
|
|
"kl": 0.00238800048828125,
|
|
"learning_rate": 9.979225269431252e-07,
|
|
"loss": 0.0455,
|
|
"num_tokens": 5437588.0,
|
|
"reward": -1.4901161193847656e-08,
|
|
"reward_std": 1.0143799781799316,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0018910121903646018,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.21804038685357507,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.55,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12292725943057183,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 103
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1482.0,
|
|
"completions/mean_length": 1159.8125,
|
|
"completions/mean_terminated_length": 955.7000122070312,
|
|
"completions/min_length": 402.0,
|
|
"completions/min_terminated_length": 402.0,
|
|
"epoch": 0.13024420788979335,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.2070297326725687,
|
|
"kl": 0.0024261474609375,
|
|
"learning_rate": 9.977295360387827e-07,
|
|
"loss": -0.0325,
|
|
"num_tokens": 5469273.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.848124623298645,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0002889221914715882,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03991849505429317,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.4875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1495053572680653,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 104
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1500.0,
|
|
"completions/mean_length": 1405.4375,
|
|
"completions/mean_terminated_length": 1247.8333740234375,
|
|
"completions/min_length": 959.0,
|
|
"completions/min_terminated_length": 959.0,
|
|
"epoch": 0.13149655604257984,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.1693981909983457,
|
|
"kl": 0.00269317626953125,
|
|
"learning_rate": 9.97527997281954e-07,
|
|
"loss": -0.0085,
|
|
"num_tokens": 5527744.0,
|
|
"reward": 1.4901161193847656e-08,
|
|
"reward_std": 1.0289491415023804,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.07601873282977642,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.2329329780235847,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6708333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0787635937708768,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 105
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.4375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1250.0,
|
|
"completions/mean_length": 1258.5625,
|
|
"completions/mean_terminated_length": 1070.77783203125,
|
|
"completions/min_length": 958.0,
|
|
"completions/min_terminated_length": 958.0,
|
|
"epoch": 0.1327489041953663,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.4731519538264903,
|
|
"kl": 0.0015192031860351562,
|
|
"learning_rate": 9.973179145203272e-07,
|
|
"loss": -0.0122,
|
|
"num_tokens": 5571305.0,
|
|
"reward": 0.0,
|
|
"reward_std": 1.046633243560791,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.027299266065874364,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09683294681842305,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7083333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09067647005823631,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327954292297363,
|
|
"step": 106
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1376.0,
|
|
"completions/mean_length": 1361.875,
|
|
"completions/mean_terminated_length": 1223.75,
|
|
"completions/min_length": 937.0,
|
|
"completions/min_terminated_length": 937.0,
|
|
"epoch": 0.1340012523481528,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.781242946832243,
|
|
"kl": 0.0024871826171875,
|
|
"learning_rate": 9.970992917647088e-07,
|
|
"loss": -0.0163,
|
|
"num_tokens": 5617855.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.9318596124649048,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.19798356691808755,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.29651415192877617,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6166666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11021863793455328,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 107
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1487.0,
|
|
"completions/mean_length": 1146.0625,
|
|
"completions/mean_terminated_length": 1122.4666748046875,
|
|
"completions/min_length": 848.0,
|
|
"completions/min_terminated_length": 848.0,
|
|
"epoch": 0.13525360050093926,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.9997009952780855,
|
|
"kl": 0.0022125244140625,
|
|
"learning_rate": 9.968721331889465e-07,
|
|
"loss": 0.0235,
|
|
"num_tokens": 5654992.0,
|
|
"reward": 7.450580596923828e-09,
|
|
"reward_std": 1.0186116695404053,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0558045951815816,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.029030233660680062,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7916666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09699179041242312,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 108
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1390.0,
|
|
"completions/mean_length": 1410.4375,
|
|
"completions/mean_terminated_length": 1261.166748046875,
|
|
"completions/min_length": 1123.0,
|
|
"completions/min_terminated_length": 1123.0,
|
|
"epoch": 0.13650594865372573,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.9682648437410637,
|
|
"kl": 0.002681732177734375,
|
|
"learning_rate": 9.966364431298509e-07,
|
|
"loss": -0.022,
|
|
"num_tokens": 5711927.0,
|
|
"reward": -1.4901161193847656e-08,
|
|
"reward_std": 1.0176870822906494,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.26425948065238597,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.28899722395436095,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09428090415820636,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 109
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.3125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1455.0,
|
|
"completions/mean_length": 1212.4375,
|
|
"completions/mean_terminated_length": 1081.727294921875,
|
|
"completions/min_length": 791.0,
|
|
"completions/min_terminated_length": 791.0,
|
|
"epoch": 0.13775829680651222,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.323352542220494,
|
|
"kl": 0.002532958984375,
|
|
"learning_rate": 9.963922260871115e-07,
|
|
"loss": -0.0134,
|
|
"num_tokens": 5754094.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.9666612148284912,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.051175618061779164,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.039320213077717464,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5833333333333334,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14504150108516195,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 110
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.8125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1486.0,
|
|
"completions/mean_length": 1485.25,
|
|
"completions/mean_terminated_length": 1421.3333740234375,
|
|
"completions/min_length": 1380.0,
|
|
"completions/min_terminated_length": 1380.0,
|
|
"epoch": 0.13901064495929868,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.8360270019245446,
|
|
"kl": 0.0024871826171875,
|
|
"learning_rate": 9.9613948672321e-07,
|
|
"loss": -0.0014,
|
|
"num_tokens": 5814162.0,
|
|
"reward": 1.4901161193847656e-08,
|
|
"reward_std": 1.0610442161560059,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.009639880768854782,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.045421738289270215,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12292725943057184,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 111
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.75,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1323.0,
|
|
"completions/mean_length": 1434.75,
|
|
"completions/mean_terminated_length": 1239.0,
|
|
"completions/min_length": 1100.0,
|
|
"completions/min_terminated_length": 1100.0,
|
|
"epoch": 0.14026299311208515,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.999336260576351,
|
|
"kl": 0.0024852752685546875,
|
|
"learning_rate": 9.958782298633351e-07,
|
|
"loss": -0.0196,
|
|
"num_tokens": 5879078.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.7917496562004089,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.03413289340922598,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05688585018947227,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1261979632400061,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 112
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1290.0,
|
|
"completions/mean_length": 1377.4375,
|
|
"completions/mean_terminated_length": 1173.166748046875,
|
|
"completions/min_length": 998.0,
|
|
"completions/min_terminated_length": 998.0,
|
|
"epoch": 0.14151534126487164,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.661753098948472,
|
|
"kl": 0.0021266937255859375,
|
|
"learning_rate": 9.95608460495285e-07,
|
|
"loss": -0.0087,
|
|
"num_tokens": 5933045.0,
|
|
"reward": 2.2351741790771484e-08,
|
|
"reward_std": 1.0039006471633911,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.2351741790771484e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.05284198848548562,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05437266883758088,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7416666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10576003586036263,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 113
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1187.0,
|
|
"completions/mean_length": 1322.25,
|
|
"completions/mean_terminated_length": 1093.71435546875,
|
|
"completions/min_length": 991.0,
|
|
"completions/min_terminated_length": 991.0,
|
|
"epoch": 0.1427676894176581,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.8780144010263284,
|
|
"kl": 0.0020542144775390625,
|
|
"learning_rate": 9.953301837693767e-07,
|
|
"loss": 0.003,
|
|
"num_tokens": 5979113.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.8175742626190186,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.005502994719066203,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06974582191643876,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 1.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0758897836290186,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 114
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.4375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1488.0,
|
|
"completions/mean_length": 1401.1875,
|
|
"completions/mean_terminated_length": 1324.3333740234375,
|
|
"completions/min_length": 1118.0,
|
|
"completions/min_terminated_length": 1118.0,
|
|
"epoch": 0.14402003757044457,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.139329899307579,
|
|
"kl": 0.002716064453125,
|
|
"learning_rate": 9.95043404998345e-07,
|
|
"loss": 0.0292,
|
|
"num_tokens": 6040452.0,
|
|
"reward": 0.0,
|
|
"reward_std": 1.0616416931152344,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.03748903917915849,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14395002297286164,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1641476300299351,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 115
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1452.0,
|
|
"completions/mean_length": 1185.375,
|
|
"completions/mean_terminated_length": 1140.4285888671875,
|
|
"completions/min_length": 804.0,
|
|
"completions/min_terminated_length": 804.0,
|
|
"epoch": 0.14527238572323106,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.3086990939983667,
|
|
"kl": 0.0029144287109375,
|
|
"learning_rate": 9.947481296572423e-07,
|
|
"loss": -0.014,
|
|
"num_tokens": 6090810.0,
|
|
"reward": -2.2351741790771484e-08,
|
|
"reward_std": 1.0066075325012207,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.2351741790771484e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.05014218857813404,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09276403913432626,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12171612389003693,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 116
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1433.0,
|
|
"completions/mean_length": 1351.9375,
|
|
"completions/mean_terminated_length": 1263.0999755859375,
|
|
"completions/min_length": 1043.0,
|
|
"completions/min_terminated_length": 1043.0,
|
|
"epoch": 0.14652473387601753,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.035226656450535,
|
|
"kl": 0.002376556396484375,
|
|
"learning_rate": 9.944443633833335e-07,
|
|
"loss": 0.0179,
|
|
"num_tokens": 6148881.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.7348309755325317,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0762897874284947,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12841725021840134,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10576003586036262,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 117
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1392.0,
|
|
"completions/mean_length": 1474.6875,
|
|
"completions/mean_terminated_length": 1297.5,
|
|
"completions/min_length": 1203.0,
|
|
"completions/min_terminated_length": 1203.0,
|
|
"epoch": 0.14777708202880402,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.9234616863737957,
|
|
"kl": 0.0024566650390625,
|
|
"learning_rate": 9.94132111975989e-07,
|
|
"loss": 0.0031,
|
|
"num_tokens": 6213916.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.5194555521011353,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.018562499999999996,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.024749999999999994,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11213417888437974,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 118
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1228.0,
|
|
"completions/mean_length": 1220.0625,
|
|
"completions/mean_terminated_length": 940.125,
|
|
"completions/min_length": 820.0,
|
|
"completions/min_terminated_length": 820.0,
|
|
"epoch": 0.14902943018159048,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.7331417657136603,
|
|
"kl": 0.002094268798828125,
|
|
"learning_rate": 9.93811381396573e-07,
|
|
"loss": -0.0031,
|
|
"num_tokens": 6257485.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.7746272087097168,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.02009986693954008,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07362867807980181,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08333333333333336,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 119
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1479.0,
|
|
"completions/mean_length": 1400.375,
|
|
"completions/mean_terminated_length": 1340.5999755859375,
|
|
"completions/min_length": 1181.0,
|
|
"completions/min_terminated_length": 1181.0,
|
|
"epoch": 0.15028177833437695,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.834637825788323,
|
|
"kl": 0.003093719482421875,
|
|
"learning_rate": 9.934821777683306e-07,
|
|
"loss": 0.0269,
|
|
"num_tokens": 6319963.0,
|
|
"reward": 0.0,
|
|
"reward_std": 1.0544224977493286,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.18103321643586406,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14394672405121658,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8083333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10576003586036263,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 120
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.4375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1428.0,
|
|
"completions/mean_length": 1325.5,
|
|
"completions/mean_terminated_length": 1189.77783203125,
|
|
"completions/min_length": 853.0,
|
|
"completions/min_terminated_length": 853.0,
|
|
"epoch": 0.15153412648716344,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.2642836490036453,
|
|
"kl": 0.0030364990234375,
|
|
"learning_rate": 9.93144507376271e-07,
|
|
"loss": -0.005,
|
|
"num_tokens": 6385427.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.8268899917602539,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.1112911236291226,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1569615458099141,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6124999999999999,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09803627446568493,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 121
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.3125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1465.0,
|
|
"completions/mean_length": 1305.875,
|
|
"completions/mean_terminated_length": 1217.6363525390625,
|
|
"completions/min_length": 922.0,
|
|
"completions/min_terminated_length": 922.0,
|
|
"epoch": 0.1527864746399499,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.6641398914857923,
|
|
"kl": 0.002033233642578125,
|
|
"learning_rate": 9.927983766670462e-07,
|
|
"loss": -0.0098,
|
|
"num_tokens": 6440177.0,
|
|
"reward": 1.4901161193847656e-08,
|
|
"reward_std": 1.0115642547607422,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06872988161057395,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1025211626906069,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7083333333333334,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08027729719194866,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 122
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1338.0,
|
|
"completions/mean_length": 1239.6875,
|
|
"completions/mean_terminated_length": 1083.5,
|
|
"completions/min_length": 886.0,
|
|
"completions/min_terminated_length": 886.0,
|
|
"epoch": 0.15403882279273637,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.9543701078797575,
|
|
"kl": 0.0018558502197265625,
|
|
"learning_rate": 9.924437922488291e-07,
|
|
"loss": 0.0245,
|
|
"num_tokens": 6498212.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.6738491654396057,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.038590091343060344,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09510012784467493,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6041666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12524050936172842,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 123
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1499.0,
|
|
"completions/mean_length": 1403.625,
|
|
"completions/mean_terminated_length": 1345.800048828125,
|
|
"completions/min_length": 1121.0,
|
|
"completions/min_terminated_length": 1121.0,
|
|
"epoch": 0.15529117094552286,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.5178341976839556,
|
|
"kl": 0.0033111572265625,
|
|
"learning_rate": 9.920807608911876e-07,
|
|
"loss": 0.0022,
|
|
"num_tokens": 6553902.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.8796525597572327,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.013190710670885862,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1480868926971966,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.75,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10183501544346313,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 124
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 825.0,
|
|
"completions/mean_length": 1112.5625,
|
|
"completions/mean_terminated_length": 725.125,
|
|
"completions/min_length": 613.0,
|
|
"completions/min_terminated_length": 613.0,
|
|
"epoch": 0.15654351909830932,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.378105432163651,
|
|
"kl": 0.0008687973022460938,
|
|
"learning_rate": 9.917092895249543e-07,
|
|
"loss": -0.0272,
|
|
"num_tokens": 6589311.0,
|
|
"reward": -1.4901161193847656e-08,
|
|
"reward_std": 0.9441956877708435,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0014329624416098018,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.112902138916422,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12758439472669758,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 125
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.1875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1404.0,
|
|
"completions/mean_length": 1137.0625,
|
|
"completions/mean_terminated_length": 1053.3077392578125,
|
|
"completions/min_length": 749.0,
|
|
"completions/min_terminated_length": 749.0,
|
|
"epoch": 0.15779586725109582,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.2527212324187826,
|
|
"kl": 0.002117156982421875,
|
|
"learning_rate": 9.913293852420946e-07,
|
|
"loss": -0.0249,
|
|
"num_tokens": 6618304.0,
|
|
"reward": 2.2351741790771484e-08,
|
|
"reward_std": 1.035041093826294,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.2351741790771484e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.007633954846541112,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.032194935573291575,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.103905227473387,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 126
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.75,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1405.0,
|
|
"completions/mean_length": 1462.125,
|
|
"completions/mean_terminated_length": 1348.5,
|
|
"completions/min_length": 1255.0,
|
|
"completions/min_terminated_length": 1255.0,
|
|
"epoch": 0.15904821540388228,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.091983329824593,
|
|
"kl": 0.00301361083984375,
|
|
"learning_rate": 9.909410552955712e-07,
|
|
"loss": 0.0155,
|
|
"num_tokens": 6681314.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.784981369972229,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.12737730164130195,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.21747166290242714,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6208333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.093392838174146,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 127
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1474.0,
|
|
"completions/mean_length": 1378.875,
|
|
"completions/mean_terminated_length": 1223.1429443359375,
|
|
"completions/min_length": 904.0,
|
|
"completions/min_terminated_length": 904.0,
|
|
"epoch": 0.16030056355666875,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.2503169455658982,
|
|
"kl": 0.002620697021484375,
|
|
"learning_rate": 9.905443070992068e-07,
|
|
"loss": -0.0039,
|
|
"num_tokens": 6723448.0,
|
|
"reward": -3.3527612686157227e-08,
|
|
"reward_std": 1.06490159034729,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -3.3527612686157227e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.07877405649297206,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0705921273253386,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7041666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09727776191382574,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 128
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1476.0,
|
|
"completions/mean_length": 1418.0,
|
|
"completions/mean_terminated_length": 1281.3333740234375,
|
|
"completions/min_length": 1167.0,
|
|
"completions/min_terminated_length": 1167.0,
|
|
"epoch": 0.16155291170945524,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.419069025864104,
|
|
"kl": 0.003154754638671875,
|
|
"learning_rate": 9.901391482275403e-07,
|
|
"loss": -0.0084,
|
|
"num_tokens": 6774208.0,
|
|
"reward": -1.4901161193847656e-08,
|
|
"reward_std": 0.9308052062988281,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.06996807244867725,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1266299752409378,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7708333333333334,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0909822937597079,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 129
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1098.0,
|
|
"completions/mean_length": 1249.3125,
|
|
"completions/mean_terminated_length": 998.625,
|
|
"completions/min_length": 929.0,
|
|
"completions/min_terminated_length": 929.0,
|
|
"epoch": 0.1628052598622417,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.593196585544164,
|
|
"kl": 0.001987457275390625,
|
|
"learning_rate": 9.897255864156847e-07,
|
|
"loss": 0.0036,
|
|
"num_tokens": 6807421.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.4564354419708252,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0429616858320893,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07600285040401121,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08243965245133134,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 130
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1394.0,
|
|
"completions/mean_length": 1382.375,
|
|
"completions/mean_terminated_length": 1231.1429443359375,
|
|
"completions/min_length": 1075.0,
|
|
"completions/min_terminated_length": 1075.0,
|
|
"epoch": 0.16405760801502817,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.0914996686420104,
|
|
"kl": 0.002330780029296875,
|
|
"learning_rate": 9.893036295591768e-07,
|
|
"loss": -0.0116,
|
|
"num_tokens": 6866379.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.9815191626548767,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.04217953361323695,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06871670933278229,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.102469507659596,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 131
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1424.0,
|
|
"completions/mean_length": 1347.125,
|
|
"completions/mean_terminated_length": 1150.571533203125,
|
|
"completions/min_length": 371.0,
|
|
"completions/min_terminated_length": 371.0,
|
|
"epoch": 0.16530995616781466,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.001502145266533,
|
|
"kl": 0.0022430419921875,
|
|
"learning_rate": 9.888732857138291e-07,
|
|
"loss": -0.04,
|
|
"num_tokens": 6912533.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.8428164720535278,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.020130872057838745,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04873657297962695,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6666666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11417984514369003,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 132
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 882.0,
|
|
"completions/mean_length": 1177.75,
|
|
"completions/mean_terminated_length": 855.5,
|
|
"completions/min_length": 795.0,
|
|
"completions/min_terminated_length": 795.0,
|
|
"epoch": 0.16656230432060112,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.849369584207102,
|
|
"kl": 0.00208282470703125,
|
|
"learning_rate": 9.884345630955742e-07,
|
|
"loss": -0.0097,
|
|
"num_tokens": 6966273.0,
|
|
"reward": 7.450580596923828e-09,
|
|
"reward_std": 1.0472090244293213,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.08003635148497827,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09874522821696813,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09067647005823629,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 133
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1491.0,
|
|
"completions/mean_length": 1488.0,
|
|
"completions/mean_terminated_length": 1404.0,
|
|
"completions/min_length": 1317.0,
|
|
"completions/min_terminated_length": 1317.0,
|
|
"epoch": 0.16781465247338762,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.688722282572705,
|
|
"kl": 0.0026092529296875,
|
|
"learning_rate": 9.879874700803082e-07,
|
|
"loss": 0.0158,
|
|
"num_tokens": 7027657.0,
|
|
"reward": 3.166496753692627e-08,
|
|
"reward_std": 1.0543937683105469,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 3.166496753692627e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.11540214745824308,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.23102363071615145,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06382847385042256,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 134
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.1875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1312.0,
|
|
"completions/mean_length": 1146.375,
|
|
"completions/mean_terminated_length": 1064.769287109375,
|
|
"completions/min_length": 858.0,
|
|
"completions/min_terminated_length": 858.0,
|
|
"epoch": 0.16906700062617408,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.664759257998968,
|
|
"kl": 0.0027923583984375,
|
|
"learning_rate": 9.875320152037318e-07,
|
|
"loss": -0.0535,
|
|
"num_tokens": 7084095.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.8985534906387329,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.23834962043700852,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.27030996076033054,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11417984514369006,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 135
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1237.0,
|
|
"completions/mean_length": 1274.875,
|
|
"completions/mean_terminated_length": 1049.75,
|
|
"completions/min_length": 931.0,
|
|
"completions/min_terminated_length": 931.0,
|
|
"epoch": 0.17031934877896054,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.4849191974864953,
|
|
"kl": 0.0015964508056640625,
|
|
"learning_rate": 9.870682071611862e-07,
|
|
"loss": 0.0064,
|
|
"num_tokens": 7133293.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.6105766892433167,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.02324001170505371,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08829030406958045,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7374999999999999,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11013459778666118,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 136
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1462.0,
|
|
"completions/max_terminated_length": 1462.0,
|
|
"completions/mean_length": 955.6875,
|
|
"completions/mean_terminated_length": 955.6875,
|
|
"completions/min_length": 761.0,
|
|
"completions/min_terminated_length": 761.0,
|
|
"epoch": 0.17157169693174704,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.620633996964256,
|
|
"kl": 0.0014123916625976562,
|
|
"learning_rate": 9.865960548074874e-07,
|
|
"loss": 0.0103,
|
|
"num_tokens": 7187688.0,
|
|
"reward": 5.960464477539063e-08,
|
|
"reward_std": 0.6596803069114685,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 5.960464477539063e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0038102094327885448,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12227248665731598,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08027729719194865,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 137
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1478.0,
|
|
"completions/mean_length": 1322.0,
|
|
"completions/mean_terminated_length": 1093.1429443359375,
|
|
"completions/min_length": 733.0,
|
|
"completions/min_terminated_length": 733.0,
|
|
"epoch": 0.1728240450845335,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.8032617883799893,
|
|
"kl": 0.0021915435791015625,
|
|
"learning_rate": 9.861155671567572e-07,
|
|
"loss": 0.0513,
|
|
"num_tokens": 7236832.0,
|
|
"reward": 1.4901161193847656e-08,
|
|
"reward_std": 0.938301682472229,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.06466602322499601,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05158824252677371,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5666666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09428090415820632,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 138
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1436.0,
|
|
"completions/mean_length": 1335.8125,
|
|
"completions/mean_terminated_length": 1237.300048828125,
|
|
"completions/min_length": 944.0,
|
|
"completions/min_terminated_length": 944.0,
|
|
"epoch": 0.17407639323731997,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.6291703245010103,
|
|
"kl": 0.00284576416015625,
|
|
"learning_rate": 9.856267533822519e-07,
|
|
"loss": -0.021,
|
|
"num_tokens": 7293301.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.7662729024887085,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.08970693607829759,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16534434492549577,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1970147578604578,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 139
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.25,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1340.0,
|
|
"completions/mean_length": 1120.8125,
|
|
"completions/mean_terminated_length": 994.4166870117188,
|
|
"completions/min_length": 844.0,
|
|
"completions/min_terminated_length": 844.0,
|
|
"epoch": 0.17532874139010646,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.6820256510644604,
|
|
"kl": 0.0022602081298828125,
|
|
"learning_rate": 9.851296228161857e-07,
|
|
"loss": 0.019,
|
|
"num_tokens": 7341130.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.8969849348068237,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.026873742767844065,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0656536955300479,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1060223596263578,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 140
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1404.0,
|
|
"completions/mean_length": 1399.75,
|
|
"completions/mean_terminated_length": 1232.666748046875,
|
|
"completions/min_length": 936.0,
|
|
"completions/min_terminated_length": 936.0,
|
|
"epoch": 0.17658108954289292,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.372829326628183,
|
|
"kl": 0.003108978271484375,
|
|
"learning_rate": 9.846241849495535e-07,
|
|
"loss": 0.0153,
|
|
"num_tokens": 7410982.0,
|
|
"reward": -1.4901161193847656e-08,
|
|
"reward_std": 0.9778778553009033,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.05033218082218886,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.025136378125956142,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6666666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13333333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 141
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1488.0,
|
|
"completions/mean_length": 1449.5625,
|
|
"completions/mean_terminated_length": 1399.125,
|
|
"completions/min_length": 1240.0,
|
|
"completions/min_terminated_length": 1240.0,
|
|
"epoch": 0.17783343769567939,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.396651196247218,
|
|
"kl": 0.001613616943359375,
|
|
"learning_rate": 9.841104494319492e-07,
|
|
"loss": -0.0053,
|
|
"num_tokens": 7468879.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.6796972155570984,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.021608644866332537,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11519923314511032,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0739118594202782,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 142
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1458.0,
|
|
"completions/mean_length": 1434.4375,
|
|
"completions/mean_terminated_length": 1350.1429443359375,
|
|
"completions/min_length": 1215.0,
|
|
"completions/min_terminated_length": 1215.0,
|
|
"epoch": 0.17908578584846588,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.6795502864911453,
|
|
"kl": 0.00296783447265625,
|
|
"learning_rate": 9.835884260713826e-07,
|
|
"loss": 0.0053,
|
|
"num_tokens": 7526334.0,
|
|
"reward": 7.450580596923828e-09,
|
|
"reward_std": 0.9401005506515503,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.05878136743445916,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.15259208491300538,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.75,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09583937179043481,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 143
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1271.0,
|
|
"completions/mean_length": 1081.1875,
|
|
"completions/mean_terminated_length": 1021.357177734375,
|
|
"completions/min_length": 760.0,
|
|
"completions/min_terminated_length": 760.0,
|
|
"epoch": 0.18033813400125234,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.4443025787306487,
|
|
"kl": 0.0014491081237792969,
|
|
"learning_rate": 9.830581248340904e-07,
|
|
"loss": 0.0523,
|
|
"num_tokens": 7560449.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.6386822462081909,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.030072721096349574,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07933031547923879,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5958333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07490735018081408,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 144
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.8125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1438.0,
|
|
"completions/mean_length": 1464.875,
|
|
"completions/mean_terminated_length": 1312.666748046875,
|
|
"completions/min_length": 1143.0,
|
|
"completions/min_terminated_length": 1143.0,
|
|
"epoch": 0.18159048215403883,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.281062477288285,
|
|
"kl": 0.00322723388671875,
|
|
"learning_rate": 9.82519555844347e-07,
|
|
"loss": 0.0292,
|
|
"num_tokens": 7621295.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.8289343118667603,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.12500933186269494,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10441096539901965,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07097208632298363,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 145
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.6875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1440.0,
|
|
"completions/mean_length": 1436.875,
|
|
"completions/mean_terminated_length": 1298.0,
|
|
"completions/min_length": 1137.0,
|
|
"completions/min_terminated_length": 1137.0,
|
|
"epoch": 0.1828428303068253,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.852746135375484,
|
|
"kl": 0.0022106170654296875,
|
|
"learning_rate": 9.819727293842715e-07,
|
|
"loss": -0.0099,
|
|
"num_tokens": 7663125.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.9735676646232605,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.008435227123041298,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08786012223776958,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.65,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1299572579307862,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 146
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.75,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1465.0,
|
|
"completions/mean_length": 1447.0625,
|
|
"completions/mean_terminated_length": 1288.25,
|
|
"completions/min_length": 1028.0,
|
|
"completions/min_terminated_length": 1028.0,
|
|
"epoch": 0.18409517845961176,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.7900001366496268,
|
|
"kl": 0.002574920654296875,
|
|
"learning_rate": 9.814176558936306e-07,
|
|
"loss": 0.0107,
|
|
"num_tokens": 7727518.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.3397839367389679,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09743503994599206,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16748018946124937,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09953596037316068,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 147
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 863.0,
|
|
"completions/mean_length": 1135.0625,
|
|
"completions/mean_terminated_length": 770.125,
|
|
"completions/min_length": 571.0,
|
|
"completions/min_terminated_length": 571.0,
|
|
"epoch": 0.18534752661239826,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.560665055023378,
|
|
"kl": 0.00197601318359375,
|
|
"learning_rate": 9.808543459696394e-07,
|
|
"loss": -0.0149,
|
|
"num_tokens": 7771327.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.9778045415878296,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.20066201620356428,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.3214780108822807,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0938872452190116,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 148
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.4375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1456.0,
|
|
"completions/mean_length": 1307.375,
|
|
"completions/mean_terminated_length": 1157.5555419921875,
|
|
"completions/min_length": 336.0,
|
|
"completions/min_terminated_length": 336.0,
|
|
"epoch": 0.18659987476518472,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.698308959281013,
|
|
"kl": 0.003589630126953125,
|
|
"learning_rate": 9.802828103667598e-07,
|
|
"loss": 0.0049,
|
|
"num_tokens": 7824917.0,
|
|
"reward": -9.313225746154785e-09,
|
|
"reward_std": 0.929603099822998,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -9.313225746154785e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0017376960374372932,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03411053398366144,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6708333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1954576775256058,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 149
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.4375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1390.0,
|
|
"completions/mean_length": 1350.8125,
|
|
"completions/mean_terminated_length": 1234.77783203125,
|
|
"completions/min_length": 897.0,
|
|
"completions/min_terminated_length": 897.0,
|
|
"epoch": 0.18785222291797118,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.37227831535049,
|
|
"kl": 0.003437042236328125,
|
|
"learning_rate": 9.797030599964946e-07,
|
|
"loss": -0.0282,
|
|
"num_tokens": 7879658.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.6427962779998779,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0803417321639054,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11525098223680169,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.65,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1253144193766372,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 150
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.6875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1500.0,
|
|
"completions/mean_length": 1446.0625,
|
|
"completions/mean_terminated_length": 1327.4000244140625,
|
|
"completions/min_length": 1075.0,
|
|
"completions/min_terminated_length": 1075.0,
|
|
"epoch": 0.18910457107075768,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.6387419512111028,
|
|
"kl": 0.002285003662109375,
|
|
"learning_rate": 9.791151059271787e-07,
|
|
"loss": -0.0106,
|
|
"num_tokens": 7927819.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.9979233145713806,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.06657694240337725,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.20181152584757237,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08027729719194862,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 151
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1493.0,
|
|
"completions/mean_length": 1422.1875,
|
|
"completions/mean_terminated_length": 1344.375,
|
|
"completions/min_length": 1078.0,
|
|
"completions/min_terminated_length": 1078.0,
|
|
"epoch": 0.19035691922354414,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.8499814649815485,
|
|
"kl": 0.00269317626953125,
|
|
"learning_rate": 9.78518959383769e-07,
|
|
"loss": -0.0267,
|
|
"num_tokens": 7979030.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.6457971334457397,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.032289559957375875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03678022720872768,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12881223774390613,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 152
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 1.0,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 0.0,
|
|
"completions/mean_length": 1500.0,
|
|
"completions/mean_terminated_length": 0.0,
|
|
"completions/min_length": 1500.0,
|
|
"completions/min_terminated_length": 0.0,
|
|
"epoch": 0.19160926737633063,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.7439239666738633,
|
|
"kl": 0.00275421142578125,
|
|
"learning_rate": 9.779146317476294e-07,
|
|
"loss": 0.0001,
|
|
"num_tokens": 8039006.0,
|
|
"reward": -1.1175870895385742e-08,
|
|
"reward_std": 1.0521876811981201,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -1.1175870895385742e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.09760563861386369,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10390475856290554,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6708333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.05692750425533111,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 153
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.9375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1500.0,
|
|
"completions/mean_length": 1500.0,
|
|
"completions/mean_terminated_length": 1500.0,
|
|
"completions/min_length": 1500.0,
|
|
"completions/min_terminated_length": 1500.0,
|
|
"epoch": 0.1928616155291171,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.0303320592221237,
|
|
"kl": 0.003154754638671875,
|
|
"learning_rate": 9.773021345563133e-07,
|
|
"loss": 0.0001,
|
|
"num_tokens": 8103454.0,
|
|
"reward": 1.4901161193847656e-08,
|
|
"reward_std": 1.0458917617797852,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.25916260149601344,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.18093382728997642,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07969850595746357,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 154
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1470.0,
|
|
"completions/mean_length": 1404.1875,
|
|
"completions/mean_terminated_length": 1346.7000732421875,
|
|
"completions/min_length": 1252.0,
|
|
"completions/min_terminated_length": 1252.0,
|
|
"epoch": 0.19411396368190356,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.7184034168824684,
|
|
"kl": 0.002166748046875,
|
|
"learning_rate": 9.766814795033438e-07,
|
|
"loss": 0.0074,
|
|
"num_tokens": 8157473.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.9921345710754395,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.05048016331986036,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1222984521625515,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7791666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07187952884282611,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 155
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.75,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1361.0,
|
|
"completions/mean_length": 1382.875,
|
|
"completions/mean_terminated_length": 1031.5,
|
|
"completions/min_length": 853.0,
|
|
"completions/min_terminated_length": 853.0,
|
|
"epoch": 0.19536631183469005,
|
|
"frac_reward_zero_std": 0.5,
|
|
"grad_norm": 1.7493199389788998,
|
|
"kl": 0.002361297607421875,
|
|
"learning_rate": 9.7605267843799e-07,
|
|
"loss": -0.0294,
|
|
"num_tokens": 8204367.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.7406100630760193,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.4396175531814227,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.42217210131772864,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09098229375970789,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 156
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1403.0,
|
|
"completions/mean_length": 1252.6875,
|
|
"completions/mean_terminated_length": 1104.300048828125,
|
|
"completions/min_length": 780.0,
|
|
"completions/min_terminated_length": 780.0,
|
|
"epoch": 0.19661865998747652,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.1812238222319373,
|
|
"kl": 0.002895355224609375,
|
|
"learning_rate": 9.754157433650416e-07,
|
|
"loss": 0.0099,
|
|
"num_tokens": 8250426.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.7503967881202698,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.09038614901064657,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10393207102574,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6916666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14580555290954889,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 157
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.8125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1470.0,
|
|
"completions/mean_length": 1485.9375,
|
|
"completions/mean_terminated_length": 1425.0,
|
|
"completions/min_length": 1367.0,
|
|
"completions/min_terminated_length": 1367.0,
|
|
"epoch": 0.19787100814026298,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.274381341557373,
|
|
"kl": 0.0019855499267578125,
|
|
"learning_rate": 9.74770686444578e-07,
|
|
"loss": -0.0039,
|
|
"num_tokens": 8312649.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 1.0463612079620361,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.01854492153050523,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07355929227115507,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7124999999999999,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08333333333333334,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 158
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1296.0,
|
|
"completions/mean_length": 1468.6875,
|
|
"completions/mean_terminated_length": 1249.5,
|
|
"completions/min_length": 1203.0,
|
|
"completions/min_terminated_length": 1203.0,
|
|
"epoch": 0.19912335629304947,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.037789984063529,
|
|
"kl": 0.00295257568359375,
|
|
"learning_rate": 9.74117519991739e-07,
|
|
"loss": 0.0195,
|
|
"num_tokens": 8372460.0,
|
|
"reward": 5.960464477539063e-08,
|
|
"reward_std": 0.6518849730491638,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 5.960464477539063e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.010805361779511215,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10954072593469087,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5958333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12041594578792295,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 159
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1353.0,
|
|
"completions/mean_length": 1422.5625,
|
|
"completions/mean_terminated_length": 1293.5,
|
|
"completions/min_length": 1240.0,
|
|
"completions/min_terminated_length": 1240.0,
|
|
"epoch": 0.20037570444583594,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.4914861954275853,
|
|
"kl": 0.0020751953125,
|
|
"learning_rate": 9.734562564764863e-07,
|
|
"loss": -0.0084,
|
|
"num_tokens": 8441477.0,
|
|
"reward": -1.4901161193847656e-08,
|
|
"reward_std": 1.0050157308578491,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.20082839440532127,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.24021378555176306,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08073734277593311,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 160
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.9375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1444.0,
|
|
"completions/mean_length": 1496.5,
|
|
"completions/mean_terminated_length": 1444.0,
|
|
"completions/min_length": 1444.0,
|
|
"completions/min_terminated_length": 1444.0,
|
|
"epoch": 0.2016280525986224,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.96733321823591,
|
|
"kl": 0.003032684326171875,
|
|
"learning_rate": 9.727869085233683e-07,
|
|
"loss": 0.0008,
|
|
"num_tokens": 8500525.0,
|
|
"reward": 0.0,
|
|
"reward_std": 1.0511749982833862,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.30725599890646893,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11503663852918616,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08333333333333336,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 161
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.6875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1417.0,
|
|
"completions/mean_length": 1405.9375,
|
|
"completions/mean_terminated_length": 1199.0,
|
|
"completions/min_length": 992.0,
|
|
"completions/min_terminated_length": 992.0,
|
|
"epoch": 0.2028804007514089,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.1286840232588795,
|
|
"kl": 0.003131866455078125,
|
|
"learning_rate": 9.721094889112769e-07,
|
|
"loss": -0.0017,
|
|
"num_tokens": 8561668.0,
|
|
"reward": 0.0,
|
|
"reward_std": 1.0658842325210571,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.3530029462031852,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.3684803684710799,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10036968702787749,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 162
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1488.0,
|
|
"completions/mean_length": 1408.4375,
|
|
"completions/mean_terminated_length": 1290.71435546875,
|
|
"completions/min_length": 1074.0,
|
|
"completions/min_terminated_length": 1074.0,
|
|
"epoch": 0.20413274890419536,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.9421524856257575,
|
|
"kl": 0.003437042236328125,
|
|
"learning_rate": 9.714240105732056e-07,
|
|
"loss": -0.0217,
|
|
"num_tokens": 8611395.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.8545268774032593,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.004406093333840853,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07521193600811737,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11385500851066223,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 163
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.8125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1406.0,
|
|
"completions/mean_length": 1383.4375,
|
|
"completions/mean_terminated_length": 878.3333740234375,
|
|
"completions/min_length": 209.0,
|
|
"completions/min_terminated_length": 209.0,
|
|
"epoch": 0.20538509705698185,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.8733824694411965,
|
|
"kl": 0.002685546875,
|
|
"learning_rate": 9.707304865960003e-07,
|
|
"loss": 0.0086,
|
|
"num_tokens": 8668282.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 1.019072413444519,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.09355282337201007,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11627823063016991,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12619796324000607,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 164
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.8125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1349.0,
|
|
"completions/mean_length": 1461.0625,
|
|
"completions/mean_terminated_length": 1292.3333740234375,
|
|
"completions/min_length": 1182.0,
|
|
"completions/min_terminated_length": 1182.0,
|
|
"epoch": 0.20663744520976832,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.0251334252111324,
|
|
"kl": 0.00315093994140625,
|
|
"learning_rate": 9.700289302201118e-07,
|
|
"loss": -0.0054,
|
|
"num_tokens": 8726843.0,
|
|
"reward": 2.2351741790771484e-08,
|
|
"reward_std": 0.9717680215835571,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.2351741790771484e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.03285324398900216,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1305907322232915,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08766518798921946,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 165
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1492.0,
|
|
"completions/mean_length": 1479.9375,
|
|
"completions/mean_terminated_length": 1339.5,
|
|
"completions/min_length": 1187.0,
|
|
"completions/min_terminated_length": 1187.0,
|
|
"epoch": 0.20788979336255478,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.1164633460783593,
|
|
"kl": 0.00360107421875,
|
|
"learning_rate": 9.69319354839341e-07,
|
|
"loss": -0.01,
|
|
"num_tokens": 8774074.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.6172374486923218,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.010136480012205995,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05376440319397317,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10741060020797316,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 166
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1453.0,
|
|
"completions/mean_length": 1443.6875,
|
|
"completions/mean_terminated_length": 1371.2857666015625,
|
|
"completions/min_length": 1221.0,
|
|
"completions/min_terminated_length": 1221.0,
|
|
"epoch": 0.20914214151534127,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.8681254932066893,
|
|
"kl": 0.003143310546875,
|
|
"learning_rate": 9.686017740005845e-07,
|
|
"loss": -0.0029,
|
|
"num_tokens": 8833421.0,
|
|
"reward": 0.0,
|
|
"reward_std": 1.049817442893982,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.23814174262345672,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.24595173419132288,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.75,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10470416879457554,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 167
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.9375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1483.0,
|
|
"completions/mean_length": 1498.9375,
|
|
"completions/mean_terminated_length": 1483.0,
|
|
"completions/min_length": 1483.0,
|
|
"completions/min_terminated_length": 1483.0,
|
|
"epoch": 0.21039448966812774,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.699748602413328,
|
|
"kl": 0.002422332763671875,
|
|
"learning_rate": 9.678762014035755e-07,
|
|
"loss": 0.001,
|
|
"num_tokens": 8896332.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.8306390047073364,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.01676756574749607,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03592053954406261,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08388704928078614,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 168
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.8125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1481.0,
|
|
"completions/mean_length": 1463.25,
|
|
"completions/mean_terminated_length": 1304.0,
|
|
"completions/min_length": 1117.0,
|
|
"completions/min_terminated_length": 1117.0,
|
|
"epoch": 0.2116468378209142,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.9281066984860664,
|
|
"kl": 0.0030670166015625,
|
|
"learning_rate": 9.67142650900622e-07,
|
|
"loss": 0.0284,
|
|
"num_tokens": 8960800.0,
|
|
"reward": -1.4901161193847656e-08,
|
|
"reward_std": 1.051703691482544,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.02864644527108891,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12265684181148895,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09888264649460884,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 169
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.9375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1385.0,
|
|
"completions/mean_length": 1492.8125,
|
|
"completions/mean_terminated_length": 1385.0,
|
|
"completions/min_length": 1385.0,
|
|
"completions/min_terminated_length": 1385.0,
|
|
"epoch": 0.2128991859737007,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.719524959962169,
|
|
"kl": 0.002513885498046875,
|
|
"learning_rate": 9.664011364963427e-07,
|
|
"loss": -0.0014,
|
|
"num_tokens": 9014901.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.6419066190719604,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.026585625959977408,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05774533640389131,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6416666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09388724521901158,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 170
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1467.0,
|
|
"completions/mean_length": 1398.875,
|
|
"completions/mean_terminated_length": 1268.857177734375,
|
|
"completions/min_length": 977.0,
|
|
"completions/min_terminated_length": 977.0,
|
|
"epoch": 0.21415153412648716,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.2711757628292637,
|
|
"kl": 0.003734588623046875,
|
|
"learning_rate": 9.656516723474003e-07,
|
|
"loss": 0.0199,
|
|
"num_tokens": 9082635.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.7075515985488892,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04754440907840732,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.19258567827157586,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11155467020454342,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 171
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1500.0,
|
|
"completions/mean_length": 1372.0,
|
|
"completions/mean_terminated_length": 1244.0,
|
|
"completions/min_length": 414.0,
|
|
"completions/min_terminated_length": 414.0,
|
|
"epoch": 0.21540388227927365,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.130031481410935,
|
|
"kl": 0.003208160400390625,
|
|
"learning_rate": 9.648942727622293e-07,
|
|
"loss": -0.0004,
|
|
"num_tokens": 9139131.0,
|
|
"reward": 4.470348358154297e-08,
|
|
"reward_std": 0.8231313824653625,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 4.470348358154297e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.15172830287547154,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10156016936265624,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.825,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11894598836509011,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 172
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1441.0,
|
|
"completions/mean_length": 1439.125,
|
|
"completions/mean_terminated_length": 1337.666748046875,
|
|
"completions/min_length": 1231.0,
|
|
"completions/min_terminated_length": 1231.0,
|
|
"epoch": 0.21665623043206012,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.8174656495384003,
|
|
"kl": 0.003330230712890625,
|
|
"learning_rate": 9.641289522007648e-07,
|
|
"loss": 0.0184,
|
|
"num_tokens": 9189589.0,
|
|
"reward": 1.4901161193847656e-08,
|
|
"reward_std": 0.9120515584945679,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.08079485341203167,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.404800820644525,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.059004080210452274,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 173
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.6875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1436.0,
|
|
"completions/mean_length": 1416.4375,
|
|
"completions/mean_terminated_length": 1232.5999755859375,
|
|
"completions/min_length": 961.0,
|
|
"completions/min_terminated_length": 961.0,
|
|
"epoch": 0.21790857858484658,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.883462614618569,
|
|
"kl": 0.003101348876953125,
|
|
"learning_rate": 9.633557252741655e-07,
|
|
"loss": -0.0209,
|
|
"num_tokens": 9242428.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.7592308521270752,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09250187361454984,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.24709362304891008,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1630723538573985,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 174
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.4375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1310.0,
|
|
"completions/mean_length": 1153.5625,
|
|
"completions/mean_terminated_length": 884.1111450195312,
|
|
"completions/min_length": 704.0,
|
|
"completions/min_terminated_length": 704.0,
|
|
"epoch": 0.21916092673763307,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.067205627199215,
|
|
"kl": 0.003124237060546875,
|
|
"learning_rate": 9.625746067445344e-07,
|
|
"loss": 0.0267,
|
|
"num_tokens": 9286885.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.8734534978866577,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0040830023789233914,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.007219286680192259,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09699179041242308,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 175
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1475.0,
|
|
"completions/mean_length": 1478.625,
|
|
"completions/mean_terminated_length": 1329.0,
|
|
"completions/min_length": 1183.0,
|
|
"completions/min_terminated_length": 1183.0,
|
|
"epoch": 0.22041327489041954,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.1276319460691004,
|
|
"kl": 0.003200531005859375,
|
|
"learning_rate": 9.61785611524638e-07,
|
|
"loss": -0.0146,
|
|
"num_tokens": 9345695.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.7759820222854614,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.008401002667427777,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08187937939788012,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.65,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11547005383792516,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 176
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1363.0,
|
|
"completions/mean_length": 1251.3125,
|
|
"completions/mean_terminated_length": 1215.7857666015625,
|
|
"completions/min_length": 1017.0,
|
|
"completions/min_terminated_length": 1017.0,
|
|
"epoch": 0.221665623043206,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.6069318364125738,
|
|
"kl": 0.0021648406982421875,
|
|
"learning_rate": 9.609887546776213e-07,
|
|
"loss": -0.0061,
|
|
"num_tokens": 9382804.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.8300349712371826,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04134925667146179,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05624626120552443,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7333333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07698003589195014,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 177
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1424.0,
|
|
"completions/mean_length": 1388.0,
|
|
"completions/mean_terminated_length": 1244.0,
|
|
"completions/min_length": 998.0,
|
|
"completions/min_terminated_length": 998.0,
|
|
"epoch": 0.2229179711959925,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.156831352479139,
|
|
"kl": 0.00347137451171875,
|
|
"learning_rate": 9.601840514167194e-07,
|
|
"loss": -0.0001,
|
|
"num_tokens": 9443532.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.9561296701431274,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.02300302439349743,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06503983162022253,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.775,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13305526559931294,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 178
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.3125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1495.0,
|
|
"completions/mean_length": 1296.75,
|
|
"completions/mean_terminated_length": 1204.3636474609375,
|
|
"completions/min_length": 963.0,
|
|
"completions/min_terminated_length": 963.0,
|
|
"epoch": 0.22417031934877896,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.2566872824431337,
|
|
"kl": 0.003185272216796875,
|
|
"learning_rate": 9.593715171049677e-07,
|
|
"loss": -0.0019,
|
|
"num_tokens": 9493936.0,
|
|
"reward": 1.4901161193847656e-08,
|
|
"reward_std": 0.9979840517044067,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.04123772744400983,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.055545285602727666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5708333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08766518798921942,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 179
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.75,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1482.0,
|
|
"completions/mean_length": 1462.0625,
|
|
"completions/mean_terminated_length": 1348.25,
|
|
"completions/min_length": 1185.0,
|
|
"completions/min_terminated_length": 1185.0,
|
|
"epoch": 0.22542266750156542,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.118655152417983,
|
|
"kl": 0.003711700439453125,
|
|
"learning_rate": 9.585511672549087e-07,
|
|
"loss": -0.0119,
|
|
"num_tokens": 9547913.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.6055276393890381,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.2990419990496254,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.5212506601592531,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11547005383792518,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 180
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.4375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1483.0,
|
|
"completions/mean_length": 1253.875,
|
|
"completions/mean_terminated_length": 1062.4444580078125,
|
|
"completions/min_length": 742.0,
|
|
"completions/min_terminated_length": 742.0,
|
|
"epoch": 0.2266750156543519,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.453588710121719,
|
|
"kl": 0.003208160400390625,
|
|
"learning_rate": 9.577230175282956e-07,
|
|
"loss": -0.0189,
|
|
"num_tokens": 9590383.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 1.026740550994873,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.22982623849797099,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.35491751307206737,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08027729719194866,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 181
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1431.0,
|
|
"completions/mean_length": 1397.3125,
|
|
"completions/mean_terminated_length": 1294.625,
|
|
"completions/min_length": 1209.0,
|
|
"completions/min_terminated_length": 1209.0,
|
|
"epoch": 0.22792736380713838,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.8117171390856717,
|
|
"kl": 0.00273895263671875,
|
|
"learning_rate": 9.568870837357933e-07,
|
|
"loss": 0.0049,
|
|
"num_tokens": 9635180.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.9024027585983276,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.015236533423952495,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05515104905405319,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7208333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1270024788326182,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 182
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1479.0,
|
|
"completions/mean_length": 1389.4375,
|
|
"completions/mean_terminated_length": 1247.2857666015625,
|
|
"completions/min_length": 1029.0,
|
|
"completions/min_terminated_length": 1029.0,
|
|
"epoch": 0.22917971195992487,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.146143558726643,
|
|
"kl": 0.003173828125,
|
|
"learning_rate": 9.56043381836677e-07,
|
|
"loss": 0.0244,
|
|
"num_tokens": 9691707.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.6937527656555176,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06475936323780643,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07867382027532054,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7458333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07781745019952505,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 183
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.25,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1496.0,
|
|
"completions/mean_length": 1331.75,
|
|
"completions/mean_terminated_length": 1275.666748046875,
|
|
"completions/min_length": 857.0,
|
|
"completions/min_terminated_length": 857.0,
|
|
"epoch": 0.23043206011271133,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.445265358440665,
|
|
"kl": 0.003719329833984375,
|
|
"learning_rate": 9.551919279385267e-07,
|
|
"loss": 0.0321,
|
|
"num_tokens": 9741247.0,
|
|
"reward": -1.4901161193847656e-08,
|
|
"reward_std": 0.9354739785194397,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.011286604414356131,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06370003732540648,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09878896324620107,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 184
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.1875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1381.0,
|
|
"completions/mean_length": 1312.6875,
|
|
"completions/mean_terminated_length": 1269.4615478515625,
|
|
"completions/min_length": 994.0,
|
|
"completions/min_terminated_length": 994.0,
|
|
"epoch": 0.2316844082654978,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.2176769971519983,
|
|
"kl": 0.0028533935546875,
|
|
"learning_rate": 9.543327382969203e-07,
|
|
"loss": 0.0001,
|
|
"num_tokens": 9800986.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.8514897227287292,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06189917187460071,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09461702207527528,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7416666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09067647005823631,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 185
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1437.0,
|
|
"completions/max_terminated_length": 1437.0,
|
|
"completions/mean_length": 1251.9375,
|
|
"completions/mean_terminated_length": 1251.9375,
|
|
"completions/min_length": 1139.0,
|
|
"completions/min_terminated_length": 1139.0,
|
|
"epoch": 0.2329367564182843,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 1.8078703137230523,
|
|
"kl": 0.0009489059448242188,
|
|
"learning_rate": 9.534658293151226e-07,
|
|
"loss": 0.0206,
|
|
"num_tokens": 9844961.0,
|
|
"reward": -2.2351741790771484e-08,
|
|
"reward_std": 1.0031490325927734,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.2351741790771484e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.1837486103073024,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.2121586351571871,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.675,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11642832797715322,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 186
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 1.0,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 0.0,
|
|
"completions/mean_length": 1500.0,
|
|
"completions/mean_terminated_length": 0.0,
|
|
"completions/min_length": 1500.0,
|
|
"completions/min_terminated_length": 0.0,
|
|
"epoch": 0.23418910457107076,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.341222957754127,
|
|
"kl": 0.0020427703857421875,
|
|
"learning_rate": 9.525912175437733e-07,
|
|
"loss": 0.0001,
|
|
"num_tokens": 9904889.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.8993015289306641,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0626094048175301,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14524912930313416,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1102186379345533,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 187
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1459.0,
|
|
"completions/mean_length": 1401.75,
|
|
"completions/mean_terminated_length": 1303.5,
|
|
"completions/min_length": 946.0,
|
|
"completions/min_terminated_length": 946.0,
|
|
"epoch": 0.23544145272385722,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.7109926581212966,
|
|
"kl": 0.0028514862060546875,
|
|
"learning_rate": 9.5170891968057e-07,
|
|
"loss": 0.0103,
|
|
"num_tokens": 9960061.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.4977339506149292,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.007690022648520695,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11369344635650466,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11279282877125754,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 188
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.4375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1497.0,
|
|
"completions/mean_length": 1399.3125,
|
|
"completions/mean_terminated_length": 1321.0,
|
|
"completions/min_length": 1270.0,
|
|
"completions/min_terminated_length": 1270.0,
|
|
"epoch": 0.2366938008766437,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.2019750861669105,
|
|
"kl": 0.0019474029541015625,
|
|
"learning_rate": 9.508189525699498e-07,
|
|
"loss": 0.0016,
|
|
"num_tokens": 10018474.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.40811485052108765,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09626746004308685,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11868608664564458,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8416666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11385500851066221,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 189
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1067.0,
|
|
"completions/mean_length": 1176.1875,
|
|
"completions/mean_terminated_length": 852.375,
|
|
"completions/min_length": 641.0,
|
|
"completions/min_terminated_length": 641.0,
|
|
"epoch": 0.23794614902943018,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.0536777064675973,
|
|
"kl": 0.00238037109375,
|
|
"learning_rate": 9.499213332027676e-07,
|
|
"loss": -0.0079,
|
|
"num_tokens": 10055509.0,
|
|
"reward": -5.960464477539063e-08,
|
|
"reward_std": 0.5494594573974609,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -5.960464477539063e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.09604975311367514,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10254148239725733,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07084150279686702,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 190
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1421.0,
|
|
"completions/max_terminated_length": 1421.0,
|
|
"completions/mean_length": 980.5,
|
|
"completions/mean_terminated_length": 980.5,
|
|
"completions/min_length": 598.0,
|
|
"completions/min_terminated_length": 598.0,
|
|
"epoch": 0.23919849718221667,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.5775077184845583,
|
|
"kl": 0.003173828125,
|
|
"learning_rate": 9.490160787159716e-07,
|
|
"loss": -0.0435,
|
|
"num_tokens": 10088493.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.7993010878562927,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.017021331786918385,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08925782815695868,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5791666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10809803506625447,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 191
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1491.0,
|
|
"completions/mean_length": 1205.125,
|
|
"completions/mean_terminated_length": 1028.2000732421875,
|
|
"completions/min_length": 700.0,
|
|
"completions/min_terminated_length": 700.0,
|
|
"epoch": 0.24045084533500313,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.472937551782796,
|
|
"kl": 0.003765106201171875,
|
|
"learning_rate": 9.481032063922764e-07,
|
|
"loss": 0.0801,
|
|
"num_tokens": 10134447.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.9049590826034546,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.028540941769550358,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.047797358383350766,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.75,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10183501544346314,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 192
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.4375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1462.0,
|
|
"completions/mean_length": 1342.1875,
|
|
"completions/mean_terminated_length": 1219.4444580078125,
|
|
"completions/min_length": 823.0,
|
|
"completions/min_terminated_length": 823.0,
|
|
"epoch": 0.2417031934877896,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.0654183912224107,
|
|
"kl": 0.00345611572265625,
|
|
"learning_rate": 9.471827336598332e-07,
|
|
"loss": -0.0116,
|
|
"num_tokens": 10182434.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.894692599773407,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.10903944916375954,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.15533453332102554,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7208333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06540472290116196,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 193
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 1.0,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 0.0,
|
|
"completions/mean_length": 1500.0,
|
|
"completions/mean_terminated_length": 0.0,
|
|
"completions/min_length": 1500.0,
|
|
"completions/min_terminated_length": 0.0,
|
|
"epoch": 0.2429555416405761,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.7779664849113415,
|
|
"kl": 0.003116607666015625,
|
|
"learning_rate": 9.462546780918966e-07,
|
|
"loss": 0.0001,
|
|
"num_tokens": 10244530.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.967013955116272,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.053086024723834134,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06887877561253418,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07888106377466157,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 194
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1470.0,
|
|
"completions/mean_length": 1454.9375,
|
|
"completions/mean_terminated_length": 1397.0001220703125,
|
|
"completions/min_length": 1265.0,
|
|
"completions/min_terminated_length": 1265.0,
|
|
"epoch": 0.24420788979336255,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.1642061803468913,
|
|
"kl": 0.003826141357421875,
|
|
"learning_rate": 9.453190574064893e-07,
|
|
"loss": -0.0047,
|
|
"num_tokens": 10299345.0,
|
|
"reward": 1.862645149230957e-08,
|
|
"reward_std": 1.04762601852417,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 1.862645149230957e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.04354357070732585,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08903133853741613,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08777074514725114,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 195
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.25,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1492.0,
|
|
"completions/mean_length": 1337.0625,
|
|
"completions/mean_terminated_length": 1282.75,
|
|
"completions/min_length": 999.0,
|
|
"completions/min_terminated_length": 999.0,
|
|
"epoch": 0.24546023794614902,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.0019183290119758,
|
|
"kl": 0.003467559814453125,
|
|
"learning_rate": 9.443758894660638e-07,
|
|
"loss": 0.0284,
|
|
"num_tokens": 10358514.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.6315692067146301,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.07697389081957594,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12645427286420413,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10671873729054746,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 196
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.9375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1413.0,
|
|
"completions/mean_length": 1494.5625,
|
|
"completions/mean_terminated_length": 1413.0,
|
|
"completions/min_length": 1413.0,
|
|
"completions/min_terminated_length": 1413.0,
|
|
"epoch": 0.2467125860989355,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.8267138784190755,
|
|
"kl": 0.002933502197265625,
|
|
"learning_rate": 9.434251922771616e-07,
|
|
"loss": 0.0078,
|
|
"num_tokens": 10411171.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.884939432144165,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.01747490695405262,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06365932956310252,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6416666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12141145226353543,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 197
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1418.0,
|
|
"completions/mean_length": 1410.375,
|
|
"completions/mean_terminated_length": 1261.0,
|
|
"completions/min_length": 803.0,
|
|
"completions/min_terminated_length": 803.0,
|
|
"epoch": 0.24796493425172197,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.1021608965378076,
|
|
"kl": 0.003887176513671875,
|
|
"learning_rate": 9.424669839900691e-07,
|
|
"loss": 0.0143,
|
|
"num_tokens": 10469257.0,
|
|
"reward": -5.21540641784668e-08,
|
|
"reward_std": 1.061091661453247,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -5.21540641784668e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0551289409217747,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.2097823559795121,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08421753138505424,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 198
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1407.0,
|
|
"completions/mean_length": 1370.9375,
|
|
"completions/mean_terminated_length": 1205.0,
|
|
"completions/min_length": 1019.0,
|
|
"completions/min_terminated_length": 1019.0,
|
|
"epoch": 0.24921728240450847,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.6919185602949653,
|
|
"kl": 0.002506256103515625,
|
|
"learning_rate": 9.415012828984714e-07,
|
|
"loss": 0.0067,
|
|
"num_tokens": 10523624.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.7187443971633911,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.03211836693332174,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.13737955494238535,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.775,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07649739768026005,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 199
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1361.0,
|
|
"completions/mean_length": 1390.8125,
|
|
"completions/mean_terminated_length": 1281.625,
|
|
"completions/min_length": 1177.0,
|
|
"completions/min_terminated_length": 1177.0,
|
|
"epoch": 0.25046963055729493,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.450252869715986,
|
|
"kl": 0.00223541259765625,
|
|
"learning_rate": 9.405281074391022e-07,
|
|
"loss": -0.0098,
|
|
"num_tokens": 10579429.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.4103597402572632,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.1406289464666968,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.15985873234433481,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08933913745655643,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 200
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1486.0,
|
|
"completions/mean_length": 1295.8125,
|
|
"completions/mean_terminated_length": 1266.6429443359375,
|
|
"completions/min_length": 994.0,
|
|
"completions/min_terminated_length": 994.0,
|
|
"epoch": 0.2517219787100814,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.642709475322054,
|
|
"kl": 0.0023345947265625,
|
|
"learning_rate": 9.395474761913939e-07,
|
|
"loss": 0.014,
|
|
"num_tokens": 10628866.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.7710261940956116,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04348868814755175,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0830759853911682,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7833333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14504150108516198,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 201
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.1875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1494.0,
|
|
"completions/mean_length": 1357.125,
|
|
"completions/mean_terminated_length": 1324.1539306640625,
|
|
"completions/min_length": 1030.0,
|
|
"completions/min_terminated_length": 1030.0,
|
|
"epoch": 0.25297432686286786,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.216143536482822,
|
|
"kl": 0.004241943359375,
|
|
"learning_rate": 9.3855940787712e-07,
|
|
"loss": -0.0086,
|
|
"num_tokens": 10670092.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.6420686841011047,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.03687807737633173,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16934247164490465,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6458333333333334,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14343665526661611,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327954292297363,
|
|
"step": 202
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.1875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1434.0,
|
|
"completions/mean_length": 1278.625,
|
|
"completions/mean_terminated_length": 1227.5384521484375,
|
|
"completions/min_length": 970.0,
|
|
"completions/min_terminated_length": 970.0,
|
|
"epoch": 0.2542266750156543,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.4244156852349814,
|
|
"kl": 0.0052642822265625,
|
|
"learning_rate": 9.375639213600401e-07,
|
|
"loss": -0.0436,
|
|
"num_tokens": 10728350.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.9011333584785461,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.09818030402455966,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07523729893672071,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13080944580232393,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 203
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1490.0,
|
|
"completions/mean_length": 1443.9375,
|
|
"completions/mean_terminated_length": 1371.857177734375,
|
|
"completions/min_length": 1252.0,
|
|
"completions/min_terminated_length": 1252.0,
|
|
"epoch": 0.25547902316844084,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.5077076210411278,
|
|
"kl": 0.00457000732421875,
|
|
"learning_rate": 9.365610356455384e-07,
|
|
"loss": 0.0019,
|
|
"num_tokens": 10791365.0,
|
|
"reward": 4.470348358154297e-08,
|
|
"reward_std": 0.8847507238388062,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 4.470348358154297e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.015580215905333485,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06751943458738671,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5583333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.135263802609184,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 204
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.75,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1351.0,
|
|
"completions/mean_length": 1391.8125,
|
|
"completions/mean_terminated_length": 1067.25,
|
|
"completions/min_length": 264.0,
|
|
"completions/min_terminated_length": 264.0,
|
|
"epoch": 0.2567313713212273,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.827913375094597,
|
|
"kl": 0.003643035888671875,
|
|
"learning_rate": 9.355507698802613e-07,
|
|
"loss": -0.0786,
|
|
"num_tokens": 10852330.0,
|
|
"reward": 7.450580596923828e-09,
|
|
"reward_std": 1.0562589168548584,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09460135777577211,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12653992925605045,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7416666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11122216672215289,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 205
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.6875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1397.0,
|
|
"completions/mean_length": 1426.6875,
|
|
"completions/mean_terminated_length": 1265.4000244140625,
|
|
"completions/min_length": 1092.0,
|
|
"completions/min_terminated_length": 1092.0,
|
|
"epoch": 0.2579837194740138,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 4.109416757035146,
|
|
"kl": 0.00577545166015625,
|
|
"learning_rate": 9.345331433517522e-07,
|
|
"loss": 0.0289,
|
|
"num_tokens": 10918837.0,
|
|
"reward": -1.4901161193847656e-08,
|
|
"reward_std": 0.9729784727096558,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.04706903609226349,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08099201475868337,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11917929226045818,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 206
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.75,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1478.0,
|
|
"completions/mean_length": 1464.1875,
|
|
"completions/mean_terminated_length": 1356.75,
|
|
"completions/min_length": 1188.0,
|
|
"completions/min_terminated_length": 1188.0,
|
|
"epoch": 0.25923606762680024,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.1385572152974905,
|
|
"kl": 0.003864288330078125,
|
|
"learning_rate": 9.335081754880825e-07,
|
|
"loss": 0.0082,
|
|
"num_tokens": 10974608.0,
|
|
"reward": -5.960464477539063e-08,
|
|
"reward_std": 0.5515385270118713,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -5.960464477539063e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.06366384054522155,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10778487016156474,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8291666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1002773930432755,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 207
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1487.0,
|
|
"completions/mean_length": 1448.625,
|
|
"completions/mean_terminated_length": 1363.0,
|
|
"completions/min_length": 1103.0,
|
|
"completions/min_terminated_length": 1103.0,
|
|
"epoch": 0.2604884157795867,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.2758893957680097,
|
|
"kl": 0.004230499267578125,
|
|
"learning_rate": 9.32475885857481e-07,
|
|
"loss": -0.0053,
|
|
"num_tokens": 11033482.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.5894155502319336,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.05038277241462744,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07116397984833597,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07097208632298363,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 208
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.8125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1463.0,
|
|
"completions/mean_length": 1455.75,
|
|
"completions/mean_terminated_length": 1264.0,
|
|
"completions/min_length": 1089.0,
|
|
"completions/min_terminated_length": 1089.0,
|
|
"epoch": 0.2617407639323732,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.6461488967254634,
|
|
"kl": 0.0025310516357421875,
|
|
"learning_rate": 9.31436294167961e-07,
|
|
"loss": -0.0132,
|
|
"num_tokens": 11098902.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.6484573483467102,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.1468978313797672,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.24420746920563674,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1164283279771532,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 209
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1413.0,
|
|
"completions/mean_length": 1237.1875,
|
|
"completions/mean_terminated_length": 1079.5,
|
|
"completions/min_length": 742.0,
|
|
"completions/min_terminated_length": 742.0,
|
|
"epoch": 0.2629931120851597,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.313685913897086,
|
|
"kl": 0.003513336181640625,
|
|
"learning_rate": 9.303894202669428e-07,
|
|
"loss": 0.0531,
|
|
"num_tokens": 11148649.0,
|
|
"reward": -1.1175870895385742e-08,
|
|
"reward_std": 0.990402102470398,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -1.1175870895385742e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0009552414586071921,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0038209658344287682,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6458333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08333333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 210
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1433.0,
|
|
"completions/mean_length": 1327.125,
|
|
"completions/mean_terminated_length": 1154.25,
|
|
"completions/min_length": 970.0,
|
|
"completions/min_terminated_length": 970.0,
|
|
"epoch": 0.26424546023794615,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.0546042142648298,
|
|
"kl": 0.0033416748046875,
|
|
"learning_rate": 9.293352841408759e-07,
|
|
"loss": -0.0213,
|
|
"num_tokens": 11207483.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.5952367186546326,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.035691884267146166,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07657424493915134,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.16324260518672248,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 211
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1375.0,
|
|
"completions/mean_length": 1306.25,
|
|
"completions/mean_terminated_length": 1190.0,
|
|
"completions/min_length": 1071.0,
|
|
"completions/min_terminated_length": 1071.0,
|
|
"epoch": 0.2654978083907326,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.123766104646228,
|
|
"kl": 0.0015621185302734375,
|
|
"learning_rate": 9.282739059148566e-07,
|
|
"loss": -0.0237,
|
|
"num_tokens": 11255703.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.9929344654083252,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.025706850415670862,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11233922174981649,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7708333333333334,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07685966046898342,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 212
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.3125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1287.0,
|
|
"completions/mean_length": 1126.125,
|
|
"completions/mean_terminated_length": 956.1818237304688,
|
|
"completions/min_length": 674.0,
|
|
"completions/min_terminated_length": 674.0,
|
|
"epoch": 0.2667501565435191,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.488984850056713,
|
|
"kl": 0.0038604736328125,
|
|
"learning_rate": 9.272053058522444e-07,
|
|
"loss": -0.0253,
|
|
"num_tokens": 11294505.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.5554646253585815,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.23003407087469527,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.20616326736471785,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6291666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.15000000000000002,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 213
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1279.0,
|
|
"completions/max_terminated_length": 1279.0,
|
|
"completions/mean_length": 883.5,
|
|
"completions/mean_terminated_length": 883.5,
|
|
"completions/min_length": 673.0,
|
|
"completions/min_terminated_length": 673.0,
|
|
"epoch": 0.2680025046963056,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.2969967424976456,
|
|
"kl": 0.002185821533203125,
|
|
"learning_rate": 9.261295043542747e-07,
|
|
"loss": 0.0085,
|
|
"num_tokens": 11325305.0,
|
|
"reward": 2.421438694000244e-08,
|
|
"reward_std": 1.039635419845581,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.421438694000244e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06031083797758491,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16643314604295306,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6458333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11603000888978231,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 214
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1499.0,
|
|
"completions/mean_length": 1423.8125,
|
|
"completions/mean_terminated_length": 1296.8333740234375,
|
|
"completions/min_length": 987.0,
|
|
"completions/min_terminated_length": 987.0,
|
|
"epoch": 0.26925485284909206,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.683342974010468,
|
|
"kl": 0.0028839111328125,
|
|
"learning_rate": 9.250465219596699e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 11384166.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.6987115144729614,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.015736024702926166,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06158481768754947,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1057600358603626,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 215
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1390.0,
|
|
"completions/max_terminated_length": 1390.0,
|
|
"completions/mean_length": 1160.6875,
|
|
"completions/mean_terminated_length": 1160.6875,
|
|
"completions/min_length": 865.0,
|
|
"completions/min_terminated_length": 865.0,
|
|
"epoch": 0.27050720100187853,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.800146584559223,
|
|
"kl": 0.004093170166015625,
|
|
"learning_rate": 9.239563793442462e-07,
|
|
"loss": 0.0174,
|
|
"num_tokens": 11441313.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.8806728720664978,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0032805949907051112,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0510781770746922,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11894598836509009,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 216
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1450.0,
|
|
"completions/mean_length": 1307.0625,
|
|
"completions/mean_terminated_length": 1279.5,
|
|
"completions/min_length": 985.0,
|
|
"completions/min_terminated_length": 985.0,
|
|
"epoch": 0.271759549154665,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.806350656696486,
|
|
"kl": 0.002574920654296875,
|
|
"learning_rate": 9.228590973205201e-07,
|
|
"loss": -0.0377,
|
|
"num_tokens": 11499258.0,
|
|
"reward": -7.450580596923828e-09,
|
|
"reward_std": 1.0440177917480469,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -7.450580596923828e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0614237528104428,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06944213481803516,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09953596037316066,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 217
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 1.0,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 0.0,
|
|
"completions/mean_length": 1500.0,
|
|
"completions/mean_terminated_length": 0.0,
|
|
"completions/min_length": 1500.0,
|
|
"completions/min_terminated_length": 0.0,
|
|
"epoch": 0.27301189730745146,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.4434394608429915,
|
|
"kl": 0.002834320068359375,
|
|
"learning_rate": 9.2175469683731e-07,
|
|
"loss": 0.0001,
|
|
"num_tokens": 11554162.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.9512232542037964,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.006459758393578777,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09841534495892398,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13601470508735444,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 218
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1378.0,
|
|
"completions/mean_length": 1139.4375,
|
|
"completions/mean_terminated_length": 1115.4000244140625,
|
|
"completions/min_length": 793.0,
|
|
"completions/min_terminated_length": 793.0,
|
|
"epoch": 0.2742642454602379,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.183780267914974,
|
|
"kl": 0.0016641616821289062,
|
|
"learning_rate": 9.206431989793374e-07,
|
|
"loss": 0.0171,
|
|
"num_tokens": 11599913.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.7830429077148438,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.006944415247763777,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03283008559006156,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09727776191382574,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 219
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1295.0,
|
|
"completions/mean_length": 1346.5,
|
|
"completions/mean_terminated_length": 1149.1429443359375,
|
|
"completions/min_length": 1004.0,
|
|
"completions/min_terminated_length": 1004.0,
|
|
"epoch": 0.27551659361302444,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.3703074221103817,
|
|
"kl": 0.00464630126953125,
|
|
"learning_rate": 9.195246249668232e-07,
|
|
"loss": -0.0007,
|
|
"num_tokens": 11664265.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.7190686464309692,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.3476598454237376,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.4301665677025463,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08255189164891873,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 220
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1436.0,
|
|
"completions/mean_length": 1309.5625,
|
|
"completions/mean_terminated_length": 1119.125,
|
|
"completions/min_length": 955.0,
|
|
"completions/min_terminated_length": 955.0,
|
|
"epoch": 0.2767689417658109,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.622730015507812,
|
|
"kl": 0.0024242401123046875,
|
|
"learning_rate": 9.183989961550832e-07,
|
|
"loss": -0.0219,
|
|
"num_tokens": 11719922.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.72877037525177,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0952471076969717,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12586824643040787,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08606629658238704,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 221
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1392.0,
|
|
"completions/mean_length": 1485.875,
|
|
"completions/mean_terminated_length": 1387.0,
|
|
"completions/min_length": 1382.0,
|
|
"completions/min_terminated_length": 1382.0,
|
|
"epoch": 0.27802128991859737,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.782237650167012,
|
|
"kl": 0.004253387451171875,
|
|
"learning_rate": 9.172663340341204e-07,
|
|
"loss": -0.0028,
|
|
"num_tokens": 11778680.0,
|
|
"reward": 0.0,
|
|
"reward_std": 1.0009106397628784,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.012701224890322388,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.02815604341593864,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7124999999999999,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09016445879408155,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 222
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.6875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1462.0,
|
|
"completions/mean_length": 1443.0625,
|
|
"completions/mean_terminated_length": 1317.800048828125,
|
|
"completions/min_length": 1121.0,
|
|
"completions/min_terminated_length": 1121.0,
|
|
"epoch": 0.27927363807138383,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.998694336626448,
|
|
"kl": 0.003566741943359375,
|
|
"learning_rate": 9.161266602282147e-07,
|
|
"loss": -0.0055,
|
|
"num_tokens": 11838169.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.9211122989654541,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04490957636365446,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09152261044011904,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7958333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10741060020797315,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 223
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1450.0,
|
|
"completions/mean_length": 1488.875,
|
|
"completions/mean_terminated_length": 1411.0,
|
|
"completions/min_length": 1372.0,
|
|
"completions/min_terminated_length": 1372.0,
|
|
"epoch": 0.2805259862241703,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.9658149533875187,
|
|
"kl": 0.004146575927734375,
|
|
"learning_rate": 9.149799964955093e-07,
|
|
"loss": 0.008,
|
|
"num_tokens": 11899975.0,
|
|
"reward": -3.725290298461914e-09,
|
|
"reward_std": 1.0432794094085693,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -3.725290298461914e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.012174573886332358,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04195711207506097,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07685966046898342,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 224
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.75,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1359.0,
|
|
"completions/mean_length": 1409.8125,
|
|
"completions/mean_terminated_length": 1139.25,
|
|
"completions/min_length": 925.0,
|
|
"completions/min_terminated_length": 925.0,
|
|
"epoch": 0.2817783343769568,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.936718458392348,
|
|
"kl": 0.00342559814453125,
|
|
"learning_rate": 9.138263647275969e-07,
|
|
"loss": -0.0033,
|
|
"num_tokens": 11941164.0,
|
|
"reward": 1.4901161193847656e-08,
|
|
"reward_std": 1.0000627040863037,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.15080494449355206,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08565387051258783,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09339283817414601,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 225
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.1875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1475.0,
|
|
"completions/mean_length": 1183.3125,
|
|
"completions/mean_terminated_length": 1110.2308349609375,
|
|
"completions/min_length": 786.0,
|
|
"completions/min_terminated_length": 786.0,
|
|
"epoch": 0.2830306825297433,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.658827402816175,
|
|
"kl": 0.00414276123046875,
|
|
"learning_rate": 9.126657869491e-07,
|
|
"loss": 0.0126,
|
|
"num_tokens": 11992657.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.9479507207870483,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.017415102975537073,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.031912571116253466,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07649739768026002,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 226
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.9375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1484.0,
|
|
"completions/mean_length": 1499.0,
|
|
"completions/mean_terminated_length": 1484.0,
|
|
"completions/min_length": 1484.0,
|
|
"completions/min_terminated_length": 1484.0,
|
|
"epoch": 0.28428303068252975,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.1261193198966652,
|
|
"kl": 0.0041351318359375,
|
|
"learning_rate": 9.114982853172521e-07,
|
|
"loss": 0.0009,
|
|
"num_tokens": 12054529.0,
|
|
"reward": -1.4901161193847656e-08,
|
|
"reward_std": 1.0018526315689087,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.012434236974245455,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.045472914513713596,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10292032157252812,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 227
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 1.0,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 0.0,
|
|
"completions/mean_length": 1500.0,
|
|
"completions/mean_terminated_length": 0.0,
|
|
"completions/min_length": 1500.0,
|
|
"completions/min_terminated_length": 0.0,
|
|
"epoch": 0.2855353788353162,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.4904650551200485,
|
|
"kl": 0.00292205810546875,
|
|
"learning_rate": 9.103238821214727e-07,
|
|
"loss": 0.0001,
|
|
"num_tokens": 12114017.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.4001474976539612,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.19495499044861478,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.26273237351903383,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7666666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.059628479399994425,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 228
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1329.0,
|
|
"completions/mean_length": 1472.3125,
|
|
"completions/mean_terminated_length": 1278.5,
|
|
"completions/min_length": 1228.0,
|
|
"completions/min_terminated_length": 1228.0,
|
|
"epoch": 0.2867877269881027,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.9738846657010085,
|
|
"kl": 0.00360107421875,
|
|
"learning_rate": 9.09142599782944e-07,
|
|
"loss": -0.0048,
|
|
"num_tokens": 12167838.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.981914758682251,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.2657549523204851,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.31826899071497716,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6333333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08777074514725108,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 229
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1488.0,
|
|
"completions/mean_length": 1397.75,
|
|
"completions/mean_terminated_length": 1266.2857666015625,
|
|
"completions/min_length": 987.0,
|
|
"completions/min_terminated_length": 987.0,
|
|
"epoch": 0.28804007514088914,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.9008571656686524,
|
|
"kl": 0.003810882568359375,
|
|
"learning_rate": 9.07954460854181e-07,
|
|
"loss": -0.0435,
|
|
"num_tokens": 12219114.0,
|
|
"reward": -1.4901161193847656e-08,
|
|
"reward_std": 0.9752408266067505,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 1.7226310978600795e-05,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.017137695280743562,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08255189164891871,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 230
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1462.0,
|
|
"completions/mean_length": 1381.0625,
|
|
"completions/mean_terminated_length": 1262.125,
|
|
"completions/min_length": 1118.0,
|
|
"completions/min_terminated_length": 1118.0,
|
|
"epoch": 0.28929242329367566,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.579522085731086,
|
|
"kl": 0.00286102294921875,
|
|
"learning_rate": 9.067594880186016e-07,
|
|
"loss": 0.0118,
|
|
"num_tokens": 12283627.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.8155025839805603,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.4072348540230938,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.33458166056964905,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11474609652039004,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 231
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.75,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1402.0,
|
|
"completions/mean_length": 1432.375,
|
|
"completions/mean_terminated_length": 1229.5,
|
|
"completions/min_length": 1085.0,
|
|
"completions/min_terminated_length": 1085.0,
|
|
"epoch": 0.2905447714464621,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.3504781665775463,
|
|
"kl": 0.00449371337890625,
|
|
"learning_rate": 9.055577040900944e-07,
|
|
"loss": 0.0198,
|
|
"num_tokens": 12334705.0,
|
|
"reward": 1.4901161193847656e-08,
|
|
"reward_std": 1.0079009532928467,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.007641388631451263,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1447140199531734,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7208333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1067187372905475,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 232
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1439.0,
|
|
"completions/mean_length": 1411.4375,
|
|
"completions/mean_terminated_length": 1322.875,
|
|
"completions/min_length": 1169.0,
|
|
"completions/min_terminated_length": 1169.0,
|
|
"epoch": 0.2917971195992486,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.3485586422127005,
|
|
"kl": 0.004638671875,
|
|
"learning_rate": 9.043491320125814e-07,
|
|
"loss": 0.0213,
|
|
"num_tokens": 12389648.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.8743376731872559,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.002227354544120855,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08926616854117143,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6666666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13109227736669002,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 233
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1492.0,
|
|
"completions/mean_length": 1474.0,
|
|
"completions/mean_terminated_length": 1430.666748046875,
|
|
"completions/min_length": 1263.0,
|
|
"completions/min_terminated_length": 1263.0,
|
|
"epoch": 0.29304946775203505,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.8494643618437947,
|
|
"kl": 0.00304412841796875,
|
|
"learning_rate": 9.031337948595817e-07,
|
|
"loss": 0.0093,
|
|
"num_tokens": 12456272.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.7424121499061584,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04108305878098174,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1161369232371233,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09016445879408158,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 234
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1477.0,
|
|
"completions/mean_length": 1141.4375,
|
|
"completions/mean_terminated_length": 1117.533447265625,
|
|
"completions/min_length": 557.0,
|
|
"completions/min_terminated_length": 557.0,
|
|
"epoch": 0.2943018159048215,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.2293142678990754,
|
|
"kl": 0.0016422271728515625,
|
|
"learning_rate": 9.019117158337695e-07,
|
|
"loss": 0.0038,
|
|
"num_tokens": 12498031.0,
|
|
"reward": -5.960464477539063e-08,
|
|
"reward_std": 0.6336873769760132,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -5.960464477539063e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.009688556708469433,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05746171503021093,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7291666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1529342632927262,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 235
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.4375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1360.0,
|
|
"completions/mean_length": 1298.0,
|
|
"completions/mean_terminated_length": 1140.888916015625,
|
|
"completions/min_length": 942.0,
|
|
"completions/min_terminated_length": 942.0,
|
|
"epoch": 0.29555416405760804,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.714021525456579,
|
|
"kl": 0.00262451171875,
|
|
"learning_rate": 9.006829182665325e-07,
|
|
"loss": -0.0167,
|
|
"num_tokens": 12548119.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.6711900234222412,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.07748680022506171,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09355524405080126,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6458333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09953596037316063,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 236
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.1875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1489.0,
|
|
"completions/mean_length": 1300.5625,
|
|
"completions/mean_terminated_length": 1254.5384521484375,
|
|
"completions/min_length": 1067.0,
|
|
"completions/min_terminated_length": 1067.0,
|
|
"epoch": 0.2968065122103945,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.982742616917611,
|
|
"kl": 0.003223419189453125,
|
|
"learning_rate": 8.99447425617525e-07,
|
|
"loss": 0.0208,
|
|
"num_tokens": 12596288.0,
|
|
"reward": 7.450580596923828e-09,
|
|
"reward_std": 1.049065351486206,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.24719836974150322,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.26811306631065646,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08850612031567837,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 237
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1490.0,
|
|
"completions/mean_length": 1029.3125,
|
|
"completions/mean_terminated_length": 997.9334106445312,
|
|
"completions/min_length": 658.0,
|
|
"completions/min_terminated_length": 658.0,
|
|
"epoch": 0.29805886036318097,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.8416489290807947,
|
|
"kl": 0.002620697021484375,
|
|
"learning_rate": 8.982052614742218e-07,
|
|
"loss": 0.011,
|
|
"num_tokens": 12642901.0,
|
|
"reward": 0.0,
|
|
"reward_std": 1.0193631649017334,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.07904007503321656,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05617218071571685,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08243965245133134,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 238
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.4375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1465.0,
|
|
"completions/mean_length": 1096.1875,
|
|
"completions/mean_terminated_length": 782.1111450195312,
|
|
"completions/min_length": 444.0,
|
|
"completions/min_terminated_length": 444.0,
|
|
"epoch": 0.29931120851596743,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.052640590300802,
|
|
"kl": 0.003078460693359375,
|
|
"learning_rate": 8.96956449551466e-07,
|
|
"loss": 0.0293,
|
|
"num_tokens": 12685520.0,
|
|
"reward": 3.725290298461914e-08,
|
|
"reward_std": 1.0355110168457031,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 3.725290298461914e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0007223476637822487,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.045707258037314374,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6416666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1164283279771532,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 239
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.1875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1352.0,
|
|
"completions/mean_length": 1176.375,
|
|
"completions/mean_terminated_length": 1101.6923828125,
|
|
"completions/min_length": 795.0,
|
|
"completions/min_terminated_length": 795.0,
|
|
"epoch": 0.3005635566687539,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.409137882904874,
|
|
"kl": 0.00447845458984375,
|
|
"learning_rate": 8.957010136910177e-07,
|
|
"loss": 0.0027,
|
|
"num_tokens": 12732478.0,
|
|
"reward": 1.4901161193847656e-08,
|
|
"reward_std": 1.0500978231430054,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.03744221002235665,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07008909373099989,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09574271077563382,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 240
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1235.0,
|
|
"completions/mean_length": 1412.4375,
|
|
"completions/mean_terminated_length": 799.5,
|
|
"completions/min_length": 364.0,
|
|
"completions/min_terminated_length": 364.0,
|
|
"epoch": 0.3018159048215404,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.773019820684676,
|
|
"kl": 0.003604888916015625,
|
|
"learning_rate": 8.944389778610978e-07,
|
|
"loss": -0.0118,
|
|
"num_tokens": 12801637.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 1.0457574129104614,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0662282436201746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07940471297587236,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5541666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.22273551829717486,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 241
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1189.0,
|
|
"completions/mean_length": 1244.8125,
|
|
"completions/mean_terminated_length": 989.625,
|
|
"completions/min_length": 844.0,
|
|
"completions/min_terminated_length": 844.0,
|
|
"epoch": 0.3030682529743269,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.2644500704934876,
|
|
"kl": 0.00386810302734375,
|
|
"learning_rate": 8.931703661559313e-07,
|
|
"loss": -0.0143,
|
|
"num_tokens": 12856914.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.8334095478057861,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.056274055481427915,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06634909249021953,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07888106377466157,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 242
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.75,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1470.0,
|
|
"completions/mean_length": 1456.9375,
|
|
"completions/mean_terminated_length": 1327.75,
|
|
"completions/min_length": 1033.0,
|
|
"completions/min_terminated_length": 1033.0,
|
|
"epoch": 0.30432060112711334,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.6637299937398455,
|
|
"kl": 0.003570556640625,
|
|
"learning_rate": 8.918952027952867e-07,
|
|
"loss": 0.0284,
|
|
"num_tokens": 12917977.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.6500852704048157,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.022227592869964712,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.035807130460280175,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.16324260518672246,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 243
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.9375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1087.0,
|
|
"completions/mean_length": 1474.1875,
|
|
"completions/mean_terminated_length": 1087.0,
|
|
"completions/min_length": 1087.0,
|
|
"completions/min_terminated_length": 1087.0,
|
|
"epoch": 0.3055729492798998,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.9975900171544794,
|
|
"kl": 0.003841400146484375,
|
|
"learning_rate": 8.906135121240139e-07,
|
|
"loss": -0.0025,
|
|
"num_tokens": 12975724.0,
|
|
"reward": 7.450580596923828e-09,
|
|
"reward_std": 1.0467472076416016,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.042561575382490995,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12106724719901756,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06382847385042258,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 244
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1411.0,
|
|
"completions/mean_length": 1228.9375,
|
|
"completions/mean_terminated_length": 1066.300048828125,
|
|
"completions/min_length": 227.0,
|
|
"completions/min_terminated_length": 227.0,
|
|
"epoch": 0.3068252974326863,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.9921353466243374,
|
|
"kl": 0.00344085693359375,
|
|
"learning_rate": 8.89325318611579e-07,
|
|
"loss": -0.1088,
|
|
"num_tokens": 13028715.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.8063486218452454,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04616985070885913,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.17900764914436168,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0718795288428261,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 245
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.3125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1489.0,
|
|
"completions/mean_length": 1367.3125,
|
|
"completions/mean_terminated_length": 1307.0,
|
|
"completions/min_length": 1073.0,
|
|
"completions/min_terminated_length": 1073.0,
|
|
"epoch": 0.30807764558547274,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.02852662262937,
|
|
"kl": 0.00337982177734375,
|
|
"learning_rate": 8.880306468515979e-07,
|
|
"loss": 0.0285,
|
|
"num_tokens": 13077528.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.4837535619735718,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.010209232644034694,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1481102929172379,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.16947631758514883,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 246
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.25,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1491.0,
|
|
"completions/mean_length": 1327.5625,
|
|
"completions/mean_terminated_length": 1270.0833740234375,
|
|
"completions/min_length": 1026.0,
|
|
"completions/min_terminated_length": 1026.0,
|
|
"epoch": 0.30932999373825926,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 5.433838129064804,
|
|
"kl": 0.009929656982421875,
|
|
"learning_rate": 8.867295215613659e-07,
|
|
"loss": 0.0288,
|
|
"num_tokens": 13145409.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.691638708114624,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06467589999789795,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0938792951394418,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.5958333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0824396524513313,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 247
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.6875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1490.0,
|
|
"completions/mean_length": 1467.9375,
|
|
"completions/mean_terminated_length": 1397.4000244140625,
|
|
"completions/min_length": 1284.0,
|
|
"completions/min_terminated_length": 1284.0,
|
|
"epoch": 0.3105823418910457,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.9888443937256404,
|
|
"kl": 0.0040283203125,
|
|
"learning_rate": 8.85421967581386e-07,
|
|
"loss": 0.0184,
|
|
"num_tokens": 13198848.0,
|
|
"reward": -2.2351741790771484e-08,
|
|
"reward_std": 0.9693495035171509,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.2351741790771484e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.032938770819161474,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.162768145864506,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.675,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1803289175881631,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 248
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.9375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1483.0,
|
|
"completions/mean_length": 1498.9375,
|
|
"completions/mean_terminated_length": 1483.0,
|
|
"completions/min_length": 1483.0,
|
|
"completions/min_terminated_length": 1483.0,
|
|
"epoch": 0.3118346900438322,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.721188390535492,
|
|
"kl": 0.003574371337890625,
|
|
"learning_rate": 8.841080098748959e-07,
|
|
"loss": 0.0006,
|
|
"num_tokens": 13257207.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.9934348464012146,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.054418946541605284,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14760181642272932,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12345339501504504,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 249
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1351.0,
|
|
"completions/mean_length": 1359.3125,
|
|
"completions/mean_terminated_length": 1178.4285888671875,
|
|
"completions/min_length": 890.0,
|
|
"completions/min_terminated_length": 890.0,
|
|
"epoch": 0.31308703819661865,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.2668088351359708,
|
|
"kl": 0.005401611328125,
|
|
"learning_rate": 8.827876735273893e-07,
|
|
"loss": -0.03,
|
|
"num_tokens": 13314820.0,
|
|
"reward": -3.725290298461914e-09,
|
|
"reward_std": 1.0606722831726074,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -3.725290298461914e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.1087294165966756,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07396732734066605,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.775,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10000000000000002,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 250
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1458.0,
|
|
"completions/mean_length": 1409.3125,
|
|
"completions/mean_terminated_length": 1258.166748046875,
|
|
"completions/min_length": 1036.0,
|
|
"completions/min_terminated_length": 1036.0,
|
|
"epoch": 0.3143393863494051,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.0088889020769733,
|
|
"kl": 0.003604888916015625,
|
|
"learning_rate": 8.814609837461385e-07,
|
|
"loss": 0.0432,
|
|
"num_tokens": 13381449.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.6518675088882446,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.005307542092858496,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.017828779266863094,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1112221667221529,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 251
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.4375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1472.0,
|
|
"completions/mean_length": 1289.9375,
|
|
"completions/mean_terminated_length": 1126.5555419921875,
|
|
"completions/min_length": 1031.0,
|
|
"completions/min_terminated_length": 1031.0,
|
|
"epoch": 0.31559173450219163,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.2400933230709956,
|
|
"kl": 0.0020122528076171875,
|
|
"learning_rate": 8.801279658597131e-07,
|
|
"loss": 0.0011,
|
|
"num_tokens": 13430872.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.8595645427703857,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.12188488436675578,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.32049499716061297,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7916666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09388724521901162,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 252
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.9375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1234.0,
|
|
"completions/mean_length": 1483.375,
|
|
"completions/mean_terminated_length": 1234.0,
|
|
"completions/min_length": 1234.0,
|
|
"completions/min_terminated_length": 1234.0,
|
|
"epoch": 0.3168440826549781,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.0575565927809905,
|
|
"kl": 0.004093170166015625,
|
|
"learning_rate": 8.787886453174951e-07,
|
|
"loss": -0.0053,
|
|
"num_tokens": 13479446.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.9997775554656982,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.051598953607968394,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06761287588738078,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6458333333333334,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12583057392117916,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 253
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1317.0,
|
|
"completions/max_terminated_length": 1317.0,
|
|
"completions/mean_length": 1068.0625,
|
|
"completions/mean_terminated_length": 1068.0625,
|
|
"completions/min_length": 758.0,
|
|
"completions/min_terminated_length": 758.0,
|
|
"epoch": 0.31809643080776456,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.56780625703575,
|
|
"kl": 0.00376129150390625,
|
|
"learning_rate": 8.77443047689195e-07,
|
|
"loss": -0.0249,
|
|
"num_tokens": 13534791.0,
|
|
"reward": 3.725290298461914e-09,
|
|
"reward_std": 1.0647456645965576,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 3.725290298461914e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.12037176129735677,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.15720532676467985,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08681611046941137,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 254
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1149.0,
|
|
"completions/mean_length": 1264.8125,
|
|
"completions/mean_terminated_length": 1029.625,
|
|
"completions/min_length": 999.0,
|
|
"completions/min_terminated_length": 999.0,
|
|
"epoch": 0.319348778960551,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 1.9002914944914675,
|
|
"kl": 0.001689910888671875,
|
|
"learning_rate": 8.760911986643621e-07,
|
|
"loss": 0.0079,
|
|
"num_tokens": 13585044.0,
|
|
"reward": 7.450580596923828e-09,
|
|
"reward_std": 1.0633113384246826,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.11099520216632296,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11289406797895053,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7374999999999999,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11013459778666118,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 255
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1356.0,
|
|
"completions/max_terminated_length": 1356.0,
|
|
"completions/mean_length": 1039.625,
|
|
"completions/mean_terminated_length": 1039.625,
|
|
"completions/min_length": 816.0,
|
|
"completions/min_terminated_length": 816.0,
|
|
"epoch": 0.3206011271133375,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 1.7437212883045385,
|
|
"kl": 0.0007447004318237305,
|
|
"learning_rate": 8.747331240518946e-07,
|
|
"loss": -0.0359,
|
|
"num_tokens": 13622654.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.80560302734375,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.05283560581406991,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1274858045865064,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11213417888437975,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 256
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1468.0,
|
|
"completions/mean_length": 1147.25,
|
|
"completions/mean_terminated_length": 1123.7333984375,
|
|
"completions/min_length": 871.0,
|
|
"completions/min_terminated_length": 871.0,
|
|
"epoch": 0.32185347526612396,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.829288365048447,
|
|
"kl": 0.00440216064453125,
|
|
"learning_rate": 8.73368849779547e-07,
|
|
"loss": -0.0586,
|
|
"num_tokens": 13666658.0,
|
|
"reward": 2.2351741790771484e-08,
|
|
"reward_std": 1.0113918781280518,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.2351741790771484e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.004399012913845209,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.022002579783276802,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.65,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.15104573749303493,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 257
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.1875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1453.0,
|
|
"completions/mean_length": 1205.0,
|
|
"completions/mean_terminated_length": 1136.923095703125,
|
|
"completions/min_length": 934.0,
|
|
"completions/min_terminated_length": 934.0,
|
|
"epoch": 0.3231058234189105,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.5508475650699274,
|
|
"kl": 0.00417327880859375,
|
|
"learning_rate": 8.719984018934348e-07,
|
|
"loss": -0.0198,
|
|
"num_tokens": 13713002.0,
|
|
"reward": -7.450580596923828e-09,
|
|
"reward_std": 0.9411071538925171,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -7.450580596923828e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.049397690395078006,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14628546425305664,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6541666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13381856152046848,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 258
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1379.0,
|
|
"completions/mean_length": 1393.9375,
|
|
"completions/mean_terminated_length": 1287.875,
|
|
"completions/min_length": 1155.0,
|
|
"completions/min_terminated_length": 1155.0,
|
|
"epoch": 0.32435817157169694,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.1259216947519164,
|
|
"kl": 0.004150390625,
|
|
"learning_rate": 8.706218065575374e-07,
|
|
"loss": 0.0051,
|
|
"num_tokens": 13765289.0,
|
|
"reward": -5.960464477539063e-08,
|
|
"reward_std": 0.7700310945510864,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -5.960464477539063e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.011427243535616135,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.12374645217812205,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8458333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13045504405165223,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 259
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.3125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1404.0,
|
|
"completions/mean_length": 1161.25,
|
|
"completions/mean_terminated_length": 1007.2727661132812,
|
|
"completions/min_length": 872.0,
|
|
"completions/min_terminated_length": 872.0,
|
|
"epoch": 0.3256105197244834,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.3286130814036103,
|
|
"kl": 0.00212058424949646,
|
|
"learning_rate": 8.692390900531985e-07,
|
|
"loss": 0.0569,
|
|
"num_tokens": 13819269.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.45210930705070496,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.23343450769100488,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.33023521153193414,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6583333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14981470036162822,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 260
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1162.0,
|
|
"completions/max_terminated_length": 1162.0,
|
|
"completions/mean_length": 974.1875,
|
|
"completions/mean_terminated_length": 974.1875,
|
|
"completions/min_length": 675.0,
|
|
"completions/min_terminated_length": 675.0,
|
|
"epoch": 0.32686286787726987,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.6973290112944848,
|
|
"kl": 0.0045013427734375,
|
|
"learning_rate": 8.678502787786249e-07,
|
|
"loss": -0.0481,
|
|
"num_tokens": 13849256.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.8221656084060669,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.01926574676180823,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.028234090328970243,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.6875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7083333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06382847385042256,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 261
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1350.0,
|
|
"completions/mean_length": 1254.3125,
|
|
"completions/mean_terminated_length": 1106.9000244140625,
|
|
"completions/min_length": 850.0,
|
|
"completions/min_terminated_length": 850.0,
|
|
"epoch": 0.32811521603005633,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.8513068805319826,
|
|
"kl": 0.003444671630859375,
|
|
"learning_rate": 8.664553992483812e-07,
|
|
"loss": -0.0294,
|
|
"num_tokens": 13886621.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.8134012818336487,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04343925396813008,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.08880475360320686,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7458333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11979921473804349,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 262
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.3125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1419.0,
|
|
"completions/mean_length": 1267.5,
|
|
"completions/mean_terminated_length": 1161.8182373046875,
|
|
"completions/min_length": 936.0,
|
|
"completions/min_terminated_length": 936.0,
|
|
"epoch": 0.32936756418284285,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.8893558213524178,
|
|
"kl": 0.003467559814453125,
|
|
"learning_rate": 8.650544780928851e-07,
|
|
"loss": -0.0196,
|
|
"num_tokens": 13935477.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.748847246170044,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.03399978669526769,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0913917502530681,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8291666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1060223596263578,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 263
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1467.0,
|
|
"completions/mean_length": 1393.75,
|
|
"completions/mean_terminated_length": 1287.5,
|
|
"completions/min_length": 1085.0,
|
|
"completions/min_terminated_length": 1085.0,
|
|
"epoch": 0.3306199123356293,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.2549999172638953,
|
|
"kl": 0.00475311279296875,
|
|
"learning_rate": 8.63647542057898e-07,
|
|
"loss": -0.0273,
|
|
"num_tokens": 13998809.0,
|
|
"reward": 1.4901161193847656e-08,
|
|
"reward_std": 0.9957271814346313,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.032845331546287986,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1426354161680431,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.65,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08255189164891871,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 264
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.8125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1476.0,
|
|
"completions/mean_length": 1465.625,
|
|
"completions/mean_terminated_length": 1316.666748046875,
|
|
"completions/min_length": 1156.0,
|
|
"completions/min_terminated_length": 1156.0,
|
|
"epoch": 0.3318722604884158,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.761811279986975,
|
|
"kl": 0.004451751708984375,
|
|
"learning_rate": 8.622346180040149e-07,
|
|
"loss": 0.0022,
|
|
"num_tokens": 14063899.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.9594628810882568,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.003469042362222641,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06436545386363138,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7708333333333334,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11917929226045819,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 265
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.9375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1351.0,
|
|
"completions/mean_length": 1490.6875,
|
|
"completions/mean_terminated_length": 1351.0,
|
|
"completions/min_length": 1351.0,
|
|
"completions/min_terminated_length": 1351.0,
|
|
"epoch": 0.33312460864120225,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.812294289039839,
|
|
"kl": 0.00394439697265625,
|
|
"learning_rate": 8.608157329061513e-07,
|
|
"loss": -0.0088,
|
|
"num_tokens": 14117462.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.8838216066360474,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.040579408270268943,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06117251081342495,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10809803506625451,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 266
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1498.0,
|
|
"completions/mean_length": 1466.375,
|
|
"completions/mean_terminated_length": 1410.3333740234375,
|
|
"completions/min_length": 1160.0,
|
|
"completions/min_terminated_length": 1160.0,
|
|
"epoch": 0.3343769567939887,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.1377512293638716,
|
|
"kl": 0.00469207763671875,
|
|
"learning_rate": 8.59390913853028e-07,
|
|
"loss": 0.0227,
|
|
"num_tokens": 14167892.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.5932345390319824,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.095152127303474,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.13689770081097544,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 1.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10886621079036349,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 267
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 1.0,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 0.0,
|
|
"completions/mean_length": 1500.0,
|
|
"completions/mean_terminated_length": 0.0,
|
|
"completions/min_length": 1500.0,
|
|
"completions/min_terminated_length": 0.0,
|
|
"epoch": 0.33562930494677523,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.006613104134553,
|
|
"kl": 0.004669189453125,
|
|
"learning_rate": 8.579601880466547e-07,
|
|
"loss": 0.0002,
|
|
"num_tokens": 14229372.0,
|
|
"reward": 0.0,
|
|
"reward_std": 1.0337638854980469,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.04680772992368523,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14029739799038618,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7333333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10886621079036349,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 268
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1468.0,
|
|
"completions/mean_length": 1392.9375,
|
|
"completions/mean_terminated_length": 1214.5,
|
|
"completions/min_length": 1000.0,
|
|
"completions/min_terminated_length": 1000.0,
|
|
"epoch": 0.3368816530995617,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.7703650077113675,
|
|
"kl": 0.0036773681640625,
|
|
"learning_rate": 8.565235828018099e-07,
|
|
"loss": 0.0013,
|
|
"num_tokens": 14289123.0,
|
|
"reward": 0.0,
|
|
"reward_std": 1.0367697477340698,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.07296543210522512,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07812168004547569,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08388704928078611,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 269
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.75,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1487.0,
|
|
"completions/mean_length": 1464.5625,
|
|
"completions/mean_terminated_length": 1358.25,
|
|
"completions/min_length": 1214.0,
|
|
"completions/min_terminated_length": 1214.0,
|
|
"epoch": 0.33813400125234816,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.773518375849927,
|
|
"kl": 0.0041961669921875,
|
|
"learning_rate": 8.550811255455198e-07,
|
|
"loss": -0.0021,
|
|
"num_tokens": 14352892.0,
|
|
"reward": -2.2351741790771484e-08,
|
|
"reward_std": 0.9712283611297607,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.2351741790771484e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06869379781464208,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0930651391561654,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12881223774390613,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 270
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 1.0,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 0.0,
|
|
"completions/mean_length": 1500.0,
|
|
"completions/mean_terminated_length": 0.0,
|
|
"completions/min_length": 1500.0,
|
|
"completions/min_terminated_length": 0.0,
|
|
"epoch": 0.3393863494051346,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.79098774129514,
|
|
"kl": 0.003692626953125,
|
|
"learning_rate": 8.536328438165346e-07,
|
|
"loss": 0.0001,
|
|
"num_tokens": 14414740.0,
|
|
"reward": 1.4901161193847656e-08,
|
|
"reward_std": 1.0314404964447021,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0126400376983615,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11352147882865961,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07252075054258102,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 271
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.9375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1366.0,
|
|
"completions/mean_length": 1491.625,
|
|
"completions/mean_terminated_length": 1366.0,
|
|
"completions/min_length": 1366.0,
|
|
"completions/min_terminated_length": 1366.0,
|
|
"epoch": 0.3406386975579211,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.3328071582670127,
|
|
"kl": 0.002582550048828125,
|
|
"learning_rate": 8.521787652648026e-07,
|
|
"loss": -0.0005,
|
|
"num_tokens": 14475390.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 1.066014051437378,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.04816321266725149,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.23143656867957818,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10292032157252809,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 272
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1274.0,
|
|
"completions/max_terminated_length": 1274.0,
|
|
"completions/mean_length": 966.0,
|
|
"completions/mean_terminated_length": 966.0,
|
|
"completions/min_length": 818.0,
|
|
"completions/min_terminated_length": 818.0,
|
|
"epoch": 0.34189104571070755,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.623716761400387,
|
|
"kl": 0.00237274169921875,
|
|
"learning_rate": 8.507189176509429e-07,
|
|
"loss": 0.0118,
|
|
"num_tokens": 14519830.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.8626605868339539,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09591776756938776,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0561472451616448,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09888264649460884,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 273
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.75,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1356.0,
|
|
"completions/mean_length": 1441.8125,
|
|
"completions/mean_terminated_length": 1267.25,
|
|
"completions/min_length": 1188.0,
|
|
"completions/min_terminated_length": 1188.0,
|
|
"epoch": 0.3431433938634941,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.6860149670271087,
|
|
"kl": 0.003993988037109375,
|
|
"learning_rate": 8.492533288457142e-07,
|
|
"loss": 0.0176,
|
|
"num_tokens": 14562059.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.7108601331710815,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.08617312005850387,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09760563193409819,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7708333333333334,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10878112581387149,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 274
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1485.0,
|
|
"completions/mean_length": 1495.3125,
|
|
"completions/mean_terminated_length": 1462.5,
|
|
"completions/min_length": 1440.0,
|
|
"completions/min_terminated_length": 1440.0,
|
|
"epoch": 0.34439574201628054,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.40909669375685,
|
|
"kl": 0.00632476806640625,
|
|
"learning_rate": 8.477820268294844e-07,
|
|
"loss": 0.0006,
|
|
"num_tokens": 14626280.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.9581431150436401,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0518540297916247,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05711745364940273,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07876359377087681,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 275
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1494.0,
|
|
"completions/mean_length": 1498.8125,
|
|
"completions/mean_terminated_length": 1490.5,
|
|
"completions/min_length": 1487.0,
|
|
"completions/min_terminated_length": 1487.0,
|
|
"epoch": 0.345648090169067,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.606304176627287,
|
|
"kl": 0.003498077392578125,
|
|
"learning_rate": 8.463050396916945e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 14686461.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.8919962048530579,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.16232941024284467,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.38333692394397534,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09388724521901162,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 276
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1387.0,
|
|
"completions/mean_length": 1296.25,
|
|
"completions/mean_terminated_length": 1034.2857666015625,
|
|
"completions/min_length": 430.0,
|
|
"completions/min_terminated_length": 430.0,
|
|
"epoch": 0.34690043832185347,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.4727706531128004,
|
|
"kl": 0.0047149658203125,
|
|
"learning_rate": 8.44822395630324e-07,
|
|
"loss": -0.0713,
|
|
"num_tokens": 14729641.0,
|
|
"reward": -1.4901161193847656e-08,
|
|
"reward_std": 1.0214866399765015,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.006892221922202982,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.027501536576714,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7791666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07969850595746357,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 277
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.8125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1486.0,
|
|
"completions/mean_length": 1469.6875,
|
|
"completions/mean_terminated_length": 1338.3333740234375,
|
|
"completions/min_length": 1234.0,
|
|
"completions/min_terminated_length": 1234.0,
|
|
"epoch": 0.34815278647463993,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.079175222753427,
|
|
"kl": 0.004638671875,
|
|
"learning_rate": 8.433341229513516e-07,
|
|
"loss": 0.0011,
|
|
"num_tokens": 14784988.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 1.011260986328125,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.016744175612928278,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10326622112744127,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09098229375970787,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 278
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 1403.0,
|
|
"completions/max_terminated_length": 1403.0,
|
|
"completions/mean_length": 926.75,
|
|
"completions/mean_terminated_length": 926.75,
|
|
"completions/min_length": 629.0,
|
|
"completions/min_terminated_length": 629.0,
|
|
"epoch": 0.34940513462742645,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.3928033221665683,
|
|
"kl": 0.0011532902717590332,
|
|
"learning_rate": 8.41840250068215e-07,
|
|
"loss": 0.0325,
|
|
"num_tokens": 14819992.0,
|
|
"reward": 0.0,
|
|
"reward_std": 1.027898907661438,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0026503222290372498,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06993382935974904,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.059472994182545084,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 279
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1429.0,
|
|
"completions/mean_length": 1439.4375,
|
|
"completions/mean_terminated_length": 1338.5,
|
|
"completions/min_length": 1219.0,
|
|
"completions/min_terminated_length": 1219.0,
|
|
"epoch": 0.3506574827802129,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.958647342200965,
|
|
"kl": 0.00386810302734375,
|
|
"learning_rate": 8.403408055012688e-07,
|
|
"loss": 0.0226,
|
|
"num_tokens": 14868223.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.9957724213600159,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.010003602936438873,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10122225063918935,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.575,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14782371884055634,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 280
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.6875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1497.0,
|
|
"completions/mean_length": 1458.3125,
|
|
"completions/mean_terminated_length": 1366.5999755859375,
|
|
"completions/min_length": 1197.0,
|
|
"completions/min_terminated_length": 1197.0,
|
|
"epoch": 0.3519098309329994,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.2796328783487394,
|
|
"kl": 0.0066375732421875,
|
|
"learning_rate": 8.388358178772394e-07,
|
|
"loss": -0.0218,
|
|
"num_tokens": 14927820.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.8733463287353516,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.014896438499357663,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0518786397936184,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13655822255780922,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 281
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.3125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1483.0,
|
|
"completions/mean_length": 1253.0625,
|
|
"completions/mean_terminated_length": 1140.8182373046875,
|
|
"completions/min_length": 807.0,
|
|
"completions/min_terminated_length": 807.0,
|
|
"epoch": 0.35316217908578584,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.1237440148175604,
|
|
"kl": 0.004863739013671875,
|
|
"learning_rate": 8.373253159286788e-07,
|
|
"loss": -0.0073,
|
|
"num_tokens": 14982213.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.7831696271896362,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.06306545956559828,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05774599513752542,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14580555290954889,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 282
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1496.0,
|
|
"completions/mean_length": 1441.875,
|
|
"completions/mean_terminated_length": 1345.0,
|
|
"completions/min_length": 1205.0,
|
|
"completions/min_terminated_length": 1205.0,
|
|
"epoch": 0.3544145272385723,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.904795417720074,
|
|
"kl": 0.00449371337890625,
|
|
"learning_rate": 8.35809328493416e-07,
|
|
"loss": -0.0205,
|
|
"num_tokens": 15040715.0,
|
|
"reward": 3.725290298461914e-09,
|
|
"reward_std": 1.0330736637115479,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 3.725290298461914e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.012980308714010343,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0673415334549809,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06382847385042258,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 283
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1163.0,
|
|
"completions/mean_length": 1267.125,
|
|
"completions/mean_terminated_length": 1034.25,
|
|
"completions/min_length": 623.0,
|
|
"completions/min_terminated_length": 623.0,
|
|
"epoch": 0.35566687539135877,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 1.9530736863410463,
|
|
"kl": 0.0021953582763671875,
|
|
"learning_rate": 8.342878845140067e-07,
|
|
"loss": 0.0243,
|
|
"num_tokens": 15099253.0,
|
|
"reward": -1.4901161193847656e-08,
|
|
"reward_std": 1.060163974761963,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.030749215825924263,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.045245562410845486,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7208333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09179284245476839,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 284
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1380.0,
|
|
"completions/mean_length": 1273.375,
|
|
"completions/mean_terminated_length": 1046.75,
|
|
"completions/min_length": 856.0,
|
|
"completions/min_terminated_length": 856.0,
|
|
"epoch": 0.3569192235441453,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.9643132058260195,
|
|
"kl": 0.00377655029296875,
|
|
"learning_rate": 8.327610130371804e-07,
|
|
"loss": -0.0085,
|
|
"num_tokens": 15156899.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.9131340980529785,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.05994073836967858,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.16156243225331035,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06885303726590962,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 285
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1133.0,
|
|
"completions/mean_length": 1196.4375,
|
|
"completions/mean_terminated_length": 892.875,
|
|
"completions/min_length": 726.0,
|
|
"completions/min_terminated_length": 726.0,
|
|
"epoch": 0.35817157169693176,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.4448564572392772,
|
|
"kl": 0.005279541015625,
|
|
"learning_rate": 8.312287432132857e-07,
|
|
"loss": -0.0008,
|
|
"num_tokens": 15210234.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.6276436448097229,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.08401960696737835,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.31657785119011167,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6833333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11800816042090449,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 286
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.1875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1480.0,
|
|
"completions/mean_length": 1118.0,
|
|
"completions/mean_terminated_length": 1029.84619140625,
|
|
"completions/min_length": 768.0,
|
|
"completions/min_terminated_length": 768.0,
|
|
"epoch": 0.3594239198497182,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.649351006275311,
|
|
"kl": 0.004192352294921875,
|
|
"learning_rate": 8.296911042957347e-07,
|
|
"loss": 0.0474,
|
|
"num_tokens": 15254266.0,
|
|
"reward": 7.450580596923828e-09,
|
|
"reward_std": 1.0014917850494385,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.05039245601276097,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0698277819618762,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6791666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08850612031567838,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 287
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1495.0,
|
|
"completions/mean_length": 1408.8125,
|
|
"completions/mean_terminated_length": 1317.625,
|
|
"completions/min_length": 1145.0,
|
|
"completions/min_terminated_length": 1145.0,
|
|
"epoch": 0.3606762680025047,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.3753456567317306,
|
|
"kl": 0.003170013427734375,
|
|
"learning_rate": 8.281481256404427e-07,
|
|
"loss": -0.0065,
|
|
"num_tokens": 15310551.0,
|
|
"reward": -7.450580596923828e-09,
|
|
"reward_std": 1.0467666387557983,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -7.450580596923828e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.011685861651235842,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.020940553119970465,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7416666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09388724521901161,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 288
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1287.0,
|
|
"completions/mean_length": 1308.5,
|
|
"completions/mean_terminated_length": 1117.0,
|
|
"completions/min_length": 1059.0,
|
|
"completions/min_terminated_length": 1059.0,
|
|
"epoch": 0.36192861615529115,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.7116355558289365,
|
|
"kl": 0.0031452178955078125,
|
|
"learning_rate": 8.265998367052699e-07,
|
|
"loss": -0.0148,
|
|
"num_tokens": 15357047.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.7932579517364502,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.05102504905151101,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.046974298933007336,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7541666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.102469507659596,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 289
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.4375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1322.0,
|
|
"completions/mean_length": 1303.375,
|
|
"completions/mean_terminated_length": 1150.4444580078125,
|
|
"completions/min_length": 910.0,
|
|
"completions/min_terminated_length": 910.0,
|
|
"epoch": 0.36318096430807767,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.6471576640483025,
|
|
"kl": 0.002368927001953125,
|
|
"learning_rate": 8.25046267049458e-07,
|
|
"loss": -0.0155,
|
|
"num_tokens": 15419477.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.9311500191688538,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.17596829941789516,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.18219217687822756,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.8125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7666666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12412657816683506,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 290
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 1.0,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 0.0,
|
|
"completions/mean_length": 1500.0,
|
|
"completions/mean_terminated_length": 0.0,
|
|
"completions/min_length": 1500.0,
|
|
"completions/min_terminated_length": 0.0,
|
|
"epoch": 0.36443331246086413,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.972210343638841,
|
|
"kl": 0.004058837890625,
|
|
"learning_rate": 8.234874463330651e-07,
|
|
"loss": 0.0002,
|
|
"num_tokens": 15481293.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.6159095764160156,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06449275539626861,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07616565949841655,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7583333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10852547064066473,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 291
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.25,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1348.0,
|
|
"completions/mean_length": 1140.125,
|
|
"completions/mean_terminated_length": 1020.1666870117188,
|
|
"completions/min_length": 215.0,
|
|
"completions/min_terminated_length": 215.0,
|
|
"epoch": 0.3656856606136506,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.7273022702751355,
|
|
"kl": 0.00434112548828125,
|
|
"learning_rate": 8.219234043164007e-07,
|
|
"loss": -0.0148,
|
|
"num_tokens": 15538271.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.8317296504974365,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.002159562349982134,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.040954201238117,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6916666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.1261979632400061,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 292
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.4375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1495.0,
|
|
"completions/mean_length": 1346.875,
|
|
"completions/mean_terminated_length": 1227.77783203125,
|
|
"completions/min_length": 1055.0,
|
|
"completions/min_terminated_length": 1055.0,
|
|
"epoch": 0.36693800876643706,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.479167145868108,
|
|
"kl": 0.005218505859375,
|
|
"learning_rate": 8.203541708594571e-07,
|
|
"loss": -0.0154,
|
|
"num_tokens": 15584509.0,
|
|
"reward": -1.4901161193847656e-08,
|
|
"reward_std": 1.0486056804656982,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.005770089344222506,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07441253794038902,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7916666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08388704928078614,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 293
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.3125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1474.0,
|
|
"completions/mean_length": 1284.5625,
|
|
"completions/mean_terminated_length": 1186.6363525390625,
|
|
"completions/min_length": 979.0,
|
|
"completions/min_terminated_length": 979.0,
|
|
"epoch": 0.3681903569192235,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.139340219060384,
|
|
"kl": 0.00438690185546875,
|
|
"learning_rate": 8.18779775921339e-07,
|
|
"loss": 0.0201,
|
|
"num_tokens": 15631742.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 1.0337092876434326,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.061225139692727595,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0882517727987926,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7208333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08153617692869927,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 294
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.9375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1388.0,
|
|
"completions/mean_length": 1493.0,
|
|
"completions/mean_terminated_length": 1388.0,
|
|
"completions/min_length": 1388.0,
|
|
"completions/min_terminated_length": 1388.0,
|
|
"epoch": 0.36944270507201,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.4977483724240614,
|
|
"kl": 0.0029296875,
|
|
"learning_rate": 8.17200249559692e-07,
|
|
"loss": -0.0007,
|
|
"num_tokens": 15698798.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.4475941061973572,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.24246809612484624,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.35195872278638696,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.05900408021045224,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 295
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.6875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1412.0,
|
|
"completions/mean_length": 1458.625,
|
|
"completions/mean_terminated_length": 1367.5999755859375,
|
|
"completions/min_length": 1330.0,
|
|
"completions/min_terminated_length": 1330.0,
|
|
"epoch": 0.3706950532247965,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.867561636937004,
|
|
"kl": 0.004486083984375,
|
|
"learning_rate": 8.156156219301287e-07,
|
|
"loss": -0.0096,
|
|
"num_tokens": 15766096.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.9567909240722656,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.09166855392489899,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.11839819598536988,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13709958532503408,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 296
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1384.0,
|
|
"completions/mean_length": 1323.6875,
|
|
"completions/mean_terminated_length": 1217.9000244140625,
|
|
"completions/min_length": 1037.0,
|
|
"completions/min_terminated_length": 1037.0,
|
|
"epoch": 0.371947401377583,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.38013590229778,
|
|
"kl": 0.00470733642578125,
|
|
"learning_rate": 8.140259232856521e-07,
|
|
"loss": -0.0394,
|
|
"num_tokens": 15817547.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.9704372882843018,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.05488740961091947,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10481500155411475,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7458333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13158576980363348,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 297
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.9375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1423.0,
|
|
"completions/mean_length": 1495.1875,
|
|
"completions/mean_terminated_length": 1423.0,
|
|
"completions/min_length": 1423.0,
|
|
"completions/min_terminated_length": 1423.0,
|
|
"epoch": 0.37319974953036944,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.0382889306606327,
|
|
"kl": 0.004367828369140625,
|
|
"learning_rate": 8.124311839760797e-07,
|
|
"loss": -0.0027,
|
|
"num_tokens": 15868646.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.8351828455924988,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.03419630895774928,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1367852358309971,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6958333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09418264367902598,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 298
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.6875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1344.0,
|
|
"completions/mean_length": 1399.875,
|
|
"completions/mean_terminated_length": 1179.5999755859375,
|
|
"completions/min_length": 1011.0,
|
|
"completions/min_terminated_length": 1011.0,
|
|
"epoch": 0.3744520976831559,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.362094019703377,
|
|
"kl": 0.003170013427734375,
|
|
"learning_rate": 8.108314344474623e-07,
|
|
"loss": 0.0162,
|
|
"num_tokens": 15934516.0,
|
|
"reward": 7.450580596923828e-09,
|
|
"reward_std": 0.9300060868263245,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.026608295676684646,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05700271984957867,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7208333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09803627446568497,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 299
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1385.0,
|
|
"completions/mean_length": 1310.1875,
|
|
"completions/mean_terminated_length": 1120.375,
|
|
"completions/min_length": 849.0,
|
|
"completions/min_terminated_length": 849.0,
|
|
"epoch": 0.37570444583594237,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.2267030639366325,
|
|
"kl": 0.004962921142578125,
|
|
"learning_rate": 8.092267052415044e-07,
|
|
"loss": 0.0104,
|
|
"num_tokens": 15981759.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.9144766330718994,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.13077711907103481,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1576724545552638,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.55,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08944271909999157,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 300
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.9375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1495.0,
|
|
"completions/mean_length": 1499.6875,
|
|
"completions/mean_terminated_length": 1495.0,
|
|
"completions/min_length": 1495.0,
|
|
"completions/min_terminated_length": 1495.0,
|
|
"epoch": 0.3769567939887289,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.857067953077971,
|
|
"kl": 0.004589080810546875,
|
|
"learning_rate": 8.076170269949795e-07,
|
|
"loss": 0.0005,
|
|
"num_tokens": 16032986.0,
|
|
"reward": 1.4901161193847656e-08,
|
|
"reward_std": 0.8725603818893433,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.01691320115670981,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.057867471716033625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6749999999999999,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08027729719194864,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 301
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.5,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1476.0,
|
|
"completions/mean_length": 1402.5,
|
|
"completions/mean_terminated_length": 1305.0,
|
|
"completions/min_length": 1100.0,
|
|
"completions/min_terminated_length": 1100.0,
|
|
"epoch": 0.37820914214151535,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.0631024256482773,
|
|
"kl": 0.00476837158203125,
|
|
"learning_rate": 8.060024304391464e-07,
|
|
"loss": -0.0059,
|
|
"num_tokens": 16075122.0,
|
|
"reward": 1.4901161193847656e-08,
|
|
"reward_std": 1.0385990142822266,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0014002892068640102,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04032032793331211,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.1875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.40311288741492746,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7708333333333334,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.11917929226045819,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 302
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1480.0,
|
|
"completions/mean_length": 1234.625,
|
|
"completions/mean_terminated_length": 1196.71435546875,
|
|
"completions/min_length": 1007.0,
|
|
"completions/min_terminated_length": 1007.0,
|
|
"epoch": 0.3794614902943018,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.625794290754689,
|
|
"kl": 0.0052490234375,
|
|
"learning_rate": 8.043829463991619e-07,
|
|
"loss": -0.0729,
|
|
"num_tokens": 16137860.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.7281184196472168,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.15255108490634472,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.0853071621433351,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08944271909999162,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 303
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.3125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1479.0,
|
|
"completions/mean_length": 1229.0,
|
|
"completions/mean_terminated_length": 1105.8182373046875,
|
|
"completions/min_length": 759.0,
|
|
"completions/min_terminated_length": 759.0,
|
|
"epoch": 0.3807138384470883,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.260598284534238,
|
|
"kl": 0.00495147705078125,
|
|
"learning_rate": 8.027586057934928e-07,
|
|
"loss": -0.0588,
|
|
"num_tokens": 16193676.0,
|
|
"reward": 7.450580596923828e-09,
|
|
"reward_std": 1.0218051671981812,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 7.450580596923828e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.00276089248932003,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.03710765345598682,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7416666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14168300559373406,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 304
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.4375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1381.0,
|
|
"completions/mean_length": 1335.75,
|
|
"completions/mean_terminated_length": 1208.0,
|
|
"completions/min_length": 1052.0,
|
|
"completions/min_terminated_length": 1052.0,
|
|
"epoch": 0.38196618659987475,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.305649016415018,
|
|
"kl": 0.00536346435546875,
|
|
"learning_rate": 8.011294396333247e-07,
|
|
"loss": 0.035,
|
|
"num_tokens": 16241520.0,
|
|
"reward": 2.2351741790771484e-08,
|
|
"reward_std": 1.0677435398101807,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.2351741790771484e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.0046395341039948005,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04123648809292501,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.75,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12995725793078622,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 305
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1387.0,
|
|
"completions/mean_length": 1287.0,
|
|
"completions/mean_terminated_length": 1159.2000732421875,
|
|
"completions/min_length": 871.0,
|
|
"completions/min_terminated_length": 871.0,
|
|
"epoch": 0.38321853475266127,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.727887770111367,
|
|
"kl": 0.00641632080078125,
|
|
"learning_rate": 7.99495479021971e-07,
|
|
"loss": -0.022,
|
|
"num_tokens": 16295288.0,
|
|
"reward": -4.470348358154297e-08,
|
|
"reward_std": 1.053145408630371,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -4.470348358154297e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.038974548522257506,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10020848772744548,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7458333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12224747213928168,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 306
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.3125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1449.0,
|
|
"completions/mean_length": 1242.0625,
|
|
"completions/mean_terminated_length": 1124.8182373046875,
|
|
"completions/min_length": 732.0,
|
|
"completions/min_terminated_length": 732.0,
|
|
"epoch": 0.38447088290544773,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.5550785119927446,
|
|
"kl": 0.0034637451171875,
|
|
"learning_rate": 7.978567551542785e-07,
|
|
"loss": -0.0756,
|
|
"num_tokens": 16333129.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.6722694635391235,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.06133805044031608,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07932499651372282,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.4375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.725,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08027729719194865,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 307
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.8125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1431.0,
|
|
"completions/mean_length": 1466.0625,
|
|
"completions/mean_terminated_length": 1319.0,
|
|
"completions/min_length": 1256.0,
|
|
"completions/min_terminated_length": 1256.0,
|
|
"epoch": 0.3857232310582342,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.914391532004015,
|
|
"kl": 0.00472259521484375,
|
|
"learning_rate": 7.962132993160318e-07,
|
|
"loss": -0.0031,
|
|
"num_tokens": 16393066.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.5695419311523438,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.03688578385137459,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.05735193102645086,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6208333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14548768561863462,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 308
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.875,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1300.0,
|
|
"completions/mean_length": 1468.125,
|
|
"completions/mean_terminated_length": 1245.0,
|
|
"completions/min_length": 1190.0,
|
|
"completions/min_terminated_length": 1190.0,
|
|
"epoch": 0.38697557921102066,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.462122287718944,
|
|
"kl": 0.003597259521484375,
|
|
"learning_rate": 7.945651428833566e-07,
|
|
"loss": -0.0086,
|
|
"num_tokens": 16455300.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.9045326113700867,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.07094748829476913,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.07518616664712767,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.875,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6916666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10576003586036263,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 309
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.4375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1460.0,
|
|
"completions/mean_length": 1250.0625,
|
|
"completions/mean_terminated_length": 1055.6666259765625,
|
|
"completions/min_length": 953.0,
|
|
"completions/min_terminated_length": 953.0,
|
|
"epoch": 0.3882279273638071,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.9451996655491754,
|
|
"kl": 0.003414154052734375,
|
|
"learning_rate": 7.929123173221197e-07,
|
|
"loss": 0.016,
|
|
"num_tokens": 16510829.0,
|
|
"reward": -2.60770320892334e-08,
|
|
"reward_std": 0.9780210256576538,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.60770320892334e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.11998443212330577,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.273643343882272,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.9375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.25,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.13743685418725535,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327956676483154,
|
|
"step": 310
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.75,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1482.0,
|
|
"completions/mean_length": 1477.3125,
|
|
"completions/mean_terminated_length": 1409.25,
|
|
"completions/min_length": 1344.0,
|
|
"completions/min_terminated_length": 1344.0,
|
|
"epoch": 0.3894802755165936,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.5088718729093884,
|
|
"kl": 0.0039215087890625,
|
|
"learning_rate": 7.91254854187329e-07,
|
|
"loss": 0.0109,
|
|
"num_tokens": 16557338.0,
|
|
"reward": -2.9802322387695312e-08,
|
|
"reward_std": 0.8606460094451904,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.11084663306324073,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.10033388109681571,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7375,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.07490735018081414,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 311
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0625,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1250.0,
|
|
"completions/mean_length": 1070.625,
|
|
"completions/mean_terminated_length": 1042.0,
|
|
"completions/min_length": 692.0,
|
|
"completions/min_terminated_length": 692.0,
|
|
"epoch": 0.3907326236693801,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.633658673586784,
|
|
"kl": 0.004913330078125,
|
|
"learning_rate": 7.895927851225315e-07,
|
|
"loss": -0.0045,
|
|
"num_tokens": 16585492.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.8763086795806885,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.010857263566407607,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06826631403415188,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8333333333333334,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.12171612389003693,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 312
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1453.0,
|
|
"completions/mean_length": 1316.9375,
|
|
"completions/mean_terminated_length": 1207.0999755859375,
|
|
"completions/min_length": 926.0,
|
|
"completions/min_terminated_length": 926.0,
|
|
"epoch": 0.3919849718221666,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.5743987456607456,
|
|
"kl": 0.00490570068359375,
|
|
"learning_rate": 7.879261418592072e-07,
|
|
"loss": -0.0521,
|
|
"num_tokens": 16629555.0,
|
|
"reward": 1.4901161193847656e-08,
|
|
"reward_std": 0.9046754240989685,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.07072576648968745,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.14285699045244268,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.3415650255319866,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06885303726590966,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 313
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.3125,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1433.0,
|
|
"completions/mean_length": 1382.4375,
|
|
"completions/mean_terminated_length": 1329.0,
|
|
"completions/min_length": 1093.0,
|
|
"completions/min_terminated_length": 1093.0,
|
|
"epoch": 0.39323731997495304,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.1526667079717625,
|
|
"kl": 0.002288818359375,
|
|
"learning_rate": 7.862549562161661e-07,
|
|
"loss": -0.0277,
|
|
"num_tokens": 16682250.0,
|
|
"reward": -7.450580596923828e-09,
|
|
"reward_std": 1.0446007251739502,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -7.450580596923828e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.1297401874034389,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.1781696946469639,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.75,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.4472135954999579,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.06871842709362772,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 314
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.9375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1463.0,
|
|
"completions/mean_length": 1497.6875,
|
|
"completions/mean_terminated_length": 1463.0,
|
|
"completions/min_length": 1463.0,
|
|
"completions/min_terminated_length": 1463.0,
|
|
"epoch": 0.3944896681277395,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.0308641968752266,
|
|
"kl": 0.005645751953125,
|
|
"learning_rate": 7.845792600989385e-07,
|
|
"loss": -0.0009,
|
|
"num_tokens": 16736925.0,
|
|
"reward": 0.0,
|
|
"reward_std": 1.0489060878753662,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.0036366895025502417,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.014546758010200967,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7166666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.10183501544346313,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 315
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.75,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1361.0,
|
|
"completions/mean_length": 1448.0,
|
|
"completions/mean_terminated_length": 1292.0,
|
|
"completions/min_length": 1213.0,
|
|
"completions/min_terminated_length": 1213.0,
|
|
"epoch": 0.39574201628052597,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.5606215816363824,
|
|
"kl": 0.003444671630859375,
|
|
"learning_rate": 7.828990854991669e-07,
|
|
"loss": -0.0016,
|
|
"num_tokens": 16805501.0,
|
|
"reward": -1.4901161193847656e-08,
|
|
"reward_std": 1.0115642547607422,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -1.4901161193847656e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.021560930387654664,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.031239915717000032,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.6708333333333333,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.08595864638818418,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 316
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 1.0,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 0.0,
|
|
"completions/mean_length": 1500.0,
|
|
"completions/mean_terminated_length": 0.0,
|
|
"completions/min_length": 1500.0,
|
|
"completions/min_terminated_length": 0.0,
|
|
"epoch": 0.3969943644333125,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.8884496894175316,
|
|
"kl": 0.00472259521484375,
|
|
"learning_rate": 7.812144644939948e-07,
|
|
"loss": 0.0002,
|
|
"num_tokens": 16868629.0,
|
|
"reward": 2.9802322387695312e-08,
|
|
"reward_std": 0.9699341058731079,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 2.9802322387695312e-08,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.024920589109913467,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09830967886668995,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7041666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09727776191382574,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 317
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1493.0,
|
|
"completions/mean_length": 1401.8125,
|
|
"completions/mean_terminated_length": 1342.9000244140625,
|
|
"completions/min_length": 1196.0,
|
|
"completions/min_terminated_length": 1196.0,
|
|
"epoch": 0.39824671258609895,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.4263971987387944,
|
|
"kl": 0.00539398193359375,
|
|
"learning_rate": 7.795254292454546e-07,
|
|
"loss": -0.0029,
|
|
"num_tokens": 16930194.0,
|
|
"reward": 3.725290298461914e-09,
|
|
"reward_std": 1.058499813079834,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 3.725290298461914e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.003584071693735027,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.06412609719118169,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5625,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.51234753829798,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7041666666666666,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.09727776191382574,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 318
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.4375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1327.0,
|
|
"completions/mean_length": 1266.875,
|
|
"completions/mean_terminated_length": 1085.5555419921875,
|
|
"completions/min_length": 632.0,
|
|
"completions/min_terminated_length": 632.0,
|
|
"epoch": 0.3994990607388854,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 2.6400441912231263,
|
|
"kl": 0.00444793701171875,
|
|
"learning_rate": 7.778320119998535e-07,
|
|
"loss": -0.121,
|
|
"num_tokens": 16979440.0,
|
|
"reward": -9.313225746154785e-09,
|
|
"reward_std": 1.0413284301757812,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": -9.313225746154785e-09,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": -0.005981072426200435,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.04425931042175955,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.5,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.5163977794943223,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.8166666666666667,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.0926962382871743,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 319
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.375,
|
|
"completions/max_length": 1500.0,
|
|
"completions/max_terminated_length": 1494.0,
|
|
"completions/mean_length": 1350.1875,
|
|
"completions/mean_terminated_length": 1260.300048828125,
|
|
"completions/min_length": 1061.0,
|
|
"completions/min_terminated_length": 1061.0,
|
|
"epoch": 0.4007514088916719,
|
|
"frac_reward_zero_std": 0.0,
|
|
"grad_norm": 3.2125264454232823,
|
|
"kl": 0.00494384765625,
|
|
"learning_rate": 7.761342450871578e-07,
|
|
"loss": -0.0401,
|
|
"num_tokens": 17023723.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.9411365389823914,
|
|
"rewards/wordcountpos_reward_ecommerce/mean": 0.0,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/mean": 0.08138630489162721,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_geo/std": 0.09236477000312811,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/mean": 0.3125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_keypoint/std": 0.47871355387816905,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/mean": 0.7125,
|
|
"rewards/wordcountpos_reward_ecommerce/raw_rule/std": 0.14548768561863465,
|
|
"rewards/wordcountpos_reward_ecommerce/std": 1.0327955484390259,
|
|
"step": 320
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 799,
|
|
"num_input_tokens_seen": 17023723,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 80,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 4,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|