Files

315 lines
12 KiB
JSON
Raw Permalink Normal View History

[
{
"step": 1,
"loss": -0.0,
"grad_norm": 0.0,
"learning_rate": 0.0,
"num_tokens": 6155.0,
"completions/mean_length": 18.5,
"completions/min_length": 13.0,
"completions/max_length": 24.0,
"completions/clipped_ratio": 0.0,
"completions/mean_terminated_length": 18.5,
"completions/min_terminated_length": 13.0,
"completions/max_terminated_length": 24.0,
"rewards/reward_total/mean": 0.8737499713897705,
"rewards/reward_total/std": 0.014071441255509853,
"rewards/reward_market/mean": 0.6000000238418579,
"rewards/reward_market/std": 0.0,
"rewards/reward_warehouse/mean": 0.20000000298023224,
"rewards/reward_warehouse/std": 0.0,
"rewards/reward_showroom/mean": 0.07375000417232513,
"rewards/reward_showroom/std": 0.01407142635434866,
"reward": 0.8737499713897705,
"reward_std": 0.014071442186832428,
"frac_reward_zero_std": 0.0,
"sampling/sampling_logp_difference/mean": 4.520341396331787,
"sampling/sampling_logp_difference/max": 27.174238204956055,
"sampling/importance_sampling_ratio/min": 4.035991810979052e-40,
"sampling/importance_sampling_ratio/mean": 3.371377950408304e-34,
"sampling/importance_sampling_ratio/max": 6.742751768219281e-34,
"entropy": 0.1811772882938385,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/region_mean": 0.0,
"step_time": 4.618328085001849,
"epoch": 0.0033333333333333335
},
{
"step": 2,
"loss": 0.0,
"grad_norm": 0.0,
"learning_rate": 5.000000000000001e-07,
"num_tokens": 13990.0,
"completions/mean_length": 20.5,
"completions/min_length": 13.0,
"completions/max_length": 28.0,
"completions/clipped_ratio": 0.0,
"completions/mean_terminated_length": 20.5,
"completions/min_terminated_length": 13.0,
"completions/max_terminated_length": 28.0,
"rewards/reward_total/mean": 0.9020000100135803,
"rewards/reward_total/std": 0.04058792069554329,
"rewards/reward_market/mean": 0.20000000298023224,
"rewards/reward_market/std": 0.0,
"rewards/reward_warehouse/mean": 0.6000000238418579,
"rewards/reward_warehouse/std": 0.0,
"rewards/reward_showroom/mean": 0.10199999809265137,
"rewards/reward_showroom/std": 0.04058793559670448,
"reward": 0.9020000100135803,
"reward_std": 0.04058792069554329,
"frac_reward_zero_std": 0.0,
"sampling/sampling_logp_difference/mean": 4.2938385009765625,
"sampling/sampling_logp_difference/max": 26.646509170532227,
"sampling/importance_sampling_ratio/min": 2.0038568039844884e-43,
"sampling/importance_sampling_ratio/mean": 9.08055335179305e-34,
"sampling/importance_sampling_ratio/max": 1.81611067035861e-33,
"entropy": 0.11802829056978226,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/region_mean": 0.0,
"step_time": 6.2618235270019795,
"epoch": 0.006666666666666667
},
{
"step": 3,
"loss": 0.0,
"grad_norm": 0.0,
"learning_rate": 1.0000000000000002e-06,
"num_tokens": 27189.0,
"completions/mean_length": 47.5,
"completions/min_length": 40.0,
"completions/max_length": 55.0,
"completions/clipped_ratio": 0.0,
"completions/mean_terminated_length": 47.5,
"completions/min_terminated_length": 40.0,
"completions/max_terminated_length": 55.0,
"rewards/reward_total/mean": 0.800000011920929,
"rewards/reward_total/std": 0.0,
"rewards/reward_market/mean": 0.20000000298023224,
"rewards/reward_market/std": 0.0,
"rewards/reward_warehouse/mean": 0.6000000238418579,
"rewards/reward_warehouse/std": 0.0,
"rewards/reward_showroom/mean": 0.0,
"rewards/reward_showroom/std": 0.0,
"reward": 0.800000011920929,
"reward_std": 0.0,
"frac_reward_zero_std": 1.0,
"sampling/sampling_logp_difference/mean": 2.252011299133301,
"sampling/sampling_logp_difference/max": 28.722728729248047,
"sampling/importance_sampling_ratio/min": 0.0,
"sampling/importance_sampling_ratio/mean": 5.605193857299268e-45,
"sampling/importance_sampling_ratio/max": 1.2611686178923354e-44,
"entropy": 0.12017613649368286,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/region_mean": 0.0,
"step_time": 10.559301583001798,
"epoch": 0.01
},
{
"step": 4,
"loss": 0.0,
"grad_norm": 0.0,
"learning_rate": 1.5e-06,
"num_tokens": 32537.0,
"completions/mean_length": 16.0,
"completions/min_length": 16.0,
"completions/max_length": 16.0,
"completions/clipped_ratio": 0.0,
"completions/mean_terminated_length": 16.0,
"completions/min_terminated_length": 16.0,
"completions/max_terminated_length": 16.0,
"rewards/reward_total/mean": 0.8726999759674072,
"rewards/reward_total/std": 0.008343853987753391,
"rewards/reward_market/mean": 0.20000000298023224,
"rewards/reward_market/std": 0.0,
"rewards/reward_warehouse/mean": 0.6000000238418579,
"rewards/reward_warehouse/std": 0.0,
"rewards/reward_showroom/mean": 0.07269999384880066,
"rewards/reward_showroom/std": 0.008343859575688839,
"reward": 0.8726999759674072,
"reward_std": 0.008343853987753391,
"frac_reward_zero_std": 0.0,
"sampling/sampling_logp_difference/mean": 6.424993515014648,
"sampling/sampling_logp_difference/max": 30.274734497070312,
"sampling/importance_sampling_ratio/min": 1.401298464324817e-45,
"sampling/importance_sampling_ratio/mean": 2.802596928649634e-45,
"sampling/importance_sampling_ratio/max": 2.802596928649634e-45,
"entropy": 0.17401638627052307,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/region_mean": 0.0,
"step_time": 1.9552808339976764,
"epoch": 0.013333333333333334
},
{
"step": 5,
"loss": -0.0,
"grad_norm": 0.0,
"learning_rate": 2.0000000000000003e-06,
"num_tokens": 41360.0,
"completions/mean_length": 32.0,
"completions/min_length": 12.0,
"completions/max_length": 52.0,
"completions/clipped_ratio": 0.0,
"completions/mean_terminated_length": 32.0,
"completions/min_terminated_length": 12.0,
"completions/max_terminated_length": 52.0,
"rewards/reward_total/mean": 0.8379999995231628,
"rewards/reward_total/std": 0.05374009534716606,
"rewards/reward_market/mean": 0.6000000238418579,
"rewards/reward_market/std": 0.0,
"rewards/reward_warehouse/mean": 0.20000000298023224,
"rewards/reward_warehouse/std": 0.0,
"rewards/reward_showroom/mean": 0.03799999877810478,
"rewards/reward_showroom/std": 0.053740113973617554,
"reward": 0.8379999995231628,
"reward_std": 0.05374009534716606,
"frac_reward_zero_std": 0.0,
"sampling/sampling_logp_difference/mean": 3.4955575466156006,
"sampling/sampling_logp_difference/max": 31.43513298034668,
"sampling/importance_sampling_ratio/min": 0.0,
"sampling/importance_sampling_ratio/mean": 2.3879863577791495e-32,
"sampling/importance_sampling_ratio/max": 4.775972715558299e-32,
"entropy": 0.10594719648361206,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/region_mean": 0.0,
"step_time": 8.467385248000937,
"epoch": 0.016666666666666666
},
{
"step": 6,
"loss": 0.0,
"grad_norm": 0.0,
"learning_rate": 2.5e-06,
"num_tokens": 53669.0,
"completions/mean_length": 47.0,
"completions/min_length": 34.0,
"completions/max_length": 60.0,
"completions/clipped_ratio": 0.0,
"completions/mean_terminated_length": 47.0,
"completions/min_terminated_length": 34.0,
"completions/max_terminated_length": 60.0,
"rewards/reward_total/mean": 0.800000011920929,
"rewards/reward_total/std": 0.0,
"rewards/reward_market/mean": 0.20000000298023224,
"rewards/reward_market/std": 0.0,
"rewards/reward_warehouse/mean": 0.6000000238418579,
"rewards/reward_warehouse/std": 0.0,
"rewards/reward_showroom/mean": 0.0,
"rewards/reward_showroom/std": 0.0,
"reward": 0.800000011920929,
"reward_std": 0.0,
"frac_reward_zero_std": 1.0,
"sampling/sampling_logp_difference/mean": 2.4156625270843506,
"sampling/sampling_logp_difference/max": 28.952373504638672,
"sampling/importance_sampling_ratio/min": 0.0,
"sampling/importance_sampling_ratio/mean": 1.090479293292958e-33,
"sampling/importance_sampling_ratio/max": 2.180958586585916e-33,
"entropy": 0.1315789371728897,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/region_mean": 0.0,
"step_time": 8.83436526999867,
"epoch": 0.02
},
{
"step": 7,
"loss": -0.0,
"grad_norm": 0.0,
"learning_rate": 3e-06,
"num_tokens": 62446.0,
"completions/mean_length": 26.0,
"completions/min_length": 13.0,
"completions/max_length": 39.0,
"completions/clipped_ratio": 0.0,
"completions/mean_terminated_length": 26.0,
"completions/min_terminated_length": 13.0,
"completions/max_terminated_length": 39.0,
"rewards/reward_total/mean": 0.8481500148773193,
"rewards/reward_total/std": 0.024678032845258713,
"rewards/reward_market/mean": 0.20000000298023224,
"rewards/reward_market/std": 0.0,
"rewards/reward_warehouse/mean": 0.6000000238418579,
"rewards/reward_warehouse/std": 0.0,
"rewards/reward_showroom/mean": 0.04814999923110008,
"rewards/reward_showroom/std": 0.024678027257323265,
"reward": 0.8481500148773193,
"reward_std": 0.024678032845258713,
"frac_reward_zero_std": 0.0,
"sampling/sampling_logp_difference/mean": 3.9407880306243896,
"sampling/sampling_logp_difference/max": 28.3719482421875,
"sampling/importance_sampling_ratio/min": 0.0,
"sampling/importance_sampling_ratio/mean": 6.8739474988531065e-34,
"sampling/importance_sampling_ratio/max": 1.3747894997706213e-33,
"entropy": 0.10619711875915527,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/region_mean": 0.0,
"step_time": 8.326133659000334,
"epoch": 0.023333333333333334
},
{
"step": 8,
"loss": 0.0,
"grad_norm": 0.0,
"learning_rate": 3.5e-06,
"num_tokens": 72064.0,
"completions/mean_length": 29.5,
"completions/min_length": 19.0,
"completions/max_length": 40.0,
"completions/clipped_ratio": 0.0,
"completions/mean_terminated_length": 29.5,
"completions/min_terminated_length": 19.0,
"completions/max_terminated_length": 40.0,
"rewards/reward_total/mean": 0.8438500165939331,
"rewards/reward_total/std": 0.06201327219605446,
"rewards/reward_market/mean": 0.20000000298023224,
"rewards/reward_market/std": 0.0,
"rewards/reward_warehouse/mean": 0.6000000238418579,
"rewards/reward_warehouse/std": 0.0,
"rewards/reward_showroom/mean": 0.04385000094771385,
"rewards/reward_showroom/std": 0.06201326474547386,
"reward": 0.8438500165939331,
"reward_std": 0.06201327219605446,
"frac_reward_zero_std": 0.0,
"sampling/sampling_logp_difference/mean": 3.4995782375335693,
"sampling/sampling_logp_difference/max": 30.46342658996582,
"sampling/importance_sampling_ratio/min": 7.006492321624085e-45,
"sampling/importance_sampling_ratio/mean": 8.407790785948902e-45,
"sampling/importance_sampling_ratio/max": 9.80908925027372e-45,
"entropy": 0.11579056829214096,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/region_mean": 0.0,
"step_time": 8.365506213998742,
"epoch": 0.02666666666666667
},
{
"step": 8,
"train_runtime": 91.2182,
"train_samples_per_second": 0.175,
"train_steps_per_second": 0.088,
"total_flos": 0.0,
"train_loss": -7.903189287664419e-34,
"epoch": 0.02666666666666667
}
]