2552 lines
71 KiB
JSON
2552 lines
71 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 1141,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.030800821355236138,
|
|
"grad_norm": 10.78951034545919,
|
|
"learning_rate": 1.391304347826087e-06,
|
|
"loss": 0.8755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2676461935043335,
|
|
"step": 5,
|
|
"valid_targets_mean": 8280.3,
|
|
"valid_targets_min": 373
|
|
},
|
|
{
|
|
"epoch": 0.061601642710472276,
|
|
"grad_norm": 8.246410041446428,
|
|
"learning_rate": 3.130434782608696e-06,
|
|
"loss": 0.8526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24272847175598145,
|
|
"step": 10,
|
|
"valid_targets_mean": 8654.6,
|
|
"valid_targets_min": 1049
|
|
},
|
|
{
|
|
"epoch": 0.09240246406570841,
|
|
"grad_norm": 3.9672877405460905,
|
|
"learning_rate": 4.869565217391305e-06,
|
|
"loss": 0.7895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24913868308067322,
|
|
"step": 15,
|
|
"valid_targets_mean": 10789.0,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 0.12320328542094455,
|
|
"grad_norm": 1.8110786022210992,
|
|
"learning_rate": 6.6086956521739135e-06,
|
|
"loss": 0.7282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23902273178100586,
|
|
"step": 20,
|
|
"valid_targets_mean": 10279.2,
|
|
"valid_targets_min": 2263
|
|
},
|
|
{
|
|
"epoch": 0.1540041067761807,
|
|
"grad_norm": 1.410577563759979,
|
|
"learning_rate": 8.347826086956522e-06,
|
|
"loss": 0.7136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23549094796180725,
|
|
"step": 25,
|
|
"valid_targets_mean": 9453.8,
|
|
"valid_targets_min": 440
|
|
},
|
|
{
|
|
"epoch": 0.18480492813141683,
|
|
"grad_norm": 1.053306513462499,
|
|
"learning_rate": 1.008695652173913e-05,
|
|
"loss": 0.6777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22287440299987793,
|
|
"step": 30,
|
|
"valid_targets_mean": 9982.4,
|
|
"valid_targets_min": 2824
|
|
},
|
|
{
|
|
"epoch": 0.21560574948665298,
|
|
"grad_norm": 0.6869651188569896,
|
|
"learning_rate": 1.182608695652174e-05,
|
|
"loss": 0.6437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22780485451221466,
|
|
"step": 35,
|
|
"valid_targets_mean": 11011.3,
|
|
"valid_targets_min": 2403
|
|
},
|
|
{
|
|
"epoch": 0.2464065708418891,
|
|
"grad_norm": 0.5729076954739157,
|
|
"learning_rate": 1.3565217391304348e-05,
|
|
"loss": 0.6324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20696070790290833,
|
|
"step": 40,
|
|
"valid_targets_mean": 9971.0,
|
|
"valid_targets_min": 1883
|
|
},
|
|
{
|
|
"epoch": 0.27720739219712526,
|
|
"grad_norm": 0.4883774957300283,
|
|
"learning_rate": 1.5304347826086958e-05,
|
|
"loss": 0.6134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2091033160686493,
|
|
"step": 45,
|
|
"valid_targets_mean": 10380.5,
|
|
"valid_targets_min": 3726
|
|
},
|
|
{
|
|
"epoch": 0.3080082135523614,
|
|
"grad_norm": 0.3598292149158693,
|
|
"learning_rate": 1.7043478260869566e-05,
|
|
"loss": 0.5959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17776153981685638,
|
|
"step": 50,
|
|
"valid_targets_mean": 9879.5,
|
|
"valid_targets_min": 3528
|
|
},
|
|
{
|
|
"epoch": 0.33880903490759756,
|
|
"grad_norm": 0.389165260388212,
|
|
"learning_rate": 1.8782608695652175e-05,
|
|
"loss": 0.5784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20730958878993988,
|
|
"step": 55,
|
|
"valid_targets_mean": 10501.6,
|
|
"valid_targets_min": 3010
|
|
},
|
|
{
|
|
"epoch": 0.36960985626283366,
|
|
"grad_norm": 0.2988363102174585,
|
|
"learning_rate": 2.0521739130434787e-05,
|
|
"loss": 0.5533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18783965706825256,
|
|
"step": 60,
|
|
"valid_targets_mean": 9842.8,
|
|
"valid_targets_min": 2947
|
|
},
|
|
{
|
|
"epoch": 0.4004106776180698,
|
|
"grad_norm": 0.2683287295477794,
|
|
"learning_rate": 2.2260869565217392e-05,
|
|
"loss": 0.5336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18711069226264954,
|
|
"step": 65,
|
|
"valid_targets_mean": 10642.9,
|
|
"valid_targets_min": 4707
|
|
},
|
|
{
|
|
"epoch": 0.43121149897330596,
|
|
"grad_norm": 0.2404329744552859,
|
|
"learning_rate": 2.4e-05,
|
|
"loss": 0.5327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1542397290468216,
|
|
"step": 70,
|
|
"valid_targets_mean": 8437.6,
|
|
"valid_targets_min": 2555
|
|
},
|
|
{
|
|
"epoch": 0.4620123203285421,
|
|
"grad_norm": 0.25251403548284934,
|
|
"learning_rate": 2.573913043478261e-05,
|
|
"loss": 0.5241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1736878752708435,
|
|
"step": 75,
|
|
"valid_targets_mean": 11317.5,
|
|
"valid_targets_min": 3937
|
|
},
|
|
{
|
|
"epoch": 0.4928131416837782,
|
|
"grad_norm": 0.22646760827022988,
|
|
"learning_rate": 2.747826086956522e-05,
|
|
"loss": 0.5111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17943963408470154,
|
|
"step": 80,
|
|
"valid_targets_mean": 10079.2,
|
|
"valid_targets_min": 1047
|
|
},
|
|
{
|
|
"epoch": 0.5236139630390144,
|
|
"grad_norm": 0.22168009683076434,
|
|
"learning_rate": 2.921739130434783e-05,
|
|
"loss": 0.5127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1879035234451294,
|
|
"step": 85,
|
|
"valid_targets_mean": 10592.5,
|
|
"valid_targets_min": 3842
|
|
},
|
|
{
|
|
"epoch": 0.5544147843942505,
|
|
"grad_norm": 0.24822610653380095,
|
|
"learning_rate": 3.0956521739130435e-05,
|
|
"loss": 0.5028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1684880405664444,
|
|
"step": 90,
|
|
"valid_targets_mean": 9292.6,
|
|
"valid_targets_min": 3273
|
|
},
|
|
{
|
|
"epoch": 0.5852156057494866,
|
|
"grad_norm": 0.2398489128545002,
|
|
"learning_rate": 3.269565217391305e-05,
|
|
"loss": 0.4953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20545238256454468,
|
|
"step": 95,
|
|
"valid_targets_mean": 11001.3,
|
|
"valid_targets_min": 2392
|
|
},
|
|
{
|
|
"epoch": 0.6160164271047228,
|
|
"grad_norm": 0.22960412483563136,
|
|
"learning_rate": 3.443478260869566e-05,
|
|
"loss": 0.49,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17050573229789734,
|
|
"step": 100,
|
|
"valid_targets_mean": 10364.2,
|
|
"valid_targets_min": 3913
|
|
},
|
|
{
|
|
"epoch": 0.6468172484599589,
|
|
"grad_norm": 0.223134537440273,
|
|
"learning_rate": 3.617391304347826e-05,
|
|
"loss": 0.4811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1758454144001007,
|
|
"step": 105,
|
|
"valid_targets_mean": 10234.9,
|
|
"valid_targets_min": 3548
|
|
},
|
|
{
|
|
"epoch": 0.6776180698151951,
|
|
"grad_norm": 0.23753676152704173,
|
|
"learning_rate": 3.791304347826087e-05,
|
|
"loss": 0.4674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1549699902534485,
|
|
"step": 110,
|
|
"valid_targets_mean": 10514.7,
|
|
"valid_targets_min": 1669
|
|
},
|
|
{
|
|
"epoch": 0.7084188911704312,
|
|
"grad_norm": 0.24288587071503348,
|
|
"learning_rate": 3.9652173913043484e-05,
|
|
"loss": 0.472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12784242630004883,
|
|
"step": 115,
|
|
"valid_targets_mean": 8451.4,
|
|
"valid_targets_min": 2031
|
|
},
|
|
{
|
|
"epoch": 0.7392197125256673,
|
|
"grad_norm": 0.2500614038071991,
|
|
"learning_rate": 3.9998499902188776e-05,
|
|
"loss": 0.4739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16550754010677338,
|
|
"step": 120,
|
|
"valid_targets_mean": 10390.9,
|
|
"valid_targets_min": 2598
|
|
},
|
|
{
|
|
"epoch": 0.7700205338809035,
|
|
"grad_norm": 0.26501606669707445,
|
|
"learning_rate": 3.999240614049903e-05,
|
|
"loss": 0.4708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15240433812141418,
|
|
"step": 125,
|
|
"valid_targets_mean": 9886.8,
|
|
"valid_targets_min": 3444
|
|
},
|
|
{
|
|
"epoch": 0.8008213552361396,
|
|
"grad_norm": 0.23844861174775395,
|
|
"learning_rate": 3.998162638602725e-05,
|
|
"loss": 0.4676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16705986857414246,
|
|
"step": 130,
|
|
"valid_targets_mean": 10793.2,
|
|
"valid_targets_min": 4591
|
|
},
|
|
{
|
|
"epoch": 0.8316221765913757,
|
|
"grad_norm": 0.22087987371786275,
|
|
"learning_rate": 3.996616316542537e-05,
|
|
"loss": 0.4537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15392373502254486,
|
|
"step": 135,
|
|
"valid_targets_mean": 10568.0,
|
|
"valid_targets_min": 4286
|
|
},
|
|
{
|
|
"epoch": 0.8624229979466119,
|
|
"grad_norm": 0.260373661733574,
|
|
"learning_rate": 3.994602010309655e-05,
|
|
"loss": 0.4657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16058529913425446,
|
|
"step": 140,
|
|
"valid_targets_mean": 10719.8,
|
|
"valid_targets_min": 444
|
|
},
|
|
{
|
|
"epoch": 0.893223819301848,
|
|
"grad_norm": 0.2408597877120169,
|
|
"learning_rate": 3.992120192034568e-05,
|
|
"loss": 0.4571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1405722200870514,
|
|
"step": 145,
|
|
"valid_targets_mean": 9775.9,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 0.9240246406570842,
|
|
"grad_norm": 0.2257527377603397,
|
|
"learning_rate": 3.989171443427273e-05,
|
|
"loss": 0.4493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14079983532428741,
|
|
"step": 150,
|
|
"valid_targets_mean": 9406.4,
|
|
"valid_targets_min": 1978
|
|
},
|
|
{
|
|
"epoch": 0.9548254620123203,
|
|
"grad_norm": 0.2639637342672226,
|
|
"learning_rate": 3.98575645564093e-05,
|
|
"loss": 0.4496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14828550815582275,
|
|
"step": 155,
|
|
"valid_targets_mean": 9482.4,
|
|
"valid_targets_min": 1574
|
|
},
|
|
{
|
|
"epoch": 0.9856262833675564,
|
|
"grad_norm": 0.2526261707928796,
|
|
"learning_rate": 3.981876029109865e-05,
|
|
"loss": 0.4477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13988760113716125,
|
|
"step": 160,
|
|
"valid_targets_mean": 8873.9,
|
|
"valid_targets_min": 3430
|
|
},
|
|
{
|
|
"epoch": 1.0123203285420945,
|
|
"grad_norm": 0.25498967898811375,
|
|
"learning_rate": 3.9775310733619544e-05,
|
|
"loss": 0.443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1458635777235031,
|
|
"step": 165,
|
|
"valid_targets_mean": 10038.5,
|
|
"valid_targets_min": 3952
|
|
},
|
|
{
|
|
"epoch": 1.0431211498973305,
|
|
"grad_norm": 0.2654670242620403,
|
|
"learning_rate": 3.972722606805445e-05,
|
|
"loss": 0.4445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1620415598154068,
|
|
"step": 170,
|
|
"valid_targets_mean": 11246.2,
|
|
"valid_targets_min": 4395
|
|
},
|
|
{
|
|
"epoch": 1.0739219712525667,
|
|
"grad_norm": 0.24113536439654604,
|
|
"learning_rate": 3.967451756490248e-05,
|
|
"loss": 0.435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15797394514083862,
|
|
"step": 175,
|
|
"valid_targets_mean": 11217.4,
|
|
"valid_targets_min": 3344
|
|
},
|
|
{
|
|
"epoch": 1.104722792607803,
|
|
"grad_norm": 0.2740746014261754,
|
|
"learning_rate": 3.961719757843773e-05,
|
|
"loss": 0.4434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.150252103805542,
|
|
"step": 180,
|
|
"valid_targets_mean": 9786.6,
|
|
"valid_targets_min": 1441
|
|
},
|
|
{
|
|
"epoch": 1.1355236139630391,
|
|
"grad_norm": 0.258490919264763,
|
|
"learning_rate": 3.955527954381359e-05,
|
|
"loss": 0.4431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13429267704486847,
|
|
"step": 185,
|
|
"valid_targets_mean": 9314.9,
|
|
"valid_targets_min": 357
|
|
},
|
|
{
|
|
"epoch": 1.1663244353182751,
|
|
"grad_norm": 0.25135094678661335,
|
|
"learning_rate": 3.948877797391365e-05,
|
|
"loss": 0.4411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15129579603672028,
|
|
"step": 190,
|
|
"valid_targets_mean": 9681.8,
|
|
"valid_targets_min": 3462
|
|
},
|
|
{
|
|
"epoch": 1.1971252566735113,
|
|
"grad_norm": 0.2241624236803852,
|
|
"learning_rate": 3.941770845595009e-05,
|
|
"loss": 0.4318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14005209505558014,
|
|
"step": 195,
|
|
"valid_targets_mean": 10555.8,
|
|
"valid_targets_min": 1958
|
|
},
|
|
{
|
|
"epoch": 1.2279260780287475,
|
|
"grad_norm": 0.23528151967814984,
|
|
"learning_rate": 3.934208764781022e-05,
|
|
"loss": 0.432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13930237293243408,
|
|
"step": 200,
|
|
"valid_targets_mean": 10503.7,
|
|
"valid_targets_min": 894
|
|
},
|
|
{
|
|
"epoch": 1.2587268993839835,
|
|
"grad_norm": 0.27002466162348915,
|
|
"learning_rate": 3.9261933274152006e-05,
|
|
"loss": 0.4309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13504661619663239,
|
|
"step": 205,
|
|
"valid_targets_mean": 9799.6,
|
|
"valid_targets_min": 382
|
|
},
|
|
{
|
|
"epoch": 1.2895277207392197,
|
|
"grad_norm": 0.21563006501429327,
|
|
"learning_rate": 3.917726412224967e-05,
|
|
"loss": 0.4412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15858903527259827,
|
|
"step": 210,
|
|
"valid_targets_mean": 10780.6,
|
|
"valid_targets_min": 2033
|
|
},
|
|
{
|
|
"epoch": 1.320328542094456,
|
|
"grad_norm": 0.2394461528414203,
|
|
"learning_rate": 3.908810003759015e-05,
|
|
"loss": 0.4368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1396717131137848,
|
|
"step": 215,
|
|
"valid_targets_mean": 10120.3,
|
|
"valid_targets_min": 3639
|
|
},
|
|
{
|
|
"epoch": 1.351129363449692,
|
|
"grad_norm": 0.2506700840081252,
|
|
"learning_rate": 3.8994461919221514e-05,
|
|
"loss": 0.4267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1430070549249649,
|
|
"step": 220,
|
|
"valid_targets_mean": 10176.3,
|
|
"valid_targets_min": 3021
|
|
},
|
|
{
|
|
"epoch": 1.3819301848049281,
|
|
"grad_norm": 0.23480839371747675,
|
|
"learning_rate": 3.889637171485449e-05,
|
|
"loss": 0.437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15943750739097595,
|
|
"step": 225,
|
|
"valid_targets_mean": 11043.3,
|
|
"valid_targets_min": 5157
|
|
},
|
|
{
|
|
"epoch": 1.4127310061601643,
|
|
"grad_norm": 0.23589975763379348,
|
|
"learning_rate": 3.879385241571817e-05,
|
|
"loss": 0.4281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14591297507286072,
|
|
"step": 230,
|
|
"valid_targets_mean": 10345.9,
|
|
"valid_targets_min": 3046
|
|
},
|
|
{
|
|
"epoch": 1.4435318275154003,
|
|
"grad_norm": 0.26204876497809454,
|
|
"learning_rate": 3.868692805117113e-05,
|
|
"loss": 0.4288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13737617433071136,
|
|
"step": 235,
|
|
"valid_targets_mean": 10190.5,
|
|
"valid_targets_min": 3107
|
|
},
|
|
{
|
|
"epoch": 1.4743326488706365,
|
|
"grad_norm": 0.2711456241860061,
|
|
"learning_rate": 3.8575623683069195e-05,
|
|
"loss": 0.4329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13510125875473022,
|
|
"step": 240,
|
|
"valid_targets_mean": 10149.4,
|
|
"valid_targets_min": 473
|
|
},
|
|
{
|
|
"epoch": 1.5051334702258727,
|
|
"grad_norm": 0.25547982444880957,
|
|
"learning_rate": 3.845996539989126e-05,
|
|
"loss": 0.4236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13566002249717712,
|
|
"step": 245,
|
|
"valid_targets_mean": 9541.6,
|
|
"valid_targets_min": 2086
|
|
},
|
|
{
|
|
"epoch": 1.5359342915811087,
|
|
"grad_norm": 0.2480976978769594,
|
|
"learning_rate": 3.833998031062442e-05,
|
|
"loss": 0.4201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13639628887176514,
|
|
"step": 250,
|
|
"valid_targets_mean": 9303.5,
|
|
"valid_targets_min": 3090
|
|
},
|
|
{
|
|
"epoch": 1.566735112936345,
|
|
"grad_norm": 0.24221709972181102,
|
|
"learning_rate": 3.821569653840995e-05,
|
|
"loss": 0.4277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14368358254432678,
|
|
"step": 255,
|
|
"valid_targets_mean": 8807.1,
|
|
"valid_targets_min": 1766
|
|
},
|
|
{
|
|
"epoch": 1.5975359342915811,
|
|
"grad_norm": 0.27099806998138243,
|
|
"learning_rate": 3.808714321395155e-05,
|
|
"loss": 0.4288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16085977852344513,
|
|
"step": 260,
|
|
"valid_targets_mean": 10484.7,
|
|
"valid_targets_min": 3249
|
|
},
|
|
{
|
|
"epoch": 1.6283367556468171,
|
|
"grad_norm": 0.2377874625941885,
|
|
"learning_rate": 3.795435046868745e-05,
|
|
"loss": 0.4234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1193472146987915,
|
|
"step": 265,
|
|
"valid_targets_mean": 8244.7,
|
|
"valid_targets_min": 2560
|
|
},
|
|
{
|
|
"epoch": 1.6591375770020536,
|
|
"grad_norm": 0.2541397790886549,
|
|
"learning_rate": 3.78173494277279e-05,
|
|
"loss": 0.4159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10338842868804932,
|
|
"step": 270,
|
|
"valid_targets_mean": 7190.8,
|
|
"valid_targets_min": 475
|
|
},
|
|
{
|
|
"epoch": 1.6899383983572895,
|
|
"grad_norm": 0.2524891350308471,
|
|
"learning_rate": 3.767617220255989e-05,
|
|
"loss": 0.4214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1306953728199005,
|
|
"step": 275,
|
|
"valid_targets_mean": 8843.7,
|
|
"valid_targets_min": 1938
|
|
},
|
|
{
|
|
"epoch": 1.7207392197125255,
|
|
"grad_norm": 0.31674080289717166,
|
|
"learning_rate": 3.753085188352047e-05,
|
|
"loss": 0.4285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15276293456554413,
|
|
"step": 280,
|
|
"valid_targets_mean": 11124.9,
|
|
"valid_targets_min": 2379
|
|
},
|
|
{
|
|
"epoch": 1.751540041067762,
|
|
"grad_norm": 0.271075195175284,
|
|
"learning_rate": 3.738142253204086e-05,
|
|
"loss": 0.4143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1333828717470169,
|
|
"step": 285,
|
|
"valid_targets_mean": 10931.6,
|
|
"valid_targets_min": 278
|
|
},
|
|
{
|
|
"epoch": 1.782340862422998,
|
|
"grad_norm": 0.27464953772893264,
|
|
"learning_rate": 3.722791917266273e-05,
|
|
"loss": 0.4243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13427472114562988,
|
|
"step": 290,
|
|
"valid_targets_mean": 10074.6,
|
|
"valid_targets_min": 2476
|
|
},
|
|
{
|
|
"epoch": 1.813141683778234,
|
|
"grad_norm": 0.2453667768754085,
|
|
"learning_rate": 3.707037778482892e-05,
|
|
"loss": 0.426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13184840977191925,
|
|
"step": 295,
|
|
"valid_targets_mean": 9288.8,
|
|
"valid_targets_min": 3084
|
|
},
|
|
{
|
|
"epoch": 1.8439425051334704,
|
|
"grad_norm": 0.2517748348569757,
|
|
"learning_rate": 3.690883529445028e-05,
|
|
"loss": 0.4163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13088074326515198,
|
|
"step": 300,
|
|
"valid_targets_mean": 9480.1,
|
|
"valid_targets_min": 228
|
|
},
|
|
{
|
|
"epoch": 1.8747433264887063,
|
|
"grad_norm": 0.2468601020156896,
|
|
"learning_rate": 3.6743329565250575e-05,
|
|
"loss": 0.4142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14135316014289856,
|
|
"step": 305,
|
|
"valid_targets_mean": 9932.8,
|
|
"valid_targets_min": 3640
|
|
},
|
|
{
|
|
"epoch": 1.9055441478439425,
|
|
"grad_norm": 0.24890855608847368,
|
|
"learning_rate": 3.657389938989173e-05,
|
|
"loss": 0.4178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13682851195335388,
|
|
"step": 310,
|
|
"valid_targets_mean": 9554.7,
|
|
"valid_targets_min": 2220
|
|
},
|
|
{
|
|
"epoch": 1.9363449691991788,
|
|
"grad_norm": 0.21662618528514496,
|
|
"learning_rate": 3.6400584480881246e-05,
|
|
"loss": 0.42,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12407977133989334,
|
|
"step": 315,
|
|
"valid_targets_mean": 9492.4,
|
|
"valid_targets_min": 1918
|
|
},
|
|
{
|
|
"epoch": 1.9671457905544147,
|
|
"grad_norm": 0.2665236408311185,
|
|
"learning_rate": 3.622342546126405e-05,
|
|
"loss": 0.425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16727863252162933,
|
|
"step": 320,
|
|
"valid_targets_mean": 10798.4,
|
|
"valid_targets_min": 2302
|
|
},
|
|
{
|
|
"epoch": 1.997946611909651,
|
|
"grad_norm": 0.24445750361586888,
|
|
"learning_rate": 3.604246385510088e-05,
|
|
"loss": 0.419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13143767416477203,
|
|
"step": 325,
|
|
"valid_targets_mean": 9471.8,
|
|
"valid_targets_min": 2411
|
|
},
|
|
{
|
|
"epoch": 2.024640657084189,
|
|
"grad_norm": 0.2617499382018992,
|
|
"learning_rate": 3.585774207773557e-05,
|
|
"loss": 0.4142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15601074695587158,
|
|
"step": 330,
|
|
"valid_targets_mean": 10741.7,
|
|
"valid_targets_min": 497
|
|
},
|
|
{
|
|
"epoch": 2.055441478439425,
|
|
"grad_norm": 0.25390431014432446,
|
|
"learning_rate": 3.5669303425853325e-05,
|
|
"loss": 0.408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14050063490867615,
|
|
"step": 335,
|
|
"valid_targets_mean": 10453.0,
|
|
"valid_targets_min": 1313
|
|
},
|
|
{
|
|
"epoch": 2.086242299794661,
|
|
"grad_norm": 0.24784537187252367,
|
|
"learning_rate": 3.547719206733252e-05,
|
|
"loss": 0.4112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13861119747161865,
|
|
"step": 340,
|
|
"valid_targets_mean": 10057.9,
|
|
"valid_targets_min": 319
|
|
},
|
|
{
|
|
"epoch": 2.1170431211498975,
|
|
"grad_norm": 0.2426581217742219,
|
|
"learning_rate": 3.528145303089221e-05,
|
|
"loss": 0.4123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1406959593296051,
|
|
"step": 345,
|
|
"valid_targets_mean": 10138.0,
|
|
"valid_targets_min": 4464
|
|
},
|
|
{
|
|
"epoch": 2.1478439425051334,
|
|
"grad_norm": 0.24675637675967238,
|
|
"learning_rate": 3.508213219553793e-05,
|
|
"loss": 0.4069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14610439538955688,
|
|
"step": 350,
|
|
"valid_targets_mean": 10286.1,
|
|
"valid_targets_min": 4512
|
|
},
|
|
{
|
|
"epoch": 2.1786447638603694,
|
|
"grad_norm": 0.2440931373764094,
|
|
"learning_rate": 3.4879276279808163e-05,
|
|
"loss": 0.4098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13264574110507965,
|
|
"step": 355,
|
|
"valid_targets_mean": 8596.7,
|
|
"valid_targets_min": 483
|
|
},
|
|
{
|
|
"epoch": 2.209445585215606,
|
|
"grad_norm": 0.245025185185539,
|
|
"learning_rate": 3.4672932830824056e-05,
|
|
"loss": 0.4081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1417609453201294,
|
|
"step": 360,
|
|
"valid_targets_mean": 10969.4,
|
|
"valid_targets_min": 1437
|
|
},
|
|
{
|
|
"epoch": 2.240246406570842,
|
|
"grad_norm": 0.24876551307938133,
|
|
"learning_rate": 3.446315021314494e-05,
|
|
"loss": 0.4129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14342427253723145,
|
|
"step": 365,
|
|
"valid_targets_mean": 9465.1,
|
|
"valid_targets_min": 2103
|
|
},
|
|
{
|
|
"epoch": 2.2710472279260783,
|
|
"grad_norm": 0.26795367396469427,
|
|
"learning_rate": 3.424997759743217e-05,
|
|
"loss": 0.4013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12778609991073608,
|
|
"step": 370,
|
|
"valid_targets_mean": 9634.5,
|
|
"valid_targets_min": 1785
|
|
},
|
|
{
|
|
"epoch": 2.3018480492813143,
|
|
"grad_norm": 0.23421853162542677,
|
|
"learning_rate": 3.403346494892411e-05,
|
|
"loss": 0.4083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13451623916625977,
|
|
"step": 375,
|
|
"valid_targets_mean": 9650.5,
|
|
"valid_targets_min": 305
|
|
},
|
|
{
|
|
"epoch": 2.3326488706365502,
|
|
"grad_norm": 0.2693457105084275,
|
|
"learning_rate": 3.381366301572489e-05,
|
|
"loss": 0.4112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12630873918533325,
|
|
"step": 380,
|
|
"valid_targets_mean": 10074.8,
|
|
"valid_targets_min": 421
|
|
},
|
|
{
|
|
"epoch": 2.3634496919917867,
|
|
"grad_norm": 0.2328615075804371,
|
|
"learning_rate": 3.359062331690956e-05,
|
|
"loss": 0.4074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13336540758609772,
|
|
"step": 385,
|
|
"valid_targets_mean": 9906.7,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 2.3942505133470227,
|
|
"grad_norm": 0.23998410697733752,
|
|
"learning_rate": 3.336439813044861e-05,
|
|
"loss": 0.41,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14154762029647827,
|
|
"step": 390,
|
|
"valid_targets_mean": 9435.9,
|
|
"valid_targets_min": 2754
|
|
},
|
|
{
|
|
"epoch": 2.4250513347022586,
|
|
"grad_norm": 0.24295648286477237,
|
|
"learning_rate": 3.313504048095469e-05,
|
|
"loss": 0.4112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13870443403720856,
|
|
"step": 395,
|
|
"valid_targets_mean": 9355.4,
|
|
"valid_targets_min": 254
|
|
},
|
|
{
|
|
"epoch": 2.455852156057495,
|
|
"grad_norm": 0.25647709210106184,
|
|
"learning_rate": 3.2902604127254134e-05,
|
|
"loss": 0.4055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14127278327941895,
|
|
"step": 400,
|
|
"valid_targets_mean": 10922.0,
|
|
"valid_targets_min": 317
|
|
},
|
|
{
|
|
"epoch": 2.486652977412731,
|
|
"grad_norm": 0.24788114923573965,
|
|
"learning_rate": 3.266714354978657e-05,
|
|
"loss": 0.4055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11631830036640167,
|
|
"step": 405,
|
|
"valid_targets_mean": 9566.3,
|
|
"valid_targets_min": 422
|
|
},
|
|
{
|
|
"epoch": 2.517453798767967,
|
|
"grad_norm": 0.22799816622722477,
|
|
"learning_rate": 3.24287139378353e-05,
|
|
"loss": 0.4012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1390194594860077,
|
|
"step": 410,
|
|
"valid_targets_mean": 10234.0,
|
|
"valid_targets_min": 1691
|
|
},
|
|
{
|
|
"epoch": 2.5482546201232035,
|
|
"grad_norm": 0.2484370468858101,
|
|
"learning_rate": 3.2187371176591574e-05,
|
|
"loss": 0.4036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12273897975683212,
|
|
"step": 415,
|
|
"valid_targets_mean": 9935.5,
|
|
"valid_targets_min": 391
|
|
},
|
|
{
|
|
"epoch": 2.5790554414784395,
|
|
"grad_norm": 0.2791429635859173,
|
|
"learning_rate": 3.194317183405573e-05,
|
|
"loss": 0.4032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13965481519699097,
|
|
"step": 420,
|
|
"valid_targets_mean": 10705.2,
|
|
"valid_targets_min": 2372
|
|
},
|
|
{
|
|
"epoch": 2.6098562628336754,
|
|
"grad_norm": 0.2444287470019243,
|
|
"learning_rate": 3.169617314777828e-05,
|
|
"loss": 0.4062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1428462415933609,
|
|
"step": 425,
|
|
"valid_targets_mean": 10718.3,
|
|
"valid_targets_min": 2588
|
|
},
|
|
{
|
|
"epoch": 2.640657084188912,
|
|
"grad_norm": 0.2791873926431355,
|
|
"learning_rate": 3.14464330114441e-05,
|
|
"loss": 0.41,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15584325790405273,
|
|
"step": 430,
|
|
"valid_targets_mean": 10506.5,
|
|
"valid_targets_min": 313
|
|
},
|
|
{
|
|
"epoch": 2.671457905544148,
|
|
"grad_norm": 0.24786042312840947,
|
|
"learning_rate": 3.119400996130281e-05,
|
|
"loss": 0.4118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13824349641799927,
|
|
"step": 435,
|
|
"valid_targets_mean": 9737.3,
|
|
"valid_targets_min": 355
|
|
},
|
|
{
|
|
"epoch": 2.702258726899384,
|
|
"grad_norm": 0.2188588147632267,
|
|
"learning_rate": 3.093896316244855e-05,
|
|
"loss": 0.398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11751606315374374,
|
|
"step": 440,
|
|
"valid_targets_mean": 8301.4,
|
|
"valid_targets_min": 2539
|
|
},
|
|
{
|
|
"epoch": 2.7330595482546203,
|
|
"grad_norm": 0.25311777351511694,
|
|
"learning_rate": 3.0681352394952276e-05,
|
|
"loss": 0.4098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14293605089187622,
|
|
"step": 445,
|
|
"valid_targets_mean": 10999.3,
|
|
"valid_targets_min": 3311
|
|
},
|
|
{
|
|
"epoch": 2.7638603696098563,
|
|
"grad_norm": 0.2507509543463196,
|
|
"learning_rate": 3.0421238039850132e-05,
|
|
"loss": 0.4014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1273411512374878,
|
|
"step": 450,
|
|
"valid_targets_mean": 9301.3,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 2.7946611909650922,
|
|
"grad_norm": 0.20025231318009487,
|
|
"learning_rate": 3.0158681064990688e-05,
|
|
"loss": 0.3973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13788363337516785,
|
|
"step": 455,
|
|
"valid_targets_mean": 10190.0,
|
|
"valid_targets_min": 1776
|
|
},
|
|
{
|
|
"epoch": 2.8254620123203287,
|
|
"grad_norm": 0.22451287983352627,
|
|
"learning_rate": 2.9893743010744853e-05,
|
|
"loss": 0.4045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13523007929325104,
|
|
"step": 460,
|
|
"valid_targets_mean": 10681.0,
|
|
"valid_targets_min": 3050
|
|
},
|
|
{
|
|
"epoch": 2.8562628336755647,
|
|
"grad_norm": 0.24527472770616648,
|
|
"learning_rate": 2.962648597558147e-05,
|
|
"loss": 0.3936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14209523797035217,
|
|
"step": 465,
|
|
"valid_targets_mean": 11035.8,
|
|
"valid_targets_min": 3838
|
|
},
|
|
{
|
|
"epoch": 2.8870636550308006,
|
|
"grad_norm": 0.2040471862649505,
|
|
"learning_rate": 2.935697260151216e-05,
|
|
"loss": 0.405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13653475046157837,
|
|
"step": 470,
|
|
"valid_targets_mean": 10036.0,
|
|
"valid_targets_min": 424
|
|
},
|
|
{
|
|
"epoch": 2.917864476386037,
|
|
"grad_norm": 0.24970399223730452,
|
|
"learning_rate": 2.9085266059408734e-05,
|
|
"loss": 0.4064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11486883461475372,
|
|
"step": 475,
|
|
"valid_targets_mean": 8802.4,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 2.948665297741273,
|
|
"grad_norm": 0.24585352363230156,
|
|
"learning_rate": 2.8811430034196667e-05,
|
|
"loss": 0.4016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14084793627262115,
|
|
"step": 480,
|
|
"valid_targets_mean": 10247.7,
|
|
"valid_targets_min": 2463
|
|
},
|
|
{
|
|
"epoch": 2.979466119096509,
|
|
"grad_norm": 0.2284209488306044,
|
|
"learning_rate": 2.8535528709928076e-05,
|
|
"loss": 0.4004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12997475266456604,
|
|
"step": 485,
|
|
"valid_targets_mean": 10478.2,
|
|
"valid_targets_min": 511
|
|
},
|
|
{
|
|
"epoch": 3.0061601642710474,
|
|
"grad_norm": 0.28764503180445666,
|
|
"learning_rate": 2.8257626754737703e-05,
|
|
"loss": 0.4081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12351834774017334,
|
|
"step": 490,
|
|
"valid_targets_mean": 10007.4,
|
|
"valid_targets_min": 2277
|
|
},
|
|
{
|
|
"epoch": 3.0369609856262834,
|
|
"grad_norm": 0.2259345401970643,
|
|
"learning_rate": 2.7977789305685424e-05,
|
|
"loss": 0.3923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13150466978549957,
|
|
"step": 495,
|
|
"valid_targets_mean": 9635.6,
|
|
"valid_targets_min": 3197
|
|
},
|
|
{
|
|
"epoch": 3.0677618069815193,
|
|
"grad_norm": 0.21654883339869027,
|
|
"learning_rate": 2.7696081953488917e-05,
|
|
"loss": 0.3982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12963591516017914,
|
|
"step": 500,
|
|
"valid_targets_mean": 10897.4,
|
|
"valid_targets_min": 1408
|
|
},
|
|
{
|
|
"epoch": 3.0985626283367558,
|
|
"grad_norm": 0.2516668390746283,
|
|
"learning_rate": 2.7412570727149864e-05,
|
|
"loss": 0.393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13977795839309692,
|
|
"step": 505,
|
|
"valid_targets_mean": 9534.0,
|
|
"valid_targets_min": 1574
|
|
},
|
|
{
|
|
"epoch": 3.1293634496919918,
|
|
"grad_norm": 0.22820277821157164,
|
|
"learning_rate": 2.712732207847757e-05,
|
|
"loss": 0.3902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10845097154378891,
|
|
"step": 510,
|
|
"valid_targets_mean": 8855.2,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 3.1601642710472277,
|
|
"grad_norm": 0.24753089464915212,
|
|
"learning_rate": 2.684040286651338e-05,
|
|
"loss": 0.4033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11616842448711395,
|
|
"step": 515,
|
|
"valid_targets_mean": 8280.0,
|
|
"valid_targets_min": 424
|
|
},
|
|
{
|
|
"epoch": 3.190965092402464,
|
|
"grad_norm": 0.3045771848951917,
|
|
"learning_rate": 2.655188034185969e-05,
|
|
"loss": 0.4001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1244233027100563,
|
|
"step": 520,
|
|
"valid_targets_mean": 8514.0,
|
|
"valid_targets_min": 1325
|
|
},
|
|
{
|
|
"epoch": 3.2217659137577,
|
|
"grad_norm": 0.24513235456822977,
|
|
"learning_rate": 2.626182213091717e-05,
|
|
"loss": 0.399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13471511006355286,
|
|
"step": 525,
|
|
"valid_targets_mean": 10048.8,
|
|
"valid_targets_min": 1367
|
|
},
|
|
{
|
|
"epoch": 3.2525667351129366,
|
|
"grad_norm": 0.22384412934757097,
|
|
"learning_rate": 2.5970296220033894e-05,
|
|
"loss": 0.3957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12419713288545609,
|
|
"step": 530,
|
|
"valid_targets_mean": 9873.7,
|
|
"valid_targets_min": 2721
|
|
},
|
|
{
|
|
"epoch": 3.2833675564681726,
|
|
"grad_norm": 0.32631256865781927,
|
|
"learning_rate": 2.5677370939570142e-05,
|
|
"loss": 0.3978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14734040200710297,
|
|
"step": 535,
|
|
"valid_targets_mean": 11067.4,
|
|
"valid_targets_min": 5211
|
|
},
|
|
{
|
|
"epoch": 3.3141683778234086,
|
|
"grad_norm": 0.2682949436111079,
|
|
"learning_rate": 2.538311494788249e-05,
|
|
"loss": 0.4032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14371266961097717,
|
|
"step": 540,
|
|
"valid_targets_mean": 9686.2,
|
|
"valid_targets_min": 473
|
|
},
|
|
{
|
|
"epoch": 3.344969199178645,
|
|
"grad_norm": 0.213074341400513,
|
|
"learning_rate": 2.508759721523113e-05,
|
|
"loss": 0.3902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1292104721069336,
|
|
"step": 545,
|
|
"valid_targets_mean": 10185.6,
|
|
"valid_targets_min": 2656
|
|
},
|
|
{
|
|
"epoch": 3.375770020533881,
|
|
"grad_norm": 0.22979047325988197,
|
|
"learning_rate": 2.479088700761398e-05,
|
|
"loss": 0.4028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13095468282699585,
|
|
"step": 550,
|
|
"valid_targets_mean": 9349.8,
|
|
"valid_targets_min": 2665
|
|
},
|
|
{
|
|
"epoch": 3.406570841889117,
|
|
"grad_norm": 0.2317226978815973,
|
|
"learning_rate": 2.4493053870531534e-05,
|
|
"loss": 0.3902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12155196070671082,
|
|
"step": 555,
|
|
"valid_targets_mean": 9454.3,
|
|
"valid_targets_min": 309
|
|
},
|
|
{
|
|
"epoch": 3.4373716632443534,
|
|
"grad_norm": 0.2373730952557148,
|
|
"learning_rate": 2.4194167612686208e-05,
|
|
"loss": 0.3902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13594003021717072,
|
|
"step": 560,
|
|
"valid_targets_mean": 10032.6,
|
|
"valid_targets_min": 2078
|
|
},
|
|
{
|
|
"epoch": 3.4681724845995894,
|
|
"grad_norm": 0.24854976213675622,
|
|
"learning_rate": 2.389429828961994e-05,
|
|
"loss": 0.3902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12389802187681198,
|
|
"step": 565,
|
|
"valid_targets_mean": 10126.2,
|
|
"valid_targets_min": 2513
|
|
},
|
|
{
|
|
"epoch": 3.4989733059548254,
|
|
"grad_norm": 0.20719983612273074,
|
|
"learning_rate": 2.3593516187293978e-05,
|
|
"loss": 0.3971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13517609238624573,
|
|
"step": 570,
|
|
"valid_targets_mean": 11068.8,
|
|
"valid_targets_min": 2148
|
|
},
|
|
{
|
|
"epoch": 3.529774127310062,
|
|
"grad_norm": 0.2161162868640456,
|
|
"learning_rate": 2.329189180561468e-05,
|
|
"loss": 0.3921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12443984299898148,
|
|
"step": 575,
|
|
"valid_targets_mean": 10574.7,
|
|
"valid_targets_min": 2453
|
|
},
|
|
{
|
|
"epoch": 3.560574948665298,
|
|
"grad_norm": 0.22111992371927122,
|
|
"learning_rate": 2.298949584190909e-05,
|
|
"loss": 0.3841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13051998615264893,
|
|
"step": 580,
|
|
"valid_targets_mean": 10700.1,
|
|
"valid_targets_min": 2985
|
|
},
|
|
{
|
|
"epoch": 3.5913757700205338,
|
|
"grad_norm": 0.21764605005096452,
|
|
"learning_rate": 2.2686399174354313e-05,
|
|
"loss": 0.3966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12670505046844482,
|
|
"step": 585,
|
|
"valid_targets_mean": 10135.7,
|
|
"valid_targets_min": 2080
|
|
},
|
|
{
|
|
"epoch": 3.62217659137577,
|
|
"grad_norm": 0.21319458178377276,
|
|
"learning_rate": 2.2382672845364474e-05,
|
|
"loss": 0.3935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14208604395389557,
|
|
"step": 590,
|
|
"valid_targets_mean": 12013.0,
|
|
"valid_targets_min": 4995
|
|
},
|
|
{
|
|
"epoch": 3.652977412731006,
|
|
"grad_norm": 0.26122804480818995,
|
|
"learning_rate": 2.2078388044939193e-05,
|
|
"loss": 0.3984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12731271982192993,
|
|
"step": 595,
|
|
"valid_targets_mean": 10844.6,
|
|
"valid_targets_min": 2639
|
|
},
|
|
{
|
|
"epoch": 3.683778234086242,
|
|
"grad_norm": 0.22781179096787188,
|
|
"learning_rate": 2.1773616093977433e-05,
|
|
"loss": 0.393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12237831950187683,
|
|
"step": 600,
|
|
"valid_targets_mean": 9943.8,
|
|
"valid_targets_min": 2118
|
|
},
|
|
{
|
|
"epoch": 3.7145790554414786,
|
|
"grad_norm": 0.22728502345335605,
|
|
"learning_rate": 2.146842842756071e-05,
|
|
"loss": 0.3896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1511116325855255,
|
|
"step": 605,
|
|
"valid_targets_mean": 11172.6,
|
|
"valid_targets_min": 2785
|
|
},
|
|
{
|
|
"epoch": 3.7453798767967146,
|
|
"grad_norm": 0.22097946670719984,
|
|
"learning_rate": 2.1162896578209517e-05,
|
|
"loss": 0.396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14141415059566498,
|
|
"step": 610,
|
|
"valid_targets_mean": 10525.3,
|
|
"valid_targets_min": 2828
|
|
},
|
|
{
|
|
"epoch": 3.7761806981519506,
|
|
"grad_norm": 0.2295100609487947,
|
|
"learning_rate": 2.0857092159116927e-05,
|
|
"loss": 0.3922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12614914774894714,
|
|
"step": 615,
|
|
"valid_targets_mean": 9766.1,
|
|
"valid_targets_min": 2992
|
|
},
|
|
{
|
|
"epoch": 3.806981519507187,
|
|
"grad_norm": 0.22721432401492533,
|
|
"learning_rate": 2.0551086847363245e-05,
|
|
"loss": 0.3964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13179969787597656,
|
|
"step": 620,
|
|
"valid_targets_mean": 10949.6,
|
|
"valid_targets_min": 4792
|
|
},
|
|
{
|
|
"epoch": 3.837782340862423,
|
|
"grad_norm": 0.22078763844825,
|
|
"learning_rate": 2.024495236711574e-05,
|
|
"loss": 0.3852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12299636006355286,
|
|
"step": 625,
|
|
"valid_targets_mean": 9665.4,
|
|
"valid_targets_min": 1134
|
|
},
|
|
{
|
|
"epoch": 3.868583162217659,
|
|
"grad_norm": 0.22914339313844653,
|
|
"learning_rate": 1.993876047281731e-05,
|
|
"loss": 0.394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12025485187768936,
|
|
"step": 630,
|
|
"valid_targets_mean": 8838.8,
|
|
"valid_targets_min": 3422
|
|
},
|
|
{
|
|
"epoch": 3.8993839835728954,
|
|
"grad_norm": 0.2302518030075612,
|
|
"learning_rate": 1.963258293236805e-05,
|
|
"loss": 0.3937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.123673215508461,
|
|
"step": 635,
|
|
"valid_targets_mean": 10010.0,
|
|
"valid_targets_min": 3800
|
|
},
|
|
{
|
|
"epoch": 3.9301848049281314,
|
|
"grad_norm": 0.21544041332713842,
|
|
"learning_rate": 1.9326491510303694e-05,
|
|
"loss": 0.3922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12482661008834839,
|
|
"step": 640,
|
|
"valid_targets_mean": 9535.8,
|
|
"valid_targets_min": 317
|
|
},
|
|
{
|
|
"epoch": 3.9609856262833674,
|
|
"grad_norm": 0.2015648862338233,
|
|
"learning_rate": 1.902055795097477e-05,
|
|
"loss": 0.3889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14611949026584625,
|
|
"step": 645,
|
|
"valid_targets_mean": 11163.8,
|
|
"valid_targets_min": 5062
|
|
},
|
|
{
|
|
"epoch": 3.991786447638604,
|
|
"grad_norm": 0.22187245313872425,
|
|
"learning_rate": 1.87148539617306e-05,
|
|
"loss": 0.3918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12460841238498688,
|
|
"step": 650,
|
|
"valid_targets_mean": 8963.2,
|
|
"valid_targets_min": 357
|
|
},
|
|
{
|
|
"epoch": 4.018480492813142,
|
|
"grad_norm": 0.22476334343242654,
|
|
"learning_rate": 1.840945119611188e-05,
|
|
"loss": 0.3893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16152283549308777,
|
|
"step": 655,
|
|
"valid_targets_mean": 11521.0,
|
|
"valid_targets_min": 3049
|
|
},
|
|
{
|
|
"epoch": 4.049281314168378,
|
|
"grad_norm": 0.21839067338296778,
|
|
"learning_rate": 1.8104421237055933e-05,
|
|
"loss": 0.3881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1331462264060974,
|
|
"step": 660,
|
|
"valid_targets_mean": 9382.9,
|
|
"valid_targets_min": 2223
|
|
},
|
|
{
|
|
"epoch": 4.080082135523614,
|
|
"grad_norm": 0.24094322681397756,
|
|
"learning_rate": 1.7799835580118416e-05,
|
|
"loss": 0.3865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12497024983167648,
|
|
"step": 665,
|
|
"valid_targets_mean": 8761.0,
|
|
"valid_targets_min": 2050
|
|
},
|
|
{
|
|
"epoch": 4.11088295687885,
|
|
"grad_norm": 0.22795248591570674,
|
|
"learning_rate": 1.7495765616715656e-05,
|
|
"loss": 0.3851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1485755890607834,
|
|
"step": 670,
|
|
"valid_targets_mean": 10701.9,
|
|
"valid_targets_min": 2090
|
|
},
|
|
{
|
|
"epoch": 4.1416837782340865,
|
|
"grad_norm": 0.20397983901648906,
|
|
"learning_rate": 1.719228261739126e-05,
|
|
"loss": 0.3865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12895135581493378,
|
|
"step": 675,
|
|
"valid_targets_mean": 10493.6,
|
|
"valid_targets_min": 2308
|
|
},
|
|
{
|
|
"epoch": 4.172484599589322,
|
|
"grad_norm": 0.2114871061106328,
|
|
"learning_rate": 1.6889457715111144e-05,
|
|
"loss": 0.3911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12499860674142838,
|
|
"step": 680,
|
|
"valid_targets_mean": 10399.1,
|
|
"valid_targets_min": 354
|
|
},
|
|
{
|
|
"epoch": 4.2032854209445585,
|
|
"grad_norm": 0.22161472927911963,
|
|
"learning_rate": 1.6587361888590718e-05,
|
|
"loss": 0.385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11722412705421448,
|
|
"step": 685,
|
|
"valid_targets_mean": 9381.0,
|
|
"valid_targets_min": 3469
|
|
},
|
|
{
|
|
"epoch": 4.234086242299795,
|
|
"grad_norm": 0.22760003186393898,
|
|
"learning_rate": 1.6286065945658368e-05,
|
|
"loss": 0.3831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12102054059505463,
|
|
"step": 690,
|
|
"valid_targets_mean": 9244.2,
|
|
"valid_targets_min": 216
|
|
},
|
|
{
|
|
"epoch": 4.2648870636550305,
|
|
"grad_norm": 0.19333947497396198,
|
|
"learning_rate": 1.598564050665882e-05,
|
|
"loss": 0.3861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13309797644615173,
|
|
"step": 695,
|
|
"valid_targets_mean": 10669.2,
|
|
"valid_targets_min": 2158
|
|
},
|
|
{
|
|
"epoch": 4.295687885010267,
|
|
"grad_norm": 0.241912003794477,
|
|
"learning_rate": 1.5686155987900604e-05,
|
|
"loss": 0.3782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11429376900196075,
|
|
"step": 700,
|
|
"valid_targets_mean": 9385.2,
|
|
"valid_targets_min": 4513
|
|
},
|
|
{
|
|
"epoch": 4.326488706365503,
|
|
"grad_norm": 0.20300102211827217,
|
|
"learning_rate": 1.53876825851512e-05,
|
|
"loss": 0.3915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12321654707193375,
|
|
"step": 705,
|
|
"valid_targets_mean": 8934.8,
|
|
"valid_targets_min": 3695
|
|
},
|
|
{
|
|
"epoch": 4.357289527720739,
|
|
"grad_norm": 0.2217694923947379,
|
|
"learning_rate": 1.5090290257184019e-05,
|
|
"loss": 0.3925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13099467754364014,
|
|
"step": 710,
|
|
"valid_targets_mean": 9030.1,
|
|
"valid_targets_min": 280
|
|
},
|
|
{
|
|
"epoch": 4.388090349075975,
|
|
"grad_norm": 0.48428449706252485,
|
|
"learning_rate": 1.4794048709380816e-05,
|
|
"loss": 0.3849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13166169822216034,
|
|
"step": 715,
|
|
"valid_targets_mean": 10115.8,
|
|
"valid_targets_min": 3423
|
|
},
|
|
{
|
|
"epoch": 4.418891170431212,
|
|
"grad_norm": 0.1935843270430586,
|
|
"learning_rate": 1.4499027377393571e-05,
|
|
"loss": 0.3827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13026298582553864,
|
|
"step": 720,
|
|
"valid_targets_mean": 10494.2,
|
|
"valid_targets_min": 2821
|
|
},
|
|
{
|
|
"epoch": 4.449691991786447,
|
|
"grad_norm": 0.22093960967485268,
|
|
"learning_rate": 1.420529541086951e-05,
|
|
"loss": 0.3845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14177194237709045,
|
|
"step": 725,
|
|
"valid_targets_mean": 10633.6,
|
|
"valid_targets_min": 1912
|
|
},
|
|
{
|
|
"epoch": 4.480492813141684,
|
|
"grad_norm": 0.2115501533825174,
|
|
"learning_rate": 1.3912921657243282e-05,
|
|
"loss": 0.3941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11877399682998657,
|
|
"step": 730,
|
|
"valid_targets_mean": 9276.1,
|
|
"valid_targets_min": 2562
|
|
},
|
|
{
|
|
"epoch": 4.51129363449692,
|
|
"grad_norm": 0.1915086121574694,
|
|
"learning_rate": 1.3621974645599854e-05,
|
|
"loss": 0.3877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12440060824155807,
|
|
"step": 735,
|
|
"valid_targets_mean": 10263.1,
|
|
"valid_targets_min": 2476
|
|
},
|
|
{
|
|
"epoch": 4.5420944558521565,
|
|
"grad_norm": 0.19955698666287677,
|
|
"learning_rate": 1.3332522570612097e-05,
|
|
"loss": 0.3902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11496643722057343,
|
|
"step": 740,
|
|
"valid_targets_mean": 8955.9,
|
|
"valid_targets_min": 1880
|
|
},
|
|
{
|
|
"epoch": 4.572895277207392,
|
|
"grad_norm": 0.21278349081356462,
|
|
"learning_rate": 1.3044633276556695e-05,
|
|
"loss": 0.385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12588758766651154,
|
|
"step": 745,
|
|
"valid_targets_mean": 10313.9,
|
|
"valid_targets_min": 4151
|
|
},
|
|
{
|
|
"epoch": 4.6036960985626285,
|
|
"grad_norm": 0.23757114734271179,
|
|
"learning_rate": 1.2758374241412276e-05,
|
|
"loss": 0.3904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12705953419208527,
|
|
"step": 750,
|
|
"valid_targets_mean": 8989.1,
|
|
"valid_targets_min": 1365
|
|
},
|
|
{
|
|
"epoch": 4.634496919917865,
|
|
"grad_norm": 0.23378779608166858,
|
|
"learning_rate": 1.2473812561043293e-05,
|
|
"loss": 0.3866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1282326877117157,
|
|
"step": 755,
|
|
"valid_targets_mean": 9478.4,
|
|
"valid_targets_min": 1609
|
|
},
|
|
{
|
|
"epoch": 4.6652977412731005,
|
|
"grad_norm": 0.1876324920782784,
|
|
"learning_rate": 1.2191014933473526e-05,
|
|
"loss": 0.3801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11655007302761078,
|
|
"step": 760,
|
|
"valid_targets_mean": 9860.2,
|
|
"valid_targets_min": 3490
|
|
},
|
|
{
|
|
"epoch": 4.696098562628337,
|
|
"grad_norm": 0.2024605395439328,
|
|
"learning_rate": 1.1910047643252822e-05,
|
|
"loss": 0.3846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12183412909507751,
|
|
"step": 765,
|
|
"valid_targets_mean": 9874.6,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 4.726899383983573,
|
|
"grad_norm": 0.2240778795739287,
|
|
"learning_rate": 1.1630976545920777e-05,
|
|
"loss": 0.379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13441401720046997,
|
|
"step": 770,
|
|
"valid_targets_mean": 11569.1,
|
|
"valid_targets_min": 3648
|
|
},
|
|
{
|
|
"epoch": 4.757700205338809,
|
|
"grad_norm": 0.19443047557328758,
|
|
"learning_rate": 1.1353867052570915e-05,
|
|
"loss": 0.3832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1342424750328064,
|
|
"step": 775,
|
|
"valid_targets_mean": 10856.6,
|
|
"valid_targets_min": 1264
|
|
},
|
|
{
|
|
"epoch": 4.788501026694045,
|
|
"grad_norm": 0.2271657443086,
|
|
"learning_rate": 1.1078784114519072e-05,
|
|
"loss": 0.3851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13600531220436096,
|
|
"step": 780,
|
|
"valid_targets_mean": 10584.8,
|
|
"valid_targets_min": 1545
|
|
},
|
|
{
|
|
"epoch": 4.819301848049282,
|
|
"grad_norm": 0.21153458923939814,
|
|
"learning_rate": 1.0805792208079553e-05,
|
|
"loss": 0.3824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11498244106769562,
|
|
"step": 785,
|
|
"valid_targets_mean": 9707.3,
|
|
"valid_targets_min": 334
|
|
},
|
|
{
|
|
"epoch": 4.850102669404517,
|
|
"grad_norm": 0.19691943715186438,
|
|
"learning_rate": 1.0534955319452638e-05,
|
|
"loss": 0.3869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14030292630195618,
|
|
"step": 790,
|
|
"valid_targets_mean": 11372.9,
|
|
"valid_targets_min": 3355
|
|
},
|
|
{
|
|
"epoch": 4.880903490759754,
|
|
"grad_norm": 0.20881508297490065,
|
|
"learning_rate": 1.0266336929726899e-05,
|
|
"loss": 0.3919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13601678609848022,
|
|
"step": 795,
|
|
"valid_targets_mean": 10858.4,
|
|
"valid_targets_min": 2310
|
|
},
|
|
{
|
|
"epoch": 4.91170431211499,
|
|
"grad_norm": 0.23817907360477347,
|
|
"learning_rate": 1.0000000000000006e-05,
|
|
"loss": 0.3847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13049688935279846,
|
|
"step": 800,
|
|
"valid_targets_mean": 9990.5,
|
|
"valid_targets_min": 3354
|
|
},
|
|
{
|
|
"epoch": 4.942505133470226,
|
|
"grad_norm": 0.21302030580553924,
|
|
"learning_rate": 9.736006956621302e-06,
|
|
"loss": 0.3879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14719609916210175,
|
|
"step": 805,
|
|
"valid_targets_mean": 11068.6,
|
|
"valid_targets_min": 2399
|
|
},
|
|
{
|
|
"epoch": 4.973305954825462,
|
|
"grad_norm": 0.20406255452776648,
|
|
"learning_rate": 9.474419676559846e-06,
|
|
"loss": 0.3907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11775431036949158,
|
|
"step": 810,
|
|
"valid_targets_mean": 9648.8,
|
|
"valid_targets_min": 3376
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.31881592188028646,
|
|
"learning_rate": 9.215299472901078e-06,
|
|
"loss": 0.3903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4071720242500305,
|
|
"step": 815,
|
|
"valid_targets_mean": 10948.0,
|
|
"valid_targets_min": 4411
|
|
},
|
|
{
|
|
"epoch": 5.030800821355236,
|
|
"grad_norm": 0.21230100597619836,
|
|
"learning_rate": 8.958707080475806e-06,
|
|
"loss": 0.3811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11637009680271149,
|
|
"step": 820,
|
|
"valid_targets_mean": 9163.8,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 5.061601642710472,
|
|
"grad_norm": 0.20804828235766026,
|
|
"learning_rate": 8.704702641624581e-06,
|
|
"loss": 0.3875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1325514316558838,
|
|
"step": 825,
|
|
"valid_targets_mean": 9673.5,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 5.092402464065708,
|
|
"grad_norm": 0.19984821989380258,
|
|
"learning_rate": 8.453345692101076e-06,
|
|
"loss": 0.3863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14904968440532684,
|
|
"step": 830,
|
|
"valid_targets_mean": 10810.3,
|
|
"valid_targets_min": 3846
|
|
},
|
|
{
|
|
"epoch": 5.123203285420945,
|
|
"grad_norm": 0.1970466702581415,
|
|
"learning_rate": 8.204695147117527e-06,
|
|
"loss": 0.3818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1161932498216629,
|
|
"step": 835,
|
|
"valid_targets_mean": 9579.4,
|
|
"valid_targets_min": 4289
|
|
},
|
|
{
|
|
"epoch": 5.15400410677618,
|
|
"grad_norm": 0.2100897675843508,
|
|
"learning_rate": 7.958809287535741e-06,
|
|
"loss": 0.3794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13574275374412537,
|
|
"step": 840,
|
|
"valid_targets_mean": 10023.3,
|
|
"valid_targets_min": 2726
|
|
},
|
|
{
|
|
"epoch": 5.184804928131417,
|
|
"grad_norm": 0.21910142685280182,
|
|
"learning_rate": 7.715745746206644e-06,
|
|
"loss": 0.389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1291072815656662,
|
|
"step": 845,
|
|
"valid_targets_mean": 10159.6,
|
|
"valid_targets_min": 2555
|
|
},
|
|
{
|
|
"epoch": 5.215605749486653,
|
|
"grad_norm": 0.1897236498106727,
|
|
"learning_rate": 7.475561494461882e-06,
|
|
"loss": 0.3856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12237901240587234,
|
|
"step": 850,
|
|
"valid_targets_mean": 9590.3,
|
|
"valid_targets_min": 375
|
|
},
|
|
{
|
|
"epoch": 5.246406570841889,
|
|
"grad_norm": 0.19705449280585383,
|
|
"learning_rate": 7.238312828760312e-06,
|
|
"loss": 0.3866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13103556632995605,
|
|
"step": 855,
|
|
"valid_targets_mean": 10539.4,
|
|
"valid_targets_min": 554
|
|
},
|
|
{
|
|
"epoch": 5.277207392197125,
|
|
"grad_norm": 0.20548584664482955,
|
|
"learning_rate": 7.0040553574928115e-06,
|
|
"loss": 0.3776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12963303923606873,
|
|
"step": 860,
|
|
"valid_targets_mean": 9509.7,
|
|
"valid_targets_min": 2001
|
|
},
|
|
{
|
|
"epoch": 5.308008213552362,
|
|
"grad_norm": 0.20230787548228713,
|
|
"learning_rate": 6.772843987948259e-06,
|
|
"loss": 0.3775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12533414363861084,
|
|
"step": 865,
|
|
"valid_targets_mean": 10205.4,
|
|
"valid_targets_min": 1049
|
|
},
|
|
{
|
|
"epoch": 5.338809034907597,
|
|
"grad_norm": 0.19960934159328975,
|
|
"learning_rate": 6.544732913443925e-06,
|
|
"loss": 0.3749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12100720405578613,
|
|
"step": 870,
|
|
"valid_targets_mean": 9763.3,
|
|
"valid_targets_min": 1776
|
|
},
|
|
{
|
|
"epoch": 5.369609856262834,
|
|
"grad_norm": 0.22208580453128474,
|
|
"learning_rate": 6.319775600623139e-06,
|
|
"loss": 0.3778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11866745352745056,
|
|
"step": 875,
|
|
"valid_targets_mean": 9620.2,
|
|
"valid_targets_min": 309
|
|
},
|
|
{
|
|
"epoch": 5.40041067761807,
|
|
"grad_norm": 0.20031614807080989,
|
|
"learning_rate": 6.098024776923359e-06,
|
|
"loss": 0.3838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10915479809045792,
|
|
"step": 880,
|
|
"valid_targets_mean": 9155.7,
|
|
"valid_targets_min": 2445
|
|
},
|
|
{
|
|
"epoch": 5.431211498973306,
|
|
"grad_norm": 0.2001139189820283,
|
|
"learning_rate": 5.87953241821741e-06,
|
|
"loss": 0.3823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13553708791732788,
|
|
"step": 885,
|
|
"valid_targets_mean": 10978.1,
|
|
"valid_targets_min": 2277
|
|
},
|
|
{
|
|
"epoch": 5.462012320328542,
|
|
"grad_norm": 0.19077714182439787,
|
|
"learning_rate": 5.664349736630979e-06,
|
|
"loss": 0.3811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12424496561288834,
|
|
"step": 890,
|
|
"valid_targets_mean": 9724.1,
|
|
"valid_targets_min": 2192
|
|
},
|
|
{
|
|
"epoch": 5.492813141683778,
|
|
"grad_norm": 0.220365025104229,
|
|
"learning_rate": 5.452527168539026e-06,
|
|
"loss": 0.3847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12921196222305298,
|
|
"step": 895,
|
|
"valid_targets_mean": 11099.1,
|
|
"valid_targets_min": 297
|
|
},
|
|
{
|
|
"epoch": 5.523613963039015,
|
|
"grad_norm": 0.20329801535781664,
|
|
"learning_rate": 5.244114362744126e-06,
|
|
"loss": 0.3765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12272413074970245,
|
|
"step": 900,
|
|
"valid_targets_mean": 9506.2,
|
|
"valid_targets_min": 3223
|
|
},
|
|
{
|
|
"epoch": 5.55441478439425,
|
|
"grad_norm": 0.19835171577454092,
|
|
"learning_rate": 5.039160168839292e-06,
|
|
"loss": 0.3862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13050030171871185,
|
|
"step": 905,
|
|
"valid_targets_mean": 9120.8,
|
|
"valid_targets_min": 356
|
|
},
|
|
{
|
|
"epoch": 5.585215605749487,
|
|
"grad_norm": 0.19273836833844,
|
|
"learning_rate": 4.837712625758251e-06,
|
|
"loss": 0.3811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11816704273223877,
|
|
"step": 910,
|
|
"valid_targets_mean": 9898.2,
|
|
"valid_targets_min": 3404
|
|
},
|
|
{
|
|
"epoch": 5.616016427104723,
|
|
"grad_norm": 0.2135071040731185,
|
|
"learning_rate": 4.639818950515598e-06,
|
|
"loss": 0.3714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13075876235961914,
|
|
"step": 915,
|
|
"valid_targets_mean": 10097.0,
|
|
"valid_targets_min": 2298
|
|
},
|
|
{
|
|
"epoch": 5.646817248459959,
|
|
"grad_norm": 0.20145187233343603,
|
|
"learning_rate": 4.445525527139725e-06,
|
|
"loss": 0.3888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1326562911272049,
|
|
"step": 920,
|
|
"valid_targets_mean": 10346.2,
|
|
"valid_targets_min": 2579
|
|
},
|
|
{
|
|
"epoch": 5.677618069815195,
|
|
"grad_norm": 0.2026728553310299,
|
|
"learning_rate": 4.2548778958008795e-06,
|
|
"loss": 0.3833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12102033942937851,
|
|
"step": 925,
|
|
"valid_targets_mean": 10037.0,
|
|
"valid_targets_min": 2033
|
|
},
|
|
{
|
|
"epoch": 5.708418891170432,
|
|
"grad_norm": 0.22942543241242366,
|
|
"learning_rate": 4.067920742137115e-06,
|
|
"loss": 0.3788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12767393887043,
|
|
"step": 930,
|
|
"valid_targets_mean": 9976.0,
|
|
"valid_targets_min": 769
|
|
},
|
|
{
|
|
"epoch": 5.739219712525667,
|
|
"grad_norm": 0.2107947822476004,
|
|
"learning_rate": 3.884697886780437e-06,
|
|
"loss": 0.3763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13081158697605133,
|
|
"step": 935,
|
|
"valid_targets_mean": 10241.8,
|
|
"valid_targets_min": 1520
|
|
},
|
|
{
|
|
"epoch": 5.770020533880904,
|
|
"grad_norm": 0.20468361463207382,
|
|
"learning_rate": 3.705252275085791e-06,
|
|
"loss": 0.3862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1312338411808014,
|
|
"step": 940,
|
|
"valid_targets_mean": 9789.3,
|
|
"valid_targets_min": 3126
|
|
},
|
|
{
|
|
"epoch": 5.80082135523614,
|
|
"grad_norm": 0.2077142192995124,
|
|
"learning_rate": 3.5296259670651177e-06,
|
|
"loss": 0.38,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12859410047531128,
|
|
"step": 945,
|
|
"valid_targets_mean": 9755.1,
|
|
"valid_targets_min": 3721
|
|
},
|
|
{
|
|
"epoch": 5.831622176591376,
|
|
"grad_norm": 0.2293598580227486,
|
|
"learning_rate": 3.357860127529e-06,
|
|
"loss": 0.3814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11473749577999115,
|
|
"step": 950,
|
|
"valid_targets_mean": 9844.6,
|
|
"valid_targets_min": 2569
|
|
},
|
|
{
|
|
"epoch": 5.862422997946612,
|
|
"grad_norm": 0.22647676832017485,
|
|
"learning_rate": 3.1899950164380677e-06,
|
|
"loss": 0.3842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13261717557907104,
|
|
"step": 955,
|
|
"valid_targets_mean": 9990.8,
|
|
"valid_targets_min": 2856
|
|
},
|
|
{
|
|
"epoch": 5.8932238193018485,
|
|
"grad_norm": 0.21079531555994593,
|
|
"learning_rate": 3.0260699794665527e-06,
|
|
"loss": 0.3878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12958693504333496,
|
|
"step": 960,
|
|
"valid_targets_mean": 9744.6,
|
|
"valid_targets_min": 3064
|
|
},
|
|
{
|
|
"epoch": 5.924024640657084,
|
|
"grad_norm": 0.19047352276450594,
|
|
"learning_rate": 2.866123438780073e-06,
|
|
"loss": 0.3832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10699409246444702,
|
|
"step": 965,
|
|
"valid_targets_mean": 9628.2,
|
|
"valid_targets_min": 382
|
|
},
|
|
{
|
|
"epoch": 5.95482546201232,
|
|
"grad_norm": 0.20209054048874064,
|
|
"learning_rate": 2.710192884029954e-06,
|
|
"loss": 0.3781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11523480713367462,
|
|
"step": 970,
|
|
"valid_targets_mean": 9445.2,
|
|
"valid_targets_min": 365
|
|
},
|
|
{
|
|
"epoch": 5.985626283367557,
|
|
"grad_norm": 0.2019234767006319,
|
|
"learning_rate": 2.558314863566043e-06,
|
|
"loss": 0.381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11367767304182053,
|
|
"step": 975,
|
|
"valid_targets_mean": 9772.4,
|
|
"valid_targets_min": 3046
|
|
},
|
|
{
|
|
"epoch": 6.012320328542095,
|
|
"grad_norm": 0.1900395292909151,
|
|
"learning_rate": 2.410524975870221e-06,
|
|
"loss": 0.3789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10480041801929474,
|
|
"step": 980,
|
|
"valid_targets_mean": 9407.6,
|
|
"valid_targets_min": 2045
|
|
},
|
|
{
|
|
"epoch": 6.04312114989733,
|
|
"grad_norm": 0.19218447130290486,
|
|
"learning_rate": 2.266857861212499e-06,
|
|
"loss": 0.3828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1330980658531189,
|
|
"step": 985,
|
|
"valid_targets_mean": 10623.0,
|
|
"valid_targets_min": 2736
|
|
},
|
|
{
|
|
"epoch": 6.073921971252567,
|
|
"grad_norm": 0.199658834668884,
|
|
"learning_rate": 2.127347193531757e-06,
|
|
"loss": 0.3788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11302483081817627,
|
|
"step": 990,
|
|
"valid_targets_mean": 8684.1,
|
|
"valid_targets_min": 779
|
|
},
|
|
{
|
|
"epoch": 6.104722792607803,
|
|
"grad_norm": 0.18466431044606102,
|
|
"learning_rate": 1.9920256725429275e-06,
|
|
"loss": 0.3787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12558288872241974,
|
|
"step": 995,
|
|
"valid_targets_mean": 9945.8,
|
|
"valid_targets_min": 2175
|
|
},
|
|
{
|
|
"epoch": 6.135523613963039,
|
|
"grad_norm": 0.19451755215714164,
|
|
"learning_rate": 1.8609250160725877e-06,
|
|
"loss": 0.3755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12043657898902893,
|
|
"step": 1000,
|
|
"valid_targets_mean": 9261.3,
|
|
"valid_targets_min": 3221
|
|
},
|
|
{
|
|
"epoch": 6.166324435318275,
|
|
"grad_norm": 0.1989609973315876,
|
|
"learning_rate": 1.7340759526246254e-06,
|
|
"loss": 0.3761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1296187788248062,
|
|
"step": 1005,
|
|
"valid_targets_mean": 10305.9,
|
|
"valid_targets_min": 313
|
|
},
|
|
{
|
|
"epoch": 6.1971252566735116,
|
|
"grad_norm": 0.20176202225188247,
|
|
"learning_rate": 1.6115082141778459e-06,
|
|
"loss": 0.3827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13769292831420898,
|
|
"step": 1010,
|
|
"valid_targets_mean": 10433.3,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 6.227926078028747,
|
|
"grad_norm": 0.18970780271253246,
|
|
"learning_rate": 1.4932505292171407e-06,
|
|
"loss": 0.3743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13758821785449982,
|
|
"step": 1015,
|
|
"valid_targets_mean": 10472.3,
|
|
"valid_targets_min": 3634
|
|
},
|
|
{
|
|
"epoch": 6.2587268993839835,
|
|
"grad_norm": 0.19962437444228312,
|
|
"learning_rate": 1.3793306159998498e-06,
|
|
"loss": 0.383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1420556902885437,
|
|
"step": 1020,
|
|
"valid_targets_mean": 10590.9,
|
|
"valid_targets_min": 2254
|
|
},
|
|
{
|
|
"epoch": 6.28952772073922,
|
|
"grad_norm": 0.18873687421623273,
|
|
"learning_rate": 1.2697751760589072e-06,
|
|
"loss": 0.379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13322848081588745,
|
|
"step": 1025,
|
|
"valid_targets_mean": 10306.2,
|
|
"valid_targets_min": 2599
|
|
},
|
|
{
|
|
"epoch": 6.3203285420944555,
|
|
"grad_norm": 0.1935512888145092,
|
|
"learning_rate": 1.1646098879443124e-06,
|
|
"loss": 0.3827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14393621683120728,
|
|
"step": 1030,
|
|
"valid_targets_mean": 10398.6,
|
|
"valid_targets_min": 373
|
|
},
|
|
{
|
|
"epoch": 6.351129363449692,
|
|
"grad_norm": 0.18545682928793727,
|
|
"learning_rate": 1.0638594012043834e-06,
|
|
"loss": 0.3849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11467485129833221,
|
|
"step": 1035,
|
|
"valid_targets_mean": 9599.2,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 6.381930184804928,
|
|
"grad_norm": 0.2022710371779159,
|
|
"learning_rate": 9.67547330608165e-07,
|
|
"loss": 0.3803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10918845981359482,
|
|
"step": 1040,
|
|
"valid_targets_mean": 9210.8,
|
|
"valid_targets_min": 2726
|
|
},
|
|
{
|
|
"epoch": 6.412731006160164,
|
|
"grad_norm": 0.2206043061190306,
|
|
"learning_rate": 8.756962506103983e-07,
|
|
"loss": 0.3821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15293879806995392,
|
|
"step": 1045,
|
|
"valid_targets_mean": 11501.6,
|
|
"valid_targets_min": 1882
|
|
},
|
|
{
|
|
"epoch": 6.4435318275154,
|
|
"grad_norm": 0.18582939615049404,
|
|
"learning_rate": 7.883276900603288e-07,
|
|
"loss": 0.3762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11088518053293228,
|
|
"step": 1050,
|
|
"valid_targets_mean": 9877.3,
|
|
"valid_targets_min": 3126
|
|
},
|
|
{
|
|
"epoch": 6.474332648870637,
|
|
"grad_norm": 0.1972521643922487,
|
|
"learning_rate": 7.054621271555917e-07,
|
|
"loss": 0.3811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11362037062644958,
|
|
"step": 1055,
|
|
"valid_targets_mean": 9118.0,
|
|
"valid_targets_min": 3093
|
|
},
|
|
{
|
|
"epoch": 6.505133470225873,
|
|
"grad_norm": 0.17586915711860904,
|
|
"learning_rate": 6.271189846423543e-07,
|
|
"loss": 0.384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12249845266342163,
|
|
"step": 1060,
|
|
"valid_targets_mean": 10817.7,
|
|
"valid_targets_min": 2099
|
|
},
|
|
{
|
|
"epoch": 6.535934291581109,
|
|
"grad_norm": 0.18416484921166754,
|
|
"learning_rate": 5.533166252628319e-07,
|
|
"loss": 0.3782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14194294810295105,
|
|
"step": 1065,
|
|
"valid_targets_mean": 11814.9,
|
|
"valid_targets_min": 3005
|
|
},
|
|
{
|
|
"epoch": 6.566735112936345,
|
|
"grad_norm": 0.22274241611101864,
|
|
"learning_rate": 4.840723474512876e-07,
|
|
"loss": 0.3811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11658580601215363,
|
|
"step": 1070,
|
|
"valid_targets_mean": 8806.6,
|
|
"valid_targets_min": 2184
|
|
},
|
|
{
|
|
"epoch": 6.597535934291582,
|
|
"grad_norm": 0.18488977562314957,
|
|
"learning_rate": 4.1940238127946785e-07,
|
|
"loss": 0.3772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13311666250228882,
|
|
"step": 1075,
|
|
"valid_targets_mean": 10068.2,
|
|
"valid_targets_min": 1276
|
|
},
|
|
{
|
|
"epoch": 6.628336755646817,
|
|
"grad_norm": 0.1827839387461324,
|
|
"learning_rate": 3.593218846524571e-07,
|
|
"loss": 0.3869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13171571493148804,
|
|
"step": 1080,
|
|
"valid_targets_mean": 10453.6,
|
|
"valid_targets_min": 2963
|
|
},
|
|
{
|
|
"epoch": 6.6591375770020536,
|
|
"grad_norm": 0.1928136072465324,
|
|
"learning_rate": 3.038449397558396e-07,
|
|
"loss": 0.384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12778720259666443,
|
|
"step": 1085,
|
|
"valid_targets_mean": 9230.7,
|
|
"valid_targets_min": 2828
|
|
},
|
|
{
|
|
"epoch": 6.68993839835729,
|
|
"grad_norm": 0.1870233805513281,
|
|
"learning_rate": 2.52984549754991e-07,
|
|
"loss": 0.3799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12494305521249771,
|
|
"step": 1090,
|
|
"valid_targets_mean": 9985.8,
|
|
"valid_targets_min": 3213
|
|
},
|
|
{
|
|
"epoch": 6.7207392197125255,
|
|
"grad_norm": 0.1918146360124604,
|
|
"learning_rate": 2.0675263574729376e-07,
|
|
"loss": 0.3756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1374993622303009,
|
|
"step": 1095,
|
|
"valid_targets_mean": 10922.1,
|
|
"valid_targets_min": 3853
|
|
},
|
|
{
|
|
"epoch": 6.751540041067762,
|
|
"grad_norm": 0.18053030537193954,
|
|
"learning_rate": 1.6516003396795489e-07,
|
|
"loss": 0.3819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13119381666183472,
|
|
"step": 1100,
|
|
"valid_targets_mean": 10285.2,
|
|
"valid_targets_min": 1441
|
|
},
|
|
{
|
|
"epoch": 6.782340862422998,
|
|
"grad_norm": 0.18115468859329534,
|
|
"learning_rate": 1.2821649325012396e-07,
|
|
"loss": 0.3817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12020154297351837,
|
|
"step": 1105,
|
|
"valid_targets_mean": 10192.8,
|
|
"valid_targets_min": 3135
|
|
},
|
|
{
|
|
"epoch": 6.813141683778234,
|
|
"grad_norm": 0.1815765782306925,
|
|
"learning_rate": 9.593067273987456e-08,
|
|
"loss": 0.3827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12480635195970535,
|
|
"step": 1110,
|
|
"valid_targets_mean": 9938.3,
|
|
"valid_targets_min": 280
|
|
},
|
|
{
|
|
"epoch": 6.84394250513347,
|
|
"grad_norm": 0.18701855589480754,
|
|
"learning_rate": 6.831013986660307e-08,
|
|
"loss": 0.3769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12354324758052826,
|
|
"step": 1115,
|
|
"valid_targets_mean": 9370.7,
|
|
"valid_targets_min": 394
|
|
},
|
|
{
|
|
"epoch": 6.874743326488707,
|
|
"grad_norm": 0.18257135141110437,
|
|
"learning_rate": 4.5361368569301064e-08,
|
|
"loss": 0.3858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11373500525951385,
|
|
"step": 1120,
|
|
"valid_targets_mean": 9538.7,
|
|
"valid_targets_min": 769
|
|
},
|
|
{
|
|
"epoch": 6.905544147843942,
|
|
"grad_norm": 0.18518035371992592,
|
|
"learning_rate": 2.7089737779142365e-08,
|
|
"loss": 0.3768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11957018822431564,
|
|
"step": 1125,
|
|
"valid_targets_mean": 10110.7,
|
|
"valid_targets_min": 511
|
|
},
|
|
{
|
|
"epoch": 6.936344969199179,
|
|
"grad_norm": 0.1931229420258479,
|
|
"learning_rate": 1.349953015872707e-08,
|
|
"loss": 0.3827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13230648636817932,
|
|
"step": 1130,
|
|
"valid_targets_mean": 10297.8,
|
|
"valid_targets_min": 3331
|
|
},
|
|
{
|
|
"epoch": 6.967145790554415,
|
|
"grad_norm": 0.19107387956856198,
|
|
"learning_rate": 4.593931098262338e-09,
|
|
"loss": 0.375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13124366104602814,
|
|
"step": 1135,
|
|
"valid_targets_mean": 10772.5,
|
|
"valid_targets_min": 3633
|
|
},
|
|
{
|
|
"epoch": 6.997946611909651,
|
|
"grad_norm": 0.19309149811821025,
|
|
"learning_rate": 3.7502796895516615e-10,
|
|
"loss": 0.3802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12663644552230835,
|
|
"step": 1140,
|
|
"valid_targets_mean": 9650.2,
|
|
"valid_targets_min": 304
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"step": 1141,
|
|
"total_flos": 4.616861268529644e+18,
|
|
"train_loss": 0.0,
|
|
"train_runtime": 12.7361,
|
|
"train_samples_per_second": 8551.494,
|
|
"train_steps_per_second": 89.588
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 1141,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 300,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 4.616861268529644e+18,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|