Model: laion/Qwen3-8B_exp_tas_tmux_large_traces_save-strategy_steps Source: Original Platform
5625 lines
156 KiB
JSON
5625 lines
156 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 8.0,
|
|
"eval_steps": 500,
|
|
"global_step": 2536,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.015772870662460567,
|
|
"grad_norm": 2.169656955227513,
|
|
"learning_rate": 3.0769230769230774e-05,
|
|
"loss": 0.9249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.817986011505127,
|
|
"step": 5,
|
|
"valid_targets_mean": 3434.8,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 0.031545741324921134,
|
|
"grad_norm": 0.8559043399532138,
|
|
"learning_rate": 6.923076923076924e-05,
|
|
"loss": 0.7835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7119892835617065,
|
|
"step": 10,
|
|
"valid_targets_mean": 3699.3,
|
|
"valid_targets_min": 1231
|
|
},
|
|
{
|
|
"epoch": 0.0473186119873817,
|
|
"grad_norm": 0.6757245841747609,
|
|
"learning_rate": 9.99999612380875e-05,
|
|
"loss": 0.6389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6657205820083618,
|
|
"step": 15,
|
|
"valid_targets_mean": 3761.8,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 0.06309148264984227,
|
|
"grad_norm": 0.5457612768667142,
|
|
"learning_rate": 9.999860457746025e-05,
|
|
"loss": 0.6338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6094217300415039,
|
|
"step": 20,
|
|
"valid_targets_mean": 3539.5,
|
|
"valid_targets_min": 981
|
|
},
|
|
{
|
|
"epoch": 0.07886435331230283,
|
|
"grad_norm": 0.5562908717471081,
|
|
"learning_rate": 9.999530988130677e-05,
|
|
"loss": 0.6127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6498370170593262,
|
|
"step": 25,
|
|
"valid_targets_mean": 3522.9,
|
|
"valid_targets_min": 1087
|
|
},
|
|
{
|
|
"epoch": 0.0946372239747634,
|
|
"grad_norm": 0.5548782107356937,
|
|
"learning_rate": 9.999007727733537e-05,
|
|
"loss": 0.6193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.594431459903717,
|
|
"step": 30,
|
|
"valid_targets_mean": 2975.7,
|
|
"valid_targets_min": 1141
|
|
},
|
|
{
|
|
"epoch": 0.11041009463722397,
|
|
"grad_norm": 0.4872105480697271,
|
|
"learning_rate": 9.998290696837115e-05,
|
|
"loss": 0.5795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5703617334365845,
|
|
"step": 35,
|
|
"valid_targets_mean": 3581.2,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 0.12618296529968454,
|
|
"grad_norm": 0.4858873354273939,
|
|
"learning_rate": 9.997379923234816e-05,
|
|
"loss": 0.5804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5455387830734253,
|
|
"step": 40,
|
|
"valid_targets_mean": 3317.4,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 0.14195583596214512,
|
|
"grad_norm": 0.5108147907048001,
|
|
"learning_rate": 9.996275442229857e-05,
|
|
"loss": 0.5701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5683155655860901,
|
|
"step": 45,
|
|
"valid_targets_mean": 3741.8,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 0.15772870662460567,
|
|
"grad_norm": 0.5274941244563711,
|
|
"learning_rate": 9.994977296633902e-05,
|
|
"loss": 0.5859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6010461449623108,
|
|
"step": 50,
|
|
"valid_targets_mean": 3634.8,
|
|
"valid_targets_min": 1082
|
|
},
|
|
{
|
|
"epoch": 0.17350157728706625,
|
|
"grad_norm": 0.5937423674749936,
|
|
"learning_rate": 9.993485536765398e-05,
|
|
"loss": 0.5602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5546848773956299,
|
|
"step": 55,
|
|
"valid_targets_mean": 4297.9,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 0.1892744479495268,
|
|
"grad_norm": 0.4747000855685691,
|
|
"learning_rate": 9.991800220447634e-05,
|
|
"loss": 0.5922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5315375328063965,
|
|
"step": 60,
|
|
"valid_targets_mean": 3689.7,
|
|
"valid_targets_min": 961
|
|
},
|
|
{
|
|
"epoch": 0.20504731861198738,
|
|
"grad_norm": 0.5010503817387185,
|
|
"learning_rate": 9.989921413006489e-05,
|
|
"loss": 0.5805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6408827304840088,
|
|
"step": 65,
|
|
"valid_targets_mean": 3859.3,
|
|
"valid_targets_min": 1319
|
|
},
|
|
{
|
|
"epoch": 0.22082018927444794,
|
|
"grad_norm": 0.468284305556988,
|
|
"learning_rate": 9.987849187267908e-05,
|
|
"loss": 0.5505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5802025198936462,
|
|
"step": 70,
|
|
"valid_targets_mean": 3437.2,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 0.23659305993690852,
|
|
"grad_norm": 0.4456626356470159,
|
|
"learning_rate": 9.985583623555076e-05,
|
|
"loss": 0.5571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5259662866592407,
|
|
"step": 75,
|
|
"valid_targets_mean": 3729.0,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 0.25236593059936907,
|
|
"grad_norm": 0.5189246417709207,
|
|
"learning_rate": 9.9831248096853e-05,
|
|
"loss": 0.5465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5496809482574463,
|
|
"step": 80,
|
|
"valid_targets_mean": 3154.8,
|
|
"valid_targets_min": 844
|
|
},
|
|
{
|
|
"epoch": 0.26813880126182965,
|
|
"grad_norm": 0.4242414929398914,
|
|
"learning_rate": 9.980472840966614e-05,
|
|
"loss": 0.558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5265679955482483,
|
|
"step": 85,
|
|
"valid_targets_mean": 4139.5,
|
|
"valid_targets_min": 869
|
|
},
|
|
{
|
|
"epoch": 0.28391167192429023,
|
|
"grad_norm": 0.4360826931800694,
|
|
"learning_rate": 9.977627820194082e-05,
|
|
"loss": 0.5496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5053580403327942,
|
|
"step": 90,
|
|
"valid_targets_mean": 4189.2,
|
|
"valid_targets_min": 815
|
|
},
|
|
{
|
|
"epoch": 0.2996845425867508,
|
|
"grad_norm": 0.45768598697634005,
|
|
"learning_rate": 9.974589857645802e-05,
|
|
"loss": 0.5503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5525804758071899,
|
|
"step": 95,
|
|
"valid_targets_mean": 3723.3,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 0.31545741324921134,
|
|
"grad_norm": 0.45279563265028255,
|
|
"learning_rate": 9.97135907107865e-05,
|
|
"loss": 0.5593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5906380414962769,
|
|
"step": 100,
|
|
"valid_targets_mean": 4154.5,
|
|
"valid_targets_min": 1312
|
|
},
|
|
{
|
|
"epoch": 0.3312302839116719,
|
|
"grad_norm": 0.41876449974133445,
|
|
"learning_rate": 9.967935585723706e-05,
|
|
"loss": 0.5566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.552190899848938,
|
|
"step": 105,
|
|
"valid_targets_mean": 4490.0,
|
|
"valid_targets_min": 1513
|
|
},
|
|
{
|
|
"epoch": 0.3470031545741325,
|
|
"grad_norm": 0.4041961807602131,
|
|
"learning_rate": 9.964319534281397e-05,
|
|
"loss": 0.562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5655333995819092,
|
|
"step": 110,
|
|
"valid_targets_mean": 4229.1,
|
|
"valid_targets_min": 1399
|
|
},
|
|
{
|
|
"epoch": 0.3627760252365931,
|
|
"grad_norm": 0.4960566105435021,
|
|
"learning_rate": 9.960511056916357e-05,
|
|
"loss": 0.5599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5781254768371582,
|
|
"step": 115,
|
|
"valid_targets_mean": 3706.1,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 0.3785488958990536,
|
|
"grad_norm": 0.4459682147440665,
|
|
"learning_rate": 9.956510301251995e-05,
|
|
"loss": 0.5354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.499944269657135,
|
|
"step": 120,
|
|
"valid_targets_mean": 3662.9,
|
|
"valid_targets_min": 749
|
|
},
|
|
{
|
|
"epoch": 0.3943217665615142,
|
|
"grad_norm": 0.3929671447186755,
|
|
"learning_rate": 9.952317422364772e-05,
|
|
"loss": 0.5783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5153095722198486,
|
|
"step": 125,
|
|
"valid_targets_mean": 4084.0,
|
|
"valid_targets_min": 1164
|
|
},
|
|
{
|
|
"epoch": 0.41009463722397477,
|
|
"grad_norm": 0.42897401373515004,
|
|
"learning_rate": 9.947932582778188e-05,
|
|
"loss": 0.5513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5479700565338135,
|
|
"step": 130,
|
|
"valid_targets_mean": 3615.9,
|
|
"valid_targets_min": 1030
|
|
},
|
|
{
|
|
"epoch": 0.42586750788643535,
|
|
"grad_norm": 0.4514170365892061,
|
|
"learning_rate": 9.943355952456483e-05,
|
|
"loss": 0.5508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5287145376205444,
|
|
"step": 135,
|
|
"valid_targets_mean": 3968.4,
|
|
"valid_targets_min": 1382
|
|
},
|
|
{
|
|
"epoch": 0.4416403785488959,
|
|
"grad_norm": 0.4885467964555453,
|
|
"learning_rate": 9.938587708798053e-05,
|
|
"loss": 0.5545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6355825066566467,
|
|
"step": 140,
|
|
"valid_targets_mean": 3822.2,
|
|
"valid_targets_min": 994
|
|
},
|
|
{
|
|
"epoch": 0.45741324921135645,
|
|
"grad_norm": 0.45050804505844816,
|
|
"learning_rate": 9.933628036628569e-05,
|
|
"loss": 0.5707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5147861242294312,
|
|
"step": 145,
|
|
"valid_targets_mean": 4422.2,
|
|
"valid_targets_min": 840
|
|
},
|
|
{
|
|
"epoch": 0.47318611987381703,
|
|
"grad_norm": 0.4629732361929168,
|
|
"learning_rate": 9.92847712819381e-05,
|
|
"loss": 0.5351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5328657031059265,
|
|
"step": 150,
|
|
"valid_targets_mean": 3415.7,
|
|
"valid_targets_min": 834
|
|
},
|
|
{
|
|
"epoch": 0.4889589905362776,
|
|
"grad_norm": 0.4247700925965094,
|
|
"learning_rate": 9.923135183152224e-05,
|
|
"loss": 0.5435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5141998529434204,
|
|
"step": 155,
|
|
"valid_targets_mean": 4627.2,
|
|
"valid_targets_min": 1094
|
|
},
|
|
{
|
|
"epoch": 0.5047318611987381,
|
|
"grad_norm": 0.3810890570531183,
|
|
"learning_rate": 9.91760240856717e-05,
|
|
"loss": 0.5583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5300256609916687,
|
|
"step": 160,
|
|
"valid_targets_mean": 4273.2,
|
|
"valid_targets_min": 1231
|
|
},
|
|
{
|
|
"epoch": 0.5205047318611987,
|
|
"grad_norm": 0.4096717090938379,
|
|
"learning_rate": 9.91187901889891e-05,
|
|
"loss": 0.5518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5601193904876709,
|
|
"step": 165,
|
|
"valid_targets_mean": 3646.8,
|
|
"valid_targets_min": 825
|
|
},
|
|
{
|
|
"epoch": 0.5362776025236593,
|
|
"grad_norm": 0.42309669942651534,
|
|
"learning_rate": 9.905965235996286e-05,
|
|
"loss": 0.5438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5901980400085449,
|
|
"step": 170,
|
|
"valid_targets_mean": 3763.0,
|
|
"valid_targets_min": 1202
|
|
},
|
|
{
|
|
"epoch": 0.5520504731861199,
|
|
"grad_norm": 0.4221962263816931,
|
|
"learning_rate": 9.899861289088121e-05,
|
|
"loss": 0.5728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5548031330108643,
|
|
"step": 175,
|
|
"valid_targets_mean": 3372.2,
|
|
"valid_targets_min": 1078
|
|
},
|
|
{
|
|
"epoch": 0.5678233438485805,
|
|
"grad_norm": 0.4533484589368771,
|
|
"learning_rate": 9.893567414774341e-05,
|
|
"loss": 0.5182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5485403537750244,
|
|
"step": 180,
|
|
"valid_targets_mean": 3786.9,
|
|
"valid_targets_min": 813
|
|
},
|
|
{
|
|
"epoch": 0.583596214511041,
|
|
"grad_norm": 0.4201455517245677,
|
|
"learning_rate": 9.88708385701679e-05,
|
|
"loss": 0.5362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5464404821395874,
|
|
"step": 185,
|
|
"valid_targets_mean": 4054.2,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 0.5993690851735016,
|
|
"grad_norm": 0.3816172170313495,
|
|
"learning_rate": 9.88041086712979e-05,
|
|
"loss": 0.5185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5385897159576416,
|
|
"step": 190,
|
|
"valid_targets_mean": 3817.4,
|
|
"valid_targets_min": 865
|
|
},
|
|
{
|
|
"epoch": 0.6151419558359621,
|
|
"grad_norm": 0.38750216847876323,
|
|
"learning_rate": 9.873548703770388e-05,
|
|
"loss": 0.5143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4823839068412781,
|
|
"step": 195,
|
|
"valid_targets_mean": 4086.1,
|
|
"valid_targets_min": 967
|
|
},
|
|
{
|
|
"epoch": 0.6309148264984227,
|
|
"grad_norm": 0.40503340868678855,
|
|
"learning_rate": 9.866497632928336e-05,
|
|
"loss": 0.5225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5157015919685364,
|
|
"step": 200,
|
|
"valid_targets_mean": 4078.6,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 0.6466876971608833,
|
|
"grad_norm": 0.3541231531701395,
|
|
"learning_rate": 9.859257927915774e-05,
|
|
"loss": 0.5376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5179704427719116,
|
|
"step": 205,
|
|
"valid_targets_mean": 3903.1,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 0.6624605678233438,
|
|
"grad_norm": 0.41202758172155246,
|
|
"learning_rate": 9.851829869356651e-05,
|
|
"loss": 0.522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5223180055618286,
|
|
"step": 210,
|
|
"valid_targets_mean": 4136.1,
|
|
"valid_targets_min": 1128
|
|
},
|
|
{
|
|
"epoch": 0.6782334384858044,
|
|
"grad_norm": 0.423543405280611,
|
|
"learning_rate": 9.844213745175826e-05,
|
|
"loss": 0.5332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5781832337379456,
|
|
"step": 215,
|
|
"valid_targets_mean": 3915.7,
|
|
"valid_targets_min": 1093
|
|
},
|
|
{
|
|
"epoch": 0.694006309148265,
|
|
"grad_norm": 0.35669299013566746,
|
|
"learning_rate": 9.83640985058792e-05,
|
|
"loss": 0.5092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47874531149864197,
|
|
"step": 220,
|
|
"valid_targets_mean": 3844.2,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 0.7097791798107256,
|
|
"grad_norm": 0.3925005403471201,
|
|
"learning_rate": 9.828418488085877e-05,
|
|
"loss": 0.5302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5382158756256104,
|
|
"step": 225,
|
|
"valid_targets_mean": 3521.7,
|
|
"valid_targets_min": 1095
|
|
},
|
|
{
|
|
"epoch": 0.7255520504731862,
|
|
"grad_norm": 0.4325120394989433,
|
|
"learning_rate": 9.820239967429233e-05,
|
|
"loss": 0.5371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5795000791549683,
|
|
"step": 230,
|
|
"valid_targets_mean": 3122.2,
|
|
"valid_targets_min": 641
|
|
},
|
|
{
|
|
"epoch": 0.7413249211356467,
|
|
"grad_norm": 0.36208200156129844,
|
|
"learning_rate": 9.811874605632104e-05,
|
|
"loss": 0.5283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5075106620788574,
|
|
"step": 235,
|
|
"valid_targets_mean": 4049.4,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 0.7570977917981072,
|
|
"grad_norm": 0.39540549433667344,
|
|
"learning_rate": 9.803322726950905e-05,
|
|
"loss": 0.5378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5728358626365662,
|
|
"step": 240,
|
|
"valid_targets_mean": 4206.9,
|
|
"valid_targets_min": 996
|
|
},
|
|
{
|
|
"epoch": 0.7728706624605678,
|
|
"grad_norm": 0.41657667507777546,
|
|
"learning_rate": 9.794584662871787e-05,
|
|
"loss": 0.5135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5691760778427124,
|
|
"step": 245,
|
|
"valid_targets_mean": 3891.3,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 0.7886435331230284,
|
|
"grad_norm": 0.36129818659509033,
|
|
"learning_rate": 9.785660752097768e-05,
|
|
"loss": 0.511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49009859561920166,
|
|
"step": 250,
|
|
"valid_targets_mean": 4718.1,
|
|
"valid_targets_min": 1376
|
|
},
|
|
{
|
|
"epoch": 0.804416403785489,
|
|
"grad_norm": 0.3954585394141833,
|
|
"learning_rate": 9.77655134053563e-05,
|
|
"loss": 0.5271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5566205978393555,
|
|
"step": 255,
|
|
"valid_targets_mean": 3992.4,
|
|
"valid_targets_min": 922
|
|
},
|
|
{
|
|
"epoch": 0.8201892744479495,
|
|
"grad_norm": 0.4457152822998143,
|
|
"learning_rate": 9.767256781282486e-05,
|
|
"loss": 0.5459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5469316244125366,
|
|
"step": 260,
|
|
"valid_targets_mean": 3295.2,
|
|
"valid_targets_min": 942
|
|
},
|
|
{
|
|
"epoch": 0.8359621451104101,
|
|
"grad_norm": 0.42830831852575374,
|
|
"learning_rate": 9.757777434612116e-05,
|
|
"loss": 0.5321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5643436908721924,
|
|
"step": 265,
|
|
"valid_targets_mean": 3402.6,
|
|
"valid_targets_min": 849
|
|
},
|
|
{
|
|
"epoch": 0.8517350157728707,
|
|
"grad_norm": 0.35560598847559505,
|
|
"learning_rate": 9.748113667960987e-05,
|
|
"loss": 0.5416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5399414300918579,
|
|
"step": 270,
|
|
"valid_targets_mean": 4290.6,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 0.8675078864353313,
|
|
"grad_norm": 0.3318267291185689,
|
|
"learning_rate": 9.738265855914013e-05,
|
|
"loss": 0.4984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5012451410293579,
|
|
"step": 275,
|
|
"valid_targets_mean": 4652.2,
|
|
"valid_targets_min": 1522
|
|
},
|
|
{
|
|
"epoch": 0.8832807570977917,
|
|
"grad_norm": 0.37641165858900405,
|
|
"learning_rate": 9.728234380190038e-05,
|
|
"loss": 0.5217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5358394384384155,
|
|
"step": 280,
|
|
"valid_targets_mean": 3566.7,
|
|
"valid_targets_min": 356
|
|
},
|
|
{
|
|
"epoch": 0.8990536277602523,
|
|
"grad_norm": 0.3707422402953193,
|
|
"learning_rate": 9.718019629627045e-05,
|
|
"loss": 0.5383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5315133333206177,
|
|
"step": 285,
|
|
"valid_targets_mean": 4248.1,
|
|
"valid_targets_min": 1506
|
|
},
|
|
{
|
|
"epoch": 0.9148264984227129,
|
|
"grad_norm": 0.36379579886903163,
|
|
"learning_rate": 9.70762200016707e-05,
|
|
"loss": 0.5156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49599626660346985,
|
|
"step": 290,
|
|
"valid_targets_mean": 3829.4,
|
|
"valid_targets_min": 319
|
|
},
|
|
{
|
|
"epoch": 0.9305993690851735,
|
|
"grad_norm": 0.3602052757244127,
|
|
"learning_rate": 9.697041894840865e-05,
|
|
"loss": 0.5172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5145964622497559,
|
|
"step": 295,
|
|
"valid_targets_mean": 3755.7,
|
|
"valid_targets_min": 1000
|
|
},
|
|
{
|
|
"epoch": 0.9463722397476341,
|
|
"grad_norm": 0.3383105129798826,
|
|
"learning_rate": 9.68627972375228e-05,
|
|
"loss": 0.5197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5156067609786987,
|
|
"step": 300,
|
|
"valid_targets_mean": 4581.9,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 0.9621451104100947,
|
|
"grad_norm": 0.39478296443847477,
|
|
"learning_rate": 9.675335904062353e-05,
|
|
"loss": 0.509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5009207129478455,
|
|
"step": 305,
|
|
"valid_targets_mean": 3574.9,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 0.9779179810725552,
|
|
"grad_norm": 0.3701507161997181,
|
|
"learning_rate": 9.66421085997315e-05,
|
|
"loss": 0.515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5108104944229126,
|
|
"step": 310,
|
|
"valid_targets_mean": 3732.2,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 0.9936908517350158,
|
|
"grad_norm": 0.36362679379892143,
|
|
"learning_rate": 9.65290502271132e-05,
|
|
"loss": 0.5298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5177055597305298,
|
|
"step": 315,
|
|
"valid_targets_mean": 4006.8,
|
|
"valid_targets_min": 862
|
|
},
|
|
{
|
|
"epoch": 1.0094637223974763,
|
|
"grad_norm": 0.44494333561483296,
|
|
"learning_rate": 9.641418830511377e-05,
|
|
"loss": 0.469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4171540141105652,
|
|
"step": 320,
|
|
"valid_targets_mean": 3149.5,
|
|
"valid_targets_min": 519
|
|
},
|
|
{
|
|
"epoch": 1.025236593059937,
|
|
"grad_norm": 0.3391573696995791,
|
|
"learning_rate": 9.62975272859872e-05,
|
|
"loss": 0.4145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39022213220596313,
|
|
"step": 325,
|
|
"valid_targets_mean": 4175.0,
|
|
"valid_targets_min": 1192
|
|
},
|
|
{
|
|
"epoch": 1.0410094637223974,
|
|
"grad_norm": 0.4148022873924609,
|
|
"learning_rate": 9.617907169172367e-05,
|
|
"loss": 0.4115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4439923167228699,
|
|
"step": 330,
|
|
"valid_targets_mean": 3986.2,
|
|
"valid_targets_min": 869
|
|
},
|
|
{
|
|
"epoch": 1.0567823343848581,
|
|
"grad_norm": 0.3565887954497388,
|
|
"learning_rate": 9.605882611387432e-05,
|
|
"loss": 0.4526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44107919931411743,
|
|
"step": 335,
|
|
"valid_targets_mean": 4259.3,
|
|
"valid_targets_min": 976
|
|
},
|
|
{
|
|
"epoch": 1.0725552050473186,
|
|
"grad_norm": 0.3907577587630456,
|
|
"learning_rate": 9.593679521337327e-05,
|
|
"loss": 0.4484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45283353328704834,
|
|
"step": 340,
|
|
"valid_targets_mean": 3893.7,
|
|
"valid_targets_min": 1418
|
|
},
|
|
{
|
|
"epoch": 1.088328075709779,
|
|
"grad_norm": 0.3807742307702375,
|
|
"learning_rate": 9.581298372035695e-05,
|
|
"loss": 0.4302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4250805675983429,
|
|
"step": 345,
|
|
"valid_targets_mean": 3853.0,
|
|
"valid_targets_min": 965
|
|
},
|
|
{
|
|
"epoch": 1.1041009463722398,
|
|
"grad_norm": 0.44905191912330616,
|
|
"learning_rate": 9.56873964339807e-05,
|
|
"loss": 0.4462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5217335224151611,
|
|
"step": 350,
|
|
"valid_targets_mean": 2976.3,
|
|
"valid_targets_min": 827
|
|
},
|
|
{
|
|
"epoch": 1.1198738170347002,
|
|
"grad_norm": 0.3699538409851181,
|
|
"learning_rate": 9.556003822223287e-05,
|
|
"loss": 0.4148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3780270218849182,
|
|
"step": 355,
|
|
"valid_targets_mean": 4304.0,
|
|
"valid_targets_min": 863
|
|
},
|
|
{
|
|
"epoch": 1.135646687697161,
|
|
"grad_norm": 0.38031489838469096,
|
|
"learning_rate": 9.5430914021746e-05,
|
|
"loss": 0.4371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4074738323688507,
|
|
"step": 360,
|
|
"valid_targets_mean": 3196.3,
|
|
"valid_targets_min": 1161
|
|
},
|
|
{
|
|
"epoch": 1.1514195583596214,
|
|
"grad_norm": 0.382134493241582,
|
|
"learning_rate": 9.530002883760552e-05,
|
|
"loss": 0.4218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43987852334976196,
|
|
"step": 365,
|
|
"valid_targets_mean": 3962.5,
|
|
"valid_targets_min": 899
|
|
},
|
|
{
|
|
"epoch": 1.167192429022082,
|
|
"grad_norm": 0.37585063446780165,
|
|
"learning_rate": 9.516738774315577e-05,
|
|
"loss": 0.4395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4138225018978119,
|
|
"step": 370,
|
|
"valid_targets_mean": 3164.1,
|
|
"valid_targets_min": 762
|
|
},
|
|
{
|
|
"epoch": 1.1829652996845426,
|
|
"grad_norm": 0.3805495887688742,
|
|
"learning_rate": 9.503299587980331e-05,
|
|
"loss": 0.4434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44806328415870667,
|
|
"step": 375,
|
|
"valid_targets_mean": 3578.8,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 1.1987381703470033,
|
|
"grad_norm": 0.376773409845666,
|
|
"learning_rate": 9.489685845681762e-05,
|
|
"loss": 0.4428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4336075186729431,
|
|
"step": 380,
|
|
"valid_targets_mean": 3914.1,
|
|
"valid_targets_min": 961
|
|
},
|
|
{
|
|
"epoch": 1.2145110410094637,
|
|
"grad_norm": 0.38804566747120484,
|
|
"learning_rate": 9.47589807511292e-05,
|
|
"loss": 0.4292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4264313876628876,
|
|
"step": 385,
|
|
"valid_targets_mean": 3593.9,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 1.2302839116719242,
|
|
"grad_norm": 0.36513112328321634,
|
|
"learning_rate": 9.461936810712507e-05,
|
|
"loss": 0.4611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47045087814331055,
|
|
"step": 390,
|
|
"valid_targets_mean": 4321.6,
|
|
"valid_targets_min": 1088
|
|
},
|
|
{
|
|
"epoch": 1.2460567823343849,
|
|
"grad_norm": 0.39079786099369523,
|
|
"learning_rate": 9.447802593644152e-05,
|
|
"loss": 0.4318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5255352258682251,
|
|
"step": 395,
|
|
"valid_targets_mean": 3777.8,
|
|
"valid_targets_min": 898
|
|
},
|
|
{
|
|
"epoch": 1.2618296529968454,
|
|
"grad_norm": 0.3398501520957368,
|
|
"learning_rate": 9.433495971775444e-05,
|
|
"loss": 0.4169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4048163890838623,
|
|
"step": 400,
|
|
"valid_targets_mean": 4000.7,
|
|
"valid_targets_min": 1370
|
|
},
|
|
{
|
|
"epoch": 1.277602523659306,
|
|
"grad_norm": 0.3670308220814651,
|
|
"learning_rate": 9.419017499656686e-05,
|
|
"loss": 0.4393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4233490228652954,
|
|
"step": 405,
|
|
"valid_targets_mean": 3596.8,
|
|
"valid_targets_min": 835
|
|
},
|
|
{
|
|
"epoch": 1.2933753943217665,
|
|
"grad_norm": 0.40227891912103875,
|
|
"learning_rate": 9.404367738499409e-05,
|
|
"loss": 0.4616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.465755432844162,
|
|
"step": 410,
|
|
"valid_targets_mean": 3719.0,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 1.3091482649842272,
|
|
"grad_norm": 0.35283201135451225,
|
|
"learning_rate": 9.38954725615461e-05,
|
|
"loss": 0.4729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45133858919143677,
|
|
"step": 415,
|
|
"valid_targets_mean": 4254.4,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 1.3249211356466877,
|
|
"grad_norm": 0.3570013813746019,
|
|
"learning_rate": 9.374556627090749e-05,
|
|
"loss": 0.4627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4204801619052887,
|
|
"step": 420,
|
|
"valid_targets_mean": 3770.4,
|
|
"valid_targets_min": 337
|
|
},
|
|
{
|
|
"epoch": 1.3406940063091484,
|
|
"grad_norm": 0.37223503941855,
|
|
"learning_rate": 9.359396432371476e-05,
|
|
"loss": 0.4677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5173239707946777,
|
|
"step": 425,
|
|
"valid_targets_mean": 4230.2,
|
|
"valid_targets_min": 1114
|
|
},
|
|
{
|
|
"epoch": 1.3564668769716088,
|
|
"grad_norm": 0.3301770644383114,
|
|
"learning_rate": 9.344067259633112e-05,
|
|
"loss": 0.4323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.398292601108551,
|
|
"step": 430,
|
|
"valid_targets_mean": 4243.5,
|
|
"valid_targets_min": 1054
|
|
},
|
|
{
|
|
"epoch": 1.3722397476340693,
|
|
"grad_norm": 0.36270954698851193,
|
|
"learning_rate": 9.328569703061862e-05,
|
|
"loss": 0.4336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45556944608688354,
|
|
"step": 435,
|
|
"valid_targets_mean": 3909.2,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 1.38801261829653,
|
|
"grad_norm": 0.37539857073698474,
|
|
"learning_rate": 9.3129043633708e-05,
|
|
"loss": 0.4421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4796835482120514,
|
|
"step": 440,
|
|
"valid_targets_mean": 4132.4,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 1.4037854889589905,
|
|
"grad_norm": 0.33638929802502493,
|
|
"learning_rate": 9.297071847776568e-05,
|
|
"loss": 0.4423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4059070646762848,
|
|
"step": 445,
|
|
"valid_targets_mean": 3642.7,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 1.4195583596214512,
|
|
"grad_norm": 0.4001184728503333,
|
|
"learning_rate": 9.281072769975847e-05,
|
|
"loss": 0.4443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47804516553878784,
|
|
"step": 450,
|
|
"valid_targets_mean": 3378.8,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 1.4353312302839116,
|
|
"grad_norm": 0.4029482297411805,
|
|
"learning_rate": 9.264907750121568e-05,
|
|
"loss": 0.4334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44277799129486084,
|
|
"step": 455,
|
|
"valid_targets_mean": 3196.6,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 1.4511041009463723,
|
|
"grad_norm": 0.4036784853782572,
|
|
"learning_rate": 9.248577414798871e-05,
|
|
"loss": 0.4416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43349453806877136,
|
|
"step": 460,
|
|
"valid_targets_mean": 3283.0,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 1.4668769716088328,
|
|
"grad_norm": 0.36312925454919104,
|
|
"learning_rate": 9.232082397000826e-05,
|
|
"loss": 0.4275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4773409962654114,
|
|
"step": 465,
|
|
"valid_targets_mean": 3814.1,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 1.4826498422712935,
|
|
"grad_norm": 0.3528726566779447,
|
|
"learning_rate": 9.215423336103884e-05,
|
|
"loss": 0.4431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4232112765312195,
|
|
"step": 470,
|
|
"valid_targets_mean": 4029.1,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 1.498422712933754,
|
|
"grad_norm": 0.41683241533869353,
|
|
"learning_rate": 9.198600877843105e-05,
|
|
"loss": 0.4412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48993387818336487,
|
|
"step": 475,
|
|
"valid_targets_mean": 2998.8,
|
|
"valid_targets_min": 977
|
|
},
|
|
{
|
|
"epoch": 1.5141955835962144,
|
|
"grad_norm": 0.34971035022348906,
|
|
"learning_rate": 9.181615674287121e-05,
|
|
"loss": 0.4461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43452027440071106,
|
|
"step": 480,
|
|
"valid_targets_mean": 4365.5,
|
|
"valid_targets_min": 1207
|
|
},
|
|
{
|
|
"epoch": 1.5299684542586751,
|
|
"grad_norm": 0.7751137379112104,
|
|
"learning_rate": 9.164468383812864e-05,
|
|
"loss": 0.4403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4532795548439026,
|
|
"step": 485,
|
|
"valid_targets_mean": 3795.7,
|
|
"valid_targets_min": 1359
|
|
},
|
|
{
|
|
"epoch": 1.5457413249211358,
|
|
"grad_norm": 0.401862455648203,
|
|
"learning_rate": 9.147159671080049e-05,
|
|
"loss": 0.4559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4684237539768219,
|
|
"step": 490,
|
|
"valid_targets_mean": 3587.4,
|
|
"valid_targets_min": 666
|
|
},
|
|
{
|
|
"epoch": 1.5615141955835963,
|
|
"grad_norm": 0.37373814768090685,
|
|
"learning_rate": 9.129690207005402e-05,
|
|
"loss": 0.4551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4561198949813843,
|
|
"step": 495,
|
|
"valid_targets_mean": 3667.8,
|
|
"valid_targets_min": 1183
|
|
},
|
|
{
|
|
"epoch": 1.5772870662460567,
|
|
"grad_norm": 0.3475217588458817,
|
|
"learning_rate": 9.11206066873666e-05,
|
|
"loss": 0.439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43660348653793335,
|
|
"step": 500,
|
|
"valid_targets_mean": 3716.6,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 1.5930599369085172,
|
|
"grad_norm": 0.4051905185882745,
|
|
"learning_rate": 9.094271739626326e-05,
|
|
"loss": 0.4559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43010252714157104,
|
|
"step": 505,
|
|
"valid_targets_mean": 2970.6,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 1.608832807570978,
|
|
"grad_norm": 0.34036475182222864,
|
|
"learning_rate": 9.076324109205174e-05,
|
|
"loss": 0.4507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4386584162712097,
|
|
"step": 510,
|
|
"valid_targets_mean": 3959.9,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 1.6246056782334386,
|
|
"grad_norm": 0.3498122275381102,
|
|
"learning_rate": 9.058218473155528e-05,
|
|
"loss": 0.4333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4222657084465027,
|
|
"step": 515,
|
|
"valid_targets_mean": 3462.6,
|
|
"valid_targets_min": 873
|
|
},
|
|
{
|
|
"epoch": 1.640378548895899,
|
|
"grad_norm": 0.3369248339344111,
|
|
"learning_rate": 9.039955533284292e-05,
|
|
"loss": 0.4527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4398431181907654,
|
|
"step": 520,
|
|
"valid_targets_mean": 3989.6,
|
|
"valid_targets_min": 1064
|
|
},
|
|
{
|
|
"epoch": 1.6561514195583595,
|
|
"grad_norm": 0.36080386098017403,
|
|
"learning_rate": 9.021535997495749e-05,
|
|
"loss": 0.471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48137784004211426,
|
|
"step": 525,
|
|
"valid_targets_mean": 3954.6,
|
|
"valid_targets_min": 866
|
|
},
|
|
{
|
|
"epoch": 1.6719242902208202,
|
|
"grad_norm": 0.3718481057610134,
|
|
"learning_rate": 9.002960579764116e-05,
|
|
"loss": 0.4503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4851943254470825,
|
|
"step": 530,
|
|
"valid_targets_mean": 3200.0,
|
|
"valid_targets_min": 440
|
|
},
|
|
{
|
|
"epoch": 1.687697160883281,
|
|
"grad_norm": 0.36465288551264813,
|
|
"learning_rate": 8.984230000105882e-05,
|
|
"loss": 0.4455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4893612265586853,
|
|
"step": 535,
|
|
"valid_targets_mean": 3724.4,
|
|
"valid_targets_min": 1164
|
|
},
|
|
{
|
|
"epoch": 1.7034700315457414,
|
|
"grad_norm": 0.32735990049855485,
|
|
"learning_rate": 8.965344984551882e-05,
|
|
"loss": 0.4431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38390782475471497,
|
|
"step": 540,
|
|
"valid_targets_mean": 3728.2,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 1.7192429022082019,
|
|
"grad_norm": 0.3405505588639974,
|
|
"learning_rate": 8.946306265119167e-05,
|
|
"loss": 0.434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4467661380767822,
|
|
"step": 545,
|
|
"valid_targets_mean": 3709.7,
|
|
"valid_targets_min": 1553
|
|
},
|
|
{
|
|
"epoch": 1.7350157728706623,
|
|
"grad_norm": 0.3652479819607644,
|
|
"learning_rate": 8.927114579782625e-05,
|
|
"loss": 0.4376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43158066272735596,
|
|
"step": 550,
|
|
"valid_targets_mean": 3628.7,
|
|
"valid_targets_min": 882
|
|
},
|
|
{
|
|
"epoch": 1.750788643533123,
|
|
"grad_norm": 0.3822997428821815,
|
|
"learning_rate": 8.907770672446381e-05,
|
|
"loss": 0.4574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4463322162628174,
|
|
"step": 555,
|
|
"valid_targets_mean": 3338.1,
|
|
"valid_targets_min": 789
|
|
},
|
|
{
|
|
"epoch": 1.7665615141955837,
|
|
"grad_norm": 0.3485646535388207,
|
|
"learning_rate": 8.888275292914948e-05,
|
|
"loss": 0.4397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42743009328842163,
|
|
"step": 560,
|
|
"valid_targets_mean": 3594.6,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 1.7823343848580442,
|
|
"grad_norm": 0.3109895750423863,
|
|
"learning_rate": 8.868629196864182e-05,
|
|
"loss": 0.4264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3907405138015747,
|
|
"step": 565,
|
|
"valid_targets_mean": 4172.2,
|
|
"valid_targets_min": 961
|
|
},
|
|
{
|
|
"epoch": 1.7981072555205047,
|
|
"grad_norm": 0.3707666760898621,
|
|
"learning_rate": 8.848833145811976e-05,
|
|
"loss": 0.4486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4277447760105133,
|
|
"step": 570,
|
|
"valid_targets_mean": 3259.4,
|
|
"valid_targets_min": 1124
|
|
},
|
|
{
|
|
"epoch": 1.8138801261829653,
|
|
"grad_norm": 0.4062312243161753,
|
|
"learning_rate": 8.828887907088753e-05,
|
|
"loss": 0.4338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45710551738739014,
|
|
"step": 575,
|
|
"valid_targets_mean": 3280.9,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 1.8296529968454258,
|
|
"grad_norm": 0.34345659850944893,
|
|
"learning_rate": 8.808794253807707e-05,
|
|
"loss": 0.4275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40221577882766724,
|
|
"step": 580,
|
|
"valid_targets_mean": 3658.1,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 1.8454258675078865,
|
|
"grad_norm": 0.3330687260048639,
|
|
"learning_rate": 8.788552964834859e-05,
|
|
"loss": 0.4468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46073347330093384,
|
|
"step": 585,
|
|
"valid_targets_mean": 3980.0,
|
|
"valid_targets_min": 1109
|
|
},
|
|
{
|
|
"epoch": 1.861198738170347,
|
|
"grad_norm": 0.3397094597785125,
|
|
"learning_rate": 8.768164824758846e-05,
|
|
"loss": 0.4558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4085126221179962,
|
|
"step": 590,
|
|
"valid_targets_mean": 3243.0,
|
|
"valid_targets_min": 442
|
|
},
|
|
{
|
|
"epoch": 1.8769716088328074,
|
|
"grad_norm": 0.31999705156875274,
|
|
"learning_rate": 8.747630623860521e-05,
|
|
"loss": 0.4274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41694074869155884,
|
|
"step": 595,
|
|
"valid_targets_mean": 4215.4,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 1.8927444794952681,
|
|
"grad_norm": 0.33040562917463656,
|
|
"learning_rate": 8.726951158082311e-05,
|
|
"loss": 0.4339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4324493408203125,
|
|
"step": 600,
|
|
"valid_targets_mean": 4159.7,
|
|
"valid_targets_min": 1175
|
|
},
|
|
{
|
|
"epoch": 1.9085173501577288,
|
|
"grad_norm": 0.36434921623211575,
|
|
"learning_rate": 8.706127228997376e-05,
|
|
"loss": 0.4539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4327438175678253,
|
|
"step": 605,
|
|
"valid_targets_mean": 3312.7,
|
|
"valid_targets_min": 1082
|
|
},
|
|
{
|
|
"epoch": 1.9242902208201893,
|
|
"grad_norm": 0.35804162946121054,
|
|
"learning_rate": 8.685159643778528e-05,
|
|
"loss": 0.431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4164193272590637,
|
|
"step": 610,
|
|
"valid_targets_mean": 3273.8,
|
|
"valid_targets_min": 1083
|
|
},
|
|
{
|
|
"epoch": 1.9400630914826498,
|
|
"grad_norm": 0.3485498776902668,
|
|
"learning_rate": 8.664049215166955e-05,
|
|
"loss": 0.4606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5004233717918396,
|
|
"step": 615,
|
|
"valid_targets_mean": 4313.6,
|
|
"valid_targets_min": 856
|
|
},
|
|
{
|
|
"epoch": 1.9558359621451105,
|
|
"grad_norm": 0.3544861888530339,
|
|
"learning_rate": 8.6427967614407e-05,
|
|
"loss": 0.4387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4689190089702606,
|
|
"step": 620,
|
|
"valid_targets_mean": 3861.9,
|
|
"valid_targets_min": 1276
|
|
},
|
|
{
|
|
"epoch": 1.971608832807571,
|
|
"grad_norm": 0.35670645569674425,
|
|
"learning_rate": 8.621403106382968e-05,
|
|
"loss": 0.4398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4165767431259155,
|
|
"step": 625,
|
|
"valid_targets_mean": 3283.2,
|
|
"valid_targets_min": 1086
|
|
},
|
|
{
|
|
"epoch": 1.9873817034700316,
|
|
"grad_norm": 0.3739226579042923,
|
|
"learning_rate": 8.599869079250165e-05,
|
|
"loss": 0.4582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4649926424026489,
|
|
"step": 630,
|
|
"valid_targets_mean": 3385.2,
|
|
"valid_targets_min": 421
|
|
},
|
|
{
|
|
"epoch": 2.003154574132492,
|
|
"grad_norm": 0.4652236465790686,
|
|
"learning_rate": 8.578195514739784e-05,
|
|
"loss": 0.4383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3751116096973419,
|
|
"step": 635,
|
|
"valid_targets_mean": 3725.0,
|
|
"valid_targets_min": 1248
|
|
},
|
|
{
|
|
"epoch": 2.0189274447949526,
|
|
"grad_norm": 0.43822671559849474,
|
|
"learning_rate": 8.556383252958026e-05,
|
|
"loss": 0.3511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3273969292640686,
|
|
"step": 640,
|
|
"valid_targets_mean": 3735.2,
|
|
"valid_targets_min": 835
|
|
},
|
|
{
|
|
"epoch": 2.034700315457413,
|
|
"grad_norm": 0.34010705122481344,
|
|
"learning_rate": 8.534433139387259e-05,
|
|
"loss": 0.3508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3301159739494324,
|
|
"step": 645,
|
|
"valid_targets_mean": 4554.8,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 2.050473186119874,
|
|
"grad_norm": 0.3775080402751895,
|
|
"learning_rate": 8.512346024853219e-05,
|
|
"loss": 0.3432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3433346152305603,
|
|
"step": 650,
|
|
"valid_targets_mean": 3910.6,
|
|
"valid_targets_min": 1627
|
|
},
|
|
{
|
|
"epoch": 2.0662460567823344,
|
|
"grad_norm": 0.3728164026489659,
|
|
"learning_rate": 8.490122765492057e-05,
|
|
"loss": 0.3577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3310161828994751,
|
|
"step": 655,
|
|
"valid_targets_mean": 3759.2,
|
|
"valid_targets_min": 356
|
|
},
|
|
{
|
|
"epoch": 2.082018927444795,
|
|
"grad_norm": 0.37117967483663206,
|
|
"learning_rate": 8.467764222717136e-05,
|
|
"loss": 0.3287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32798779010772705,
|
|
"step": 660,
|
|
"valid_targets_mean": 3875.6,
|
|
"valid_targets_min": 918
|
|
},
|
|
{
|
|
"epoch": 2.0977917981072554,
|
|
"grad_norm": 0.41952911991741526,
|
|
"learning_rate": 8.445271263185646e-05,
|
|
"loss": 0.3454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38159024715423584,
|
|
"step": 665,
|
|
"valid_targets_mean": 3336.3,
|
|
"valid_targets_min": 637
|
|
},
|
|
{
|
|
"epoch": 2.1135646687697163,
|
|
"grad_norm": 0.3661960517220788,
|
|
"learning_rate": 8.422644758765012e-05,
|
|
"loss": 0.3308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3457246422767639,
|
|
"step": 670,
|
|
"valid_targets_mean": 4169.8,
|
|
"valid_targets_min": 1332
|
|
},
|
|
{
|
|
"epoch": 2.1293375394321767,
|
|
"grad_norm": 0.36231858184940324,
|
|
"learning_rate": 8.399885586499101e-05,
|
|
"loss": 0.3229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32879745960235596,
|
|
"step": 675,
|
|
"valid_targets_mean": 4030.4,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 2.145110410094637,
|
|
"grad_norm": 0.3744186649078691,
|
|
"learning_rate": 8.376994628574219e-05,
|
|
"loss": 0.3333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3317606747150421,
|
|
"step": 680,
|
|
"valid_targets_mean": 4177.5,
|
|
"valid_targets_min": 763
|
|
},
|
|
{
|
|
"epoch": 2.1608832807570977,
|
|
"grad_norm": 0.401336910401923,
|
|
"learning_rate": 8.353972772284927e-05,
|
|
"loss": 0.3452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34010061621665955,
|
|
"step": 685,
|
|
"valid_targets_mean": 3503.7,
|
|
"valid_targets_min": 1008
|
|
},
|
|
{
|
|
"epoch": 2.176656151419558,
|
|
"grad_norm": 0.38258080569669956,
|
|
"learning_rate": 8.330820909999633e-05,
|
|
"loss": 0.3384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34190699458122253,
|
|
"step": 690,
|
|
"valid_targets_mean": 3343.3,
|
|
"valid_targets_min": 1412
|
|
},
|
|
{
|
|
"epoch": 2.192429022082019,
|
|
"grad_norm": 0.4079088718039198,
|
|
"learning_rate": 8.307539939126016e-05,
|
|
"loss": 0.3262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35490545630455017,
|
|
"step": 695,
|
|
"valid_targets_mean": 3479.4,
|
|
"valid_targets_min": 1019
|
|
},
|
|
{
|
|
"epoch": 2.2082018927444795,
|
|
"grad_norm": 0.3784901118731761,
|
|
"learning_rate": 8.284130762076235e-05,
|
|
"loss": 0.3276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3070756793022156,
|
|
"step": 700,
|
|
"valid_targets_mean": 3581.0,
|
|
"valid_targets_min": 814
|
|
},
|
|
{
|
|
"epoch": 2.22397476340694,
|
|
"grad_norm": 0.3620067925312215,
|
|
"learning_rate": 8.260594286231947e-05,
|
|
"loss": 0.3275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36078643798828125,
|
|
"step": 705,
|
|
"valid_targets_mean": 4102.9,
|
|
"valid_targets_min": 930
|
|
},
|
|
{
|
|
"epoch": 2.2397476340694005,
|
|
"grad_norm": 0.37855572907947915,
|
|
"learning_rate": 8.236931423909138e-05,
|
|
"loss": 0.3422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3413460850715637,
|
|
"step": 710,
|
|
"valid_targets_mean": 3942.3,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 2.2555205047318614,
|
|
"grad_norm": 0.3736466247053816,
|
|
"learning_rate": 8.213143092322769e-05,
|
|
"loss": 0.319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37210655212402344,
|
|
"step": 715,
|
|
"valid_targets_mean": 3995.8,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 2.271293375394322,
|
|
"grad_norm": 0.36249923496948544,
|
|
"learning_rate": 8.189230213551202e-05,
|
|
"loss": 0.3456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3457183837890625,
|
|
"step": 720,
|
|
"valid_targets_mean": 3800.2,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 2.2870662460567823,
|
|
"grad_norm": 0.39967218810071103,
|
|
"learning_rate": 8.165193714500481e-05,
|
|
"loss": 0.3593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37980931997299194,
|
|
"step": 725,
|
|
"valid_targets_mean": 3775.7,
|
|
"valid_targets_min": 711
|
|
},
|
|
{
|
|
"epoch": 2.302839116719243,
|
|
"grad_norm": 0.3735154397123298,
|
|
"learning_rate": 8.141034526868389e-05,
|
|
"loss": 0.3396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3548370599746704,
|
|
"step": 730,
|
|
"valid_targets_mean": 3849.8,
|
|
"valid_targets_min": 1156
|
|
},
|
|
{
|
|
"epoch": 2.3186119873817033,
|
|
"grad_norm": 0.33566493790874236,
|
|
"learning_rate": 8.116753587108339e-05,
|
|
"loss": 0.3275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3007664680480957,
|
|
"step": 735,
|
|
"valid_targets_mean": 4062.7,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 2.334384858044164,
|
|
"grad_norm": 0.34171038668054404,
|
|
"learning_rate": 8.092351836393076e-05,
|
|
"loss": 0.3394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3267410397529602,
|
|
"step": 740,
|
|
"valid_targets_mean": 4180.0,
|
|
"valid_targets_min": 1298
|
|
},
|
|
{
|
|
"epoch": 2.3501577287066246,
|
|
"grad_norm": 0.4143460695405696,
|
|
"learning_rate": 8.067830220578191e-05,
|
|
"loss": 0.3717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3674328625202179,
|
|
"step": 745,
|
|
"valid_targets_mean": 3244.7,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 2.365930599369085,
|
|
"grad_norm": 0.3698488243960478,
|
|
"learning_rate": 8.043189690165467e-05,
|
|
"loss": 0.3322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3374217450618744,
|
|
"step": 750,
|
|
"valid_targets_mean": 3789.4,
|
|
"valid_targets_min": 791
|
|
},
|
|
{
|
|
"epoch": 2.3817034700315456,
|
|
"grad_norm": 0.35690943739773573,
|
|
"learning_rate": 8.018431200266023e-05,
|
|
"loss": 0.3334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33900323510169983,
|
|
"step": 755,
|
|
"valid_targets_mean": 3827.4,
|
|
"valid_targets_min": 895
|
|
},
|
|
{
|
|
"epoch": 2.3974763406940065,
|
|
"grad_norm": 0.3850669222911541,
|
|
"learning_rate": 7.993555710563303e-05,
|
|
"loss": 0.3301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33071190118789673,
|
|
"step": 760,
|
|
"valid_targets_mean": 3576.0,
|
|
"valid_targets_min": 843
|
|
},
|
|
{
|
|
"epoch": 2.413249211356467,
|
|
"grad_norm": 0.35558033307659687,
|
|
"learning_rate": 7.968564185275873e-05,
|
|
"loss": 0.3503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34395042061805725,
|
|
"step": 765,
|
|
"valid_targets_mean": 4245.2,
|
|
"valid_targets_min": 1665
|
|
},
|
|
{
|
|
"epoch": 2.4290220820189274,
|
|
"grad_norm": 0.3696713237256321,
|
|
"learning_rate": 7.943457593120045e-05,
|
|
"loss": 0.348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3495572805404663,
|
|
"step": 770,
|
|
"valid_targets_mean": 3356.2,
|
|
"valid_targets_min": 976
|
|
},
|
|
{
|
|
"epoch": 2.444794952681388,
|
|
"grad_norm": 0.31257484242301387,
|
|
"learning_rate": 7.918236907272327e-05,
|
|
"loss": 0.3453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33376145362854004,
|
|
"step": 775,
|
|
"valid_targets_mean": 4976.2,
|
|
"valid_targets_min": 1063
|
|
},
|
|
{
|
|
"epoch": 2.4605678233438484,
|
|
"grad_norm": 0.3960160133368537,
|
|
"learning_rate": 7.892903105331712e-05,
|
|
"loss": 0.34,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35692280530929565,
|
|
"step": 780,
|
|
"valid_targets_mean": 3351.8,
|
|
"valid_targets_min": 238
|
|
},
|
|
{
|
|
"epoch": 2.4763406940063093,
|
|
"grad_norm": 0.4017364185807611,
|
|
"learning_rate": 7.867457169281765e-05,
|
|
"loss": 0.3489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34925976395606995,
|
|
"step": 785,
|
|
"valid_targets_mean": 3252.7,
|
|
"valid_targets_min": 1102
|
|
},
|
|
{
|
|
"epoch": 2.4921135646687698,
|
|
"grad_norm": 0.36347535237779893,
|
|
"learning_rate": 7.841900085452574e-05,
|
|
"loss": 0.3527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32482895255088806,
|
|
"step": 790,
|
|
"valid_targets_mean": 3577.0,
|
|
"valid_targets_min": 1141
|
|
},
|
|
{
|
|
"epoch": 2.5078864353312302,
|
|
"grad_norm": 0.3659525671502537,
|
|
"learning_rate": 7.816232844482516e-05,
|
|
"loss": 0.3336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31454741954803467,
|
|
"step": 795,
|
|
"valid_targets_mean": 3475.5,
|
|
"valid_targets_min": 319
|
|
},
|
|
{
|
|
"epoch": 2.5236593059936907,
|
|
"grad_norm": 0.38262913290066447,
|
|
"learning_rate": 7.790456441279853e-05,
|
|
"loss": 0.3512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33849939703941345,
|
|
"step": 800,
|
|
"valid_targets_mean": 3591.1,
|
|
"valid_targets_min": 380
|
|
},
|
|
{
|
|
"epoch": 2.5394321766561516,
|
|
"grad_norm": 0.3945192774714041,
|
|
"learning_rate": 7.764571874984174e-05,
|
|
"loss": 0.3511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39895951747894287,
|
|
"step": 805,
|
|
"valid_targets_mean": 3441.2,
|
|
"valid_targets_min": 1072
|
|
},
|
|
{
|
|
"epoch": 2.555205047318612,
|
|
"grad_norm": 0.4103794578592908,
|
|
"learning_rate": 7.73858014892766e-05,
|
|
"loss": 0.3504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38766610622406006,
|
|
"step": 810,
|
|
"valid_targets_mean": 3518.4,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 2.5709779179810726,
|
|
"grad_norm": 0.34204539742317147,
|
|
"learning_rate": 7.712482270596199e-05,
|
|
"loss": 0.328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32463714480400085,
|
|
"step": 815,
|
|
"valid_targets_mean": 3946.6,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 2.586750788643533,
|
|
"grad_norm": 0.38589899224927104,
|
|
"learning_rate": 7.686279251590331e-05,
|
|
"loss": 0.3359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3244478702545166,
|
|
"step": 820,
|
|
"valid_targets_mean": 3312.8,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 2.6025236593059935,
|
|
"grad_norm": 0.4052155719856952,
|
|
"learning_rate": 7.659972107586035e-05,
|
|
"loss": 0.3533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3753927946090698,
|
|
"step": 825,
|
|
"valid_targets_mean": 4110.9,
|
|
"valid_targets_min": 883
|
|
},
|
|
{
|
|
"epoch": 2.6182965299684544,
|
|
"grad_norm": 0.343927305431227,
|
|
"learning_rate": 7.633561858295364e-05,
|
|
"loss": 0.3614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36553817987442017,
|
|
"step": 830,
|
|
"valid_targets_mean": 4224.3,
|
|
"valid_targets_min": 1532
|
|
},
|
|
{
|
|
"epoch": 2.634069400630915,
|
|
"grad_norm": 0.37883054228512897,
|
|
"learning_rate": 7.607049527426916e-05,
|
|
"loss": 0.3531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33783310651779175,
|
|
"step": 835,
|
|
"valid_targets_mean": 3950.9,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 2.6498422712933754,
|
|
"grad_norm": 0.37159620397410714,
|
|
"learning_rate": 7.580436142646155e-05,
|
|
"loss": 0.3615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38431215286254883,
|
|
"step": 840,
|
|
"valid_targets_mean": 4062.4,
|
|
"valid_targets_min": 994
|
|
},
|
|
{
|
|
"epoch": 2.665615141955836,
|
|
"grad_norm": 0.355718961519934,
|
|
"learning_rate": 7.55372273553557e-05,
|
|
"loss": 0.3458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32091930508613586,
|
|
"step": 845,
|
|
"valid_targets_mean": 3696.0,
|
|
"valid_targets_min": 910
|
|
},
|
|
{
|
|
"epoch": 2.6813880126182967,
|
|
"grad_norm": 0.3361604204167694,
|
|
"learning_rate": 7.526910341554703e-05,
|
|
"loss": 0.3463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34885716438293457,
|
|
"step": 850,
|
|
"valid_targets_mean": 4492.7,
|
|
"valid_targets_min": 1779
|
|
},
|
|
{
|
|
"epoch": 2.697160883280757,
|
|
"grad_norm": 0.366769390411755,
|
|
"learning_rate": 7.500000000000001e-05,
|
|
"loss": 0.351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33402395248413086,
|
|
"step": 855,
|
|
"valid_targets_mean": 3548.5,
|
|
"valid_targets_min": 1455
|
|
},
|
|
{
|
|
"epoch": 2.7129337539432177,
|
|
"grad_norm": 0.3691866466890524,
|
|
"learning_rate": 7.472992753964532e-05,
|
|
"loss": 0.3578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3604165017604828,
|
|
"step": 860,
|
|
"valid_targets_mean": 3894.6,
|
|
"valid_targets_min": 1009
|
|
},
|
|
{
|
|
"epoch": 2.728706624605678,
|
|
"grad_norm": 0.3289605476496754,
|
|
"learning_rate": 7.445889650297559e-05,
|
|
"loss": 0.3374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3566170334815979,
|
|
"step": 865,
|
|
"valid_targets_mean": 4428.1,
|
|
"valid_targets_min": 1215
|
|
},
|
|
{
|
|
"epoch": 2.7444794952681386,
|
|
"grad_norm": 0.36697277963466485,
|
|
"learning_rate": 7.418691739563957e-05,
|
|
"loss": 0.3316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3522750735282898,
|
|
"step": 870,
|
|
"valid_targets_mean": 3775.8,
|
|
"valid_targets_min": 1260
|
|
},
|
|
{
|
|
"epoch": 2.7602523659305995,
|
|
"grad_norm": 0.37685217196788956,
|
|
"learning_rate": 7.391400076003492e-05,
|
|
"loss": 0.3363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3203175961971283,
|
|
"step": 875,
|
|
"valid_targets_mean": 3458.9,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 2.77602523659306,
|
|
"grad_norm": 0.39105467876376565,
|
|
"learning_rate": 7.36401571748996e-05,
|
|
"loss": 0.3435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3444768786430359,
|
|
"step": 880,
|
|
"valid_targets_mean": 3191.3,
|
|
"valid_targets_min": 898
|
|
},
|
|
{
|
|
"epoch": 2.7917981072555205,
|
|
"grad_norm": 0.3480244096566246,
|
|
"learning_rate": 7.336539725490178e-05,
|
|
"loss": 0.3664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34411507844924927,
|
|
"step": 885,
|
|
"valid_targets_mean": 4004.1,
|
|
"valid_targets_min": 843
|
|
},
|
|
{
|
|
"epoch": 2.807570977917981,
|
|
"grad_norm": 0.3827153845033614,
|
|
"learning_rate": 7.30897316502284e-05,
|
|
"loss": 0.3513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37474557757377625,
|
|
"step": 890,
|
|
"valid_targets_mean": 3213.1,
|
|
"valid_targets_min": 1265
|
|
},
|
|
{
|
|
"epoch": 2.823343848580442,
|
|
"grad_norm": 0.35019919394636845,
|
|
"learning_rate": 7.281317104617239e-05,
|
|
"loss": 0.3526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3495219349861145,
|
|
"step": 895,
|
|
"valid_targets_mean": 3676.5,
|
|
"valid_targets_min": 839
|
|
},
|
|
{
|
|
"epoch": 2.8391167192429023,
|
|
"grad_norm": 0.3668968163187891,
|
|
"learning_rate": 7.253572616271844e-05,
|
|
"loss": 0.3543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3313937187194824,
|
|
"step": 900,
|
|
"valid_targets_mean": 3489.2,
|
|
"valid_targets_min": 1149
|
|
},
|
|
{
|
|
"epoch": 2.854889589905363,
|
|
"grad_norm": 0.35242471370100514,
|
|
"learning_rate": 7.225740775412751e-05,
|
|
"loss": 0.3455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32235974073410034,
|
|
"step": 905,
|
|
"valid_targets_mean": 4352.0,
|
|
"valid_targets_min": 1082
|
|
},
|
|
{
|
|
"epoch": 2.8706624605678233,
|
|
"grad_norm": 0.3664024727243619,
|
|
"learning_rate": 7.197822660851991e-05,
|
|
"loss": 0.3538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3668972849845886,
|
|
"step": 910,
|
|
"valid_targets_mean": 4366.6,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 2.8864353312302837,
|
|
"grad_norm": 0.3578280364539068,
|
|
"learning_rate": 7.169819354745725e-05,
|
|
"loss": 0.3552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3774711489677429,
|
|
"step": 915,
|
|
"valid_targets_mean": 3474.4,
|
|
"valid_targets_min": 902
|
|
},
|
|
{
|
|
"epoch": 2.9022082018927446,
|
|
"grad_norm": 0.39067610873109937,
|
|
"learning_rate": 7.141731942552288e-05,
|
|
"loss": 0.3686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37125909328460693,
|
|
"step": 920,
|
|
"valid_targets_mean": 3281.9,
|
|
"valid_targets_min": 1015
|
|
},
|
|
{
|
|
"epoch": 2.917981072555205,
|
|
"grad_norm": 0.36500287371289847,
|
|
"learning_rate": 7.113561512990119e-05,
|
|
"loss": 0.3568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34835952520370483,
|
|
"step": 925,
|
|
"valid_targets_mean": 3716.3,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 2.9337539432176656,
|
|
"grad_norm": 0.3640542876272178,
|
|
"learning_rate": 7.085309157995557e-05,
|
|
"loss": 0.3656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35117635130882263,
|
|
"step": 930,
|
|
"valid_targets_mean": 3512.1,
|
|
"valid_targets_min": 937
|
|
},
|
|
{
|
|
"epoch": 2.949526813880126,
|
|
"grad_norm": 0.35850312668982054,
|
|
"learning_rate": 7.056975972680517e-05,
|
|
"loss": 0.3781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39214226603507996,
|
|
"step": 935,
|
|
"valid_targets_mean": 3744.8,
|
|
"valid_targets_min": 1098
|
|
},
|
|
{
|
|
"epoch": 2.965299684542587,
|
|
"grad_norm": 0.3355639560460545,
|
|
"learning_rate": 7.028563055290044e-05,
|
|
"loss": 0.3673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3316793441772461,
|
|
"step": 940,
|
|
"valid_targets_mean": 4230.8,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 2.9810725552050474,
|
|
"grad_norm": 0.32982130499991974,
|
|
"learning_rate": 7.000071507159744e-05,
|
|
"loss": 0.3358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32892876863479614,
|
|
"step": 945,
|
|
"valid_targets_mean": 4000.3,
|
|
"valid_targets_min": 1092
|
|
},
|
|
{
|
|
"epoch": 2.996845425867508,
|
|
"grad_norm": 0.34264280670103553,
|
|
"learning_rate": 6.971502432673085e-05,
|
|
"loss": 0.3437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32262521982192993,
|
|
"step": 950,
|
|
"valid_targets_mean": 3800.8,
|
|
"valid_targets_min": 1199
|
|
},
|
|
{
|
|
"epoch": 3.0126182965299684,
|
|
"grad_norm": 0.7097009281353539,
|
|
"learning_rate": 6.942856939218599e-05,
|
|
"loss": 0.2638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25441882014274597,
|
|
"step": 955,
|
|
"valid_targets_mean": 3380.2,
|
|
"valid_targets_min": 937
|
|
},
|
|
{
|
|
"epoch": 3.028391167192429,
|
|
"grad_norm": 0.4141083590950299,
|
|
"learning_rate": 6.914136137146951e-05,
|
|
"loss": 0.2445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23335978388786316,
|
|
"step": 960,
|
|
"valid_targets_mean": 4133.7,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 3.0441640378548898,
|
|
"grad_norm": 0.46856537273414334,
|
|
"learning_rate": 6.885341139727912e-05,
|
|
"loss": 0.2397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24540895223617554,
|
|
"step": 965,
|
|
"valid_targets_mean": 3925.4,
|
|
"valid_targets_min": 984
|
|
},
|
|
{
|
|
"epoch": 3.0599369085173502,
|
|
"grad_norm": 0.4418748358978105,
|
|
"learning_rate": 6.856473063107187e-05,
|
|
"loss": 0.246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26762479543685913,
|
|
"step": 970,
|
|
"valid_targets_mean": 3725.4,
|
|
"valid_targets_min": 728
|
|
},
|
|
{
|
|
"epoch": 3.0757097791798107,
|
|
"grad_norm": 0.43206699700017354,
|
|
"learning_rate": 6.827533026263169e-05,
|
|
"loss": 0.241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2583632469177246,
|
|
"step": 975,
|
|
"valid_targets_mean": 3629.8,
|
|
"valid_targets_min": 934
|
|
},
|
|
{
|
|
"epoch": 3.091482649842271,
|
|
"grad_norm": 0.3617975005945939,
|
|
"learning_rate": 6.798522150963552e-05,
|
|
"loss": 0.2418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22357064485549927,
|
|
"step": 980,
|
|
"valid_targets_mean": 4364.8,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 3.107255520504732,
|
|
"grad_norm": 0.38710853094314396,
|
|
"learning_rate": 6.769441561721863e-05,
|
|
"loss": 0.2322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2311776876449585,
|
|
"step": 985,
|
|
"valid_targets_mean": 4206.0,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 3.1230283911671926,
|
|
"grad_norm": 0.38654625221316713,
|
|
"learning_rate": 6.740292385753858e-05,
|
|
"loss": 0.237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2150537222623825,
|
|
"step": 990,
|
|
"valid_targets_mean": 3844.5,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 3.138801261829653,
|
|
"grad_norm": 0.6526308811071234,
|
|
"learning_rate": 6.711075752933847e-05,
|
|
"loss": 0.2383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2665025591850281,
|
|
"step": 995,
|
|
"valid_targets_mean": 3494.9,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 3.1545741324921135,
|
|
"grad_norm": 0.4166111698553452,
|
|
"learning_rate": 6.681792795750875e-05,
|
|
"loss": 0.2309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24555368721485138,
|
|
"step": 1000,
|
|
"valid_targets_mean": 4494.4,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 3.170347003154574,
|
|
"grad_norm": 0.3792621220742212,
|
|
"learning_rate": 6.652444649264856e-05,
|
|
"loss": 0.2362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24369245767593384,
|
|
"step": 1005,
|
|
"valid_targets_mean": 4548.4,
|
|
"valid_targets_min": 1186
|
|
},
|
|
{
|
|
"epoch": 3.186119873817035,
|
|
"grad_norm": 0.45428146737608754,
|
|
"learning_rate": 6.623032451062542e-05,
|
|
"loss": 0.2287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21560588479042053,
|
|
"step": 1010,
|
|
"valid_targets_mean": 2806.8,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 3.2018927444794953,
|
|
"grad_norm": 0.4396918249543382,
|
|
"learning_rate": 6.593557341213457e-05,
|
|
"loss": 0.2328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22254452109336853,
|
|
"step": 1015,
|
|
"valid_targets_mean": 2975.2,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 3.217665615141956,
|
|
"grad_norm": 0.42749423096915035,
|
|
"learning_rate": 6.564020462225679e-05,
|
|
"loss": 0.2404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2484319508075714,
|
|
"step": 1020,
|
|
"valid_targets_mean": 3568.7,
|
|
"valid_targets_min": 843
|
|
},
|
|
{
|
|
"epoch": 3.2334384858044163,
|
|
"grad_norm": 0.39159517401747657,
|
|
"learning_rate": 6.534422959001585e-05,
|
|
"loss": 0.235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22285829484462738,
|
|
"step": 1025,
|
|
"valid_targets_mean": 3964.2,
|
|
"valid_targets_min": 1164
|
|
},
|
|
{
|
|
"epoch": 3.249211356466877,
|
|
"grad_norm": 0.4291962426431375,
|
|
"learning_rate": 6.504765978793443e-05,
|
|
"loss": 0.2355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27856796979904175,
|
|
"step": 1030,
|
|
"valid_targets_mean": 4023.6,
|
|
"valid_targets_min": 1201
|
|
},
|
|
{
|
|
"epoch": 3.2649842271293377,
|
|
"grad_norm": 0.4450053564657277,
|
|
"learning_rate": 6.475050671158961e-05,
|
|
"loss": 0.2297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21933044493198395,
|
|
"step": 1035,
|
|
"valid_targets_mean": 3322.5,
|
|
"valid_targets_min": 865
|
|
},
|
|
{
|
|
"epoch": 3.280757097791798,
|
|
"grad_norm": 0.43034585118259266,
|
|
"learning_rate": 6.445278187916722e-05,
|
|
"loss": 0.2355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2606717050075531,
|
|
"step": 1040,
|
|
"valid_targets_mean": 3505.2,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 3.2965299684542586,
|
|
"grad_norm": 0.37410333228585696,
|
|
"learning_rate": 6.415449683101537e-05,
|
|
"loss": 0.2402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2275415062904358,
|
|
"step": 1045,
|
|
"valid_targets_mean": 4542.5,
|
|
"valid_targets_min": 1215
|
|
},
|
|
{
|
|
"epoch": 3.312302839116719,
|
|
"grad_norm": 0.40963729012232886,
|
|
"learning_rate": 6.385566312919716e-05,
|
|
"loss": 0.2397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20051081478595734,
|
|
"step": 1050,
|
|
"valid_targets_mean": 3469.5,
|
|
"valid_targets_min": 779
|
|
},
|
|
{
|
|
"epoch": 3.32807570977918,
|
|
"grad_norm": 0.4396800514108122,
|
|
"learning_rate": 6.355629235704248e-05,
|
|
"loss": 0.2423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2249513864517212,
|
|
"step": 1055,
|
|
"valid_targets_mean": 3372.2,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 3.3438485804416405,
|
|
"grad_norm": 0.3906571239789868,
|
|
"learning_rate": 6.3256396118699e-05,
|
|
"loss": 0.2309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2390715628862381,
|
|
"step": 1060,
|
|
"valid_targets_mean": 4076.8,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 3.359621451104101,
|
|
"grad_norm": 0.4207281200709271,
|
|
"learning_rate": 6.295598603868246e-05,
|
|
"loss": 0.2302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26639336347579956,
|
|
"step": 1065,
|
|
"valid_targets_mean": 4140.6,
|
|
"valid_targets_min": 991
|
|
},
|
|
{
|
|
"epoch": 3.3753943217665614,
|
|
"grad_norm": 0.41411028120988236,
|
|
"learning_rate": 6.265507376142594e-05,
|
|
"loss": 0.2366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24677228927612305,
|
|
"step": 1070,
|
|
"valid_targets_mean": 3294.0,
|
|
"valid_targets_min": 1005
|
|
},
|
|
{
|
|
"epoch": 3.3911671924290223,
|
|
"grad_norm": 0.4574462947352491,
|
|
"learning_rate": 6.235367095082867e-05,
|
|
"loss": 0.2402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24753236770629883,
|
|
"step": 1075,
|
|
"valid_targets_mean": 3098.3,
|
|
"valid_targets_min": 883
|
|
},
|
|
{
|
|
"epoch": 3.406940063091483,
|
|
"grad_norm": 0.44568583062116435,
|
|
"learning_rate": 6.205178928980377e-05,
|
|
"loss": 0.2425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23697753250598907,
|
|
"step": 1080,
|
|
"valid_targets_mean": 3348.2,
|
|
"valid_targets_min": 676
|
|
},
|
|
{
|
|
"epoch": 3.4227129337539433,
|
|
"grad_norm": 0.40272251602889164,
|
|
"learning_rate": 6.174944047982549e-05,
|
|
"loss": 0.2501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23433184623718262,
|
|
"step": 1085,
|
|
"valid_targets_mean": 3644.0,
|
|
"valid_targets_min": 1339
|
|
},
|
|
{
|
|
"epoch": 3.4384858044164037,
|
|
"grad_norm": 0.43918368499686794,
|
|
"learning_rate": 6.144663624047564e-05,
|
|
"loss": 0.2508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24812614917755127,
|
|
"step": 1090,
|
|
"valid_targets_mean": 3482.3,
|
|
"valid_targets_min": 749
|
|
},
|
|
{
|
|
"epoch": 3.454258675078864,
|
|
"grad_norm": 0.4228096716595015,
|
|
"learning_rate": 6.114338830898922e-05,
|
|
"loss": 0.2466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24244436621665955,
|
|
"step": 1095,
|
|
"valid_targets_mean": 3178.1,
|
|
"valid_targets_min": 825
|
|
},
|
|
{
|
|
"epoch": 3.470031545741325,
|
|
"grad_norm": 0.39163077627657583,
|
|
"learning_rate": 6.083970843979957e-05,
|
|
"loss": 0.2293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23379601538181305,
|
|
"step": 1100,
|
|
"valid_targets_mean": 3980.8,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 3.4858044164037856,
|
|
"grad_norm": 0.4328683495707631,
|
|
"learning_rate": 6.0535608404082724e-05,
|
|
"loss": 0.2527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2863377332687378,
|
|
"step": 1105,
|
|
"valid_targets_mean": 3707.3,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 3.501577287066246,
|
|
"grad_norm": 0.4102684667021291,
|
|
"learning_rate": 6.0231099989301086e-05,
|
|
"loss": 0.2364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2362220287322998,
|
|
"step": 1110,
|
|
"valid_targets_mean": 3817.2,
|
|
"valid_targets_min": 421
|
|
},
|
|
{
|
|
"epoch": 3.5173501577287065,
|
|
"grad_norm": 0.4640832182484784,
|
|
"learning_rate": 5.9926194998746624e-05,
|
|
"loss": 0.2366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24722759425640106,
|
|
"step": 1115,
|
|
"valid_targets_mean": 2821.9,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 3.5331230283911674,
|
|
"grad_norm": 0.4214315372040873,
|
|
"learning_rate": 5.9620905251083196e-05,
|
|
"loss": 0.2321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25116032361984253,
|
|
"step": 1120,
|
|
"valid_targets_mean": 4028.2,
|
|
"valid_targets_min": 1224
|
|
},
|
|
{
|
|
"epoch": 3.548895899053628,
|
|
"grad_norm": 0.4050858811434479,
|
|
"learning_rate": 5.931524257988864e-05,
|
|
"loss": 0.2445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2557201087474823,
|
|
"step": 1125,
|
|
"valid_targets_mean": 3980.5,
|
|
"valid_targets_min": 762
|
|
},
|
|
{
|
|
"epoch": 3.5646687697160884,
|
|
"grad_norm": 0.406660075884935,
|
|
"learning_rate": 5.900921883319591e-05,
|
|
"loss": 0.2344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23852787911891937,
|
|
"step": 1130,
|
|
"valid_targets_mean": 4305.7,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 3.580441640378549,
|
|
"grad_norm": 0.38441971141571263,
|
|
"learning_rate": 5.870284587303394e-05,
|
|
"loss": 0.2349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21825431287288666,
|
|
"step": 1135,
|
|
"valid_targets_mean": 3953.8,
|
|
"valid_targets_min": 1109
|
|
},
|
|
{
|
|
"epoch": 3.5962145110410093,
|
|
"grad_norm": 0.43559756325600296,
|
|
"learning_rate": 5.839613557496776e-05,
|
|
"loss": 0.2345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2443239539861679,
|
|
"step": 1140,
|
|
"valid_targets_mean": 3330.1,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 3.61198738170347,
|
|
"grad_norm": 0.4144684042912617,
|
|
"learning_rate": 5.808909982763825e-05,
|
|
"loss": 0.2425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2617552876472473,
|
|
"step": 1145,
|
|
"valid_targets_mean": 3567.0,
|
|
"valid_targets_min": 676
|
|
},
|
|
{
|
|
"epoch": 3.6277602523659307,
|
|
"grad_norm": 0.41419953192389397,
|
|
"learning_rate": 5.778175053230126e-05,
|
|
"loss": 0.2351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2499631643295288,
|
|
"step": 1150,
|
|
"valid_targets_mean": 3682.4,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 3.643533123028391,
|
|
"grad_norm": 0.4248863441860044,
|
|
"learning_rate": 5.747409960236637e-05,
|
|
"loss": 0.2263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2362518310546875,
|
|
"step": 1155,
|
|
"valid_targets_mean": 3552.7,
|
|
"valid_targets_min": 840
|
|
},
|
|
{
|
|
"epoch": 3.6593059936908516,
|
|
"grad_norm": 0.4077084205643721,
|
|
"learning_rate": 5.716615896293501e-05,
|
|
"loss": 0.225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23393303155899048,
|
|
"step": 1160,
|
|
"valid_targets_mean": 3526.3,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 3.6750788643533125,
|
|
"grad_norm": 0.42649978051773413,
|
|
"learning_rate": 5.68579405503383e-05,
|
|
"loss": 0.2369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21634435653686523,
|
|
"step": 1165,
|
|
"valid_targets_mean": 3465.7,
|
|
"valid_targets_min": 319
|
|
},
|
|
{
|
|
"epoch": 3.690851735015773,
|
|
"grad_norm": 0.40336913679264713,
|
|
"learning_rate": 5.654945631167433e-05,
|
|
"loss": 0.246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26242315769195557,
|
|
"step": 1170,
|
|
"valid_targets_mean": 4229.9,
|
|
"valid_targets_min": 1083
|
|
},
|
|
{
|
|
"epoch": 3.7066246056782335,
|
|
"grad_norm": 0.3665963077273498,
|
|
"learning_rate": 5.624071820434508e-05,
|
|
"loss": 0.2285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2064288854598999,
|
|
"step": 1175,
|
|
"valid_targets_mean": 4215.2,
|
|
"valid_targets_min": 1362
|
|
},
|
|
{
|
|
"epoch": 3.722397476340694,
|
|
"grad_norm": 0.3999997884440537,
|
|
"learning_rate": 5.593173819559294e-05,
|
|
"loss": 0.2469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22290341556072235,
|
|
"step": 1180,
|
|
"valid_targets_mean": 3652.1,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 3.7381703470031544,
|
|
"grad_norm": 0.3557096871710355,
|
|
"learning_rate": 5.562252826203687e-05,
|
|
"loss": 0.2442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22109612822532654,
|
|
"step": 1185,
|
|
"valid_targets_mean": 4395.5,
|
|
"valid_targets_min": 513
|
|
},
|
|
{
|
|
"epoch": 3.753943217665615,
|
|
"grad_norm": 0.431122095793743,
|
|
"learning_rate": 5.531310038920805e-05,
|
|
"loss": 0.2403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2331051081418991,
|
|
"step": 1190,
|
|
"valid_targets_mean": 3222.1,
|
|
"valid_targets_min": 1243
|
|
},
|
|
{
|
|
"epoch": 3.769716088328076,
|
|
"grad_norm": 0.3880204576004606,
|
|
"learning_rate": 5.500346657108545e-05,
|
|
"loss": 0.2368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2325371503829956,
|
|
"step": 1195,
|
|
"valid_targets_mean": 3813.5,
|
|
"valid_targets_min": 925
|
|
},
|
|
{
|
|
"epoch": 3.7854889589905363,
|
|
"grad_norm": 0.44598651479849705,
|
|
"learning_rate": 5.469363880963082e-05,
|
|
"loss": 0.2558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24427175521850586,
|
|
"step": 1200,
|
|
"valid_targets_mean": 3289.9,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 3.8012618296529967,
|
|
"grad_norm": 0.3931285059362459,
|
|
"learning_rate": 5.438362911432347e-05,
|
|
"loss": 0.2477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2518792152404785,
|
|
"step": 1205,
|
|
"valid_targets_mean": 4406.3,
|
|
"valid_targets_min": 1046
|
|
},
|
|
{
|
|
"epoch": 3.8170347003154577,
|
|
"grad_norm": 0.4035669817262566,
|
|
"learning_rate": 5.407344950169486e-05,
|
|
"loss": 0.2553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2715667188167572,
|
|
"step": 1210,
|
|
"valid_targets_mean": 4231.5,
|
|
"valid_targets_min": 941
|
|
},
|
|
{
|
|
"epoch": 3.832807570977918,
|
|
"grad_norm": 0.3957915259954298,
|
|
"learning_rate": 5.376311199486268e-05,
|
|
"loss": 0.2492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23946471512317657,
|
|
"step": 1215,
|
|
"valid_targets_mean": 3859.7,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 3.8485804416403786,
|
|
"grad_norm": 0.383172680689461,
|
|
"learning_rate": 5.3452628623064934e-05,
|
|
"loss": 0.2358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22085560858249664,
|
|
"step": 1220,
|
|
"valid_targets_mean": 3614.2,
|
|
"valid_targets_min": 955
|
|
},
|
|
{
|
|
"epoch": 3.864353312302839,
|
|
"grad_norm": 0.36385693259713353,
|
|
"learning_rate": 5.31420114211936e-05,
|
|
"loss": 0.2574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2752317488193512,
|
|
"step": 1225,
|
|
"valid_targets_mean": 4839.8,
|
|
"valid_targets_min": 1679
|
|
},
|
|
{
|
|
"epoch": 3.8801261829652995,
|
|
"grad_norm": 0.370531037509034,
|
|
"learning_rate": 5.2831272429328116e-05,
|
|
"loss": 0.2411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20723074674606323,
|
|
"step": 1230,
|
|
"valid_targets_mean": 3841.6,
|
|
"valid_targets_min": 238
|
|
},
|
|
{
|
|
"epoch": 3.89589905362776,
|
|
"grad_norm": 0.4465487481114569,
|
|
"learning_rate": 5.2520423692268775e-05,
|
|
"loss": 0.2452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2362392246723175,
|
|
"step": 1235,
|
|
"valid_targets_mean": 2908.7,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 3.911671924290221,
|
|
"grad_norm": 0.40183616151013524,
|
|
"learning_rate": 5.220947725906975e-05,
|
|
"loss": 0.2349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23403562605381012,
|
|
"step": 1240,
|
|
"valid_targets_mean": 4062.1,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 3.9274447949526814,
|
|
"grad_norm": 0.36720727944432746,
|
|
"learning_rate": 5.18984451825721e-05,
|
|
"loss": 0.2574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2156575322151184,
|
|
"step": 1245,
|
|
"valid_targets_mean": 4244.2,
|
|
"valid_targets_min": 1346
|
|
},
|
|
{
|
|
"epoch": 3.943217665615142,
|
|
"grad_norm": 0.36739318896142137,
|
|
"learning_rate": 5.1587339518936585e-05,
|
|
"loss": 0.2504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23408517241477966,
|
|
"step": 1250,
|
|
"valid_targets_mean": 4298.3,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 3.958990536277603,
|
|
"grad_norm": 0.38433498745772443,
|
|
"learning_rate": 5.127617232717631e-05,
|
|
"loss": 0.2435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26561039686203003,
|
|
"step": 1255,
|
|
"valid_targets_mean": 4233.7,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 3.9747634069400632,
|
|
"grad_norm": 0.417328324144264,
|
|
"learning_rate": 5.096495566868935e-05,
|
|
"loss": 0.2608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26622718572616577,
|
|
"step": 1260,
|
|
"valid_targets_mean": 3380.5,
|
|
"valid_targets_min": 1257
|
|
},
|
|
{
|
|
"epoch": 3.9905362776025237,
|
|
"grad_norm": 0.40583456982881966,
|
|
"learning_rate": 5.065370160679115e-05,
|
|
"loss": 0.2419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2431681752204895,
|
|
"step": 1265,
|
|
"valid_targets_mean": 3864.8,
|
|
"valid_targets_min": 899
|
|
},
|
|
{
|
|
"epoch": 4.006309148264984,
|
|
"grad_norm": 0.4821387411125082,
|
|
"learning_rate": 5.034242220624706e-05,
|
|
"loss": 0.2013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15413779020309448,
|
|
"step": 1270,
|
|
"valid_targets_mean": 3352.2,
|
|
"valid_targets_min": 830
|
|
},
|
|
{
|
|
"epoch": 4.022082018927445,
|
|
"grad_norm": 0.5507076000324466,
|
|
"learning_rate": 5.003112953280452e-05,
|
|
"loss": 0.1431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13071414828300476,
|
|
"step": 1275,
|
|
"valid_targets_mean": 3422.1,
|
|
"valid_targets_min": 869
|
|
},
|
|
{
|
|
"epoch": 4.037854889589905,
|
|
"grad_norm": 0.4613969580120687,
|
|
"learning_rate": 4.971983565272553e-05,
|
|
"loss": 0.1438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12674719095230103,
|
|
"step": 1280,
|
|
"valid_targets_mean": 2750.3,
|
|
"valid_targets_min": 994
|
|
},
|
|
{
|
|
"epoch": 4.053627760252366,
|
|
"grad_norm": 0.4152583203563749,
|
|
"learning_rate": 4.940855263231873e-05,
|
|
"loss": 0.1387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1208362877368927,
|
|
"step": 1285,
|
|
"valid_targets_mean": 3728.6,
|
|
"valid_targets_min": 961
|
|
},
|
|
{
|
|
"epoch": 4.069400630914826,
|
|
"grad_norm": 0.4482984088898629,
|
|
"learning_rate": 4.909729253747197e-05,
|
|
"loss": 0.1464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1652022749185562,
|
|
"step": 1290,
|
|
"valid_targets_mean": 3619.0,
|
|
"valid_targets_min": 1439
|
|
},
|
|
{
|
|
"epoch": 4.085173501577287,
|
|
"grad_norm": 0.399726958950638,
|
|
"learning_rate": 4.878606743318439e-05,
|
|
"loss": 0.1425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11733722686767578,
|
|
"step": 1295,
|
|
"valid_targets_mean": 3720.7,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 4.100946372239748,
|
|
"grad_norm": 0.46687530262082305,
|
|
"learning_rate": 4.8474889383098855e-05,
|
|
"loss": 0.1452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1552787721157074,
|
|
"step": 1300,
|
|
"valid_targets_mean": 3531.9,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 4.116719242902208,
|
|
"grad_norm": 0.4162751106492577,
|
|
"learning_rate": 4.816377044903428e-05,
|
|
"loss": 0.1429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12626974284648895,
|
|
"step": 1305,
|
|
"valid_targets_mean": 4288.3,
|
|
"valid_targets_min": 1040
|
|
},
|
|
{
|
|
"epoch": 4.132492113564669,
|
|
"grad_norm": 0.49835621727107526,
|
|
"learning_rate": 4.7852722690518196e-05,
|
|
"loss": 0.1509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1440780758857727,
|
|
"step": 1310,
|
|
"valid_targets_mean": 2622.8,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 4.148264984227129,
|
|
"grad_norm": 0.43297110173064957,
|
|
"learning_rate": 4.75417581643192e-05,
|
|
"loss": 0.1389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14077317714691162,
|
|
"step": 1315,
|
|
"valid_targets_mean": 3629.1,
|
|
"valid_targets_min": 809
|
|
},
|
|
{
|
|
"epoch": 4.16403785488959,
|
|
"grad_norm": 0.4263831486425033,
|
|
"learning_rate": 4.723088892397968e-05,
|
|
"loss": 0.1387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13717670738697052,
|
|
"step": 1320,
|
|
"valid_targets_mean": 3560.4,
|
|
"valid_targets_min": 762
|
|
},
|
|
{
|
|
"epoch": 4.17981072555205,
|
|
"grad_norm": 0.40962122241683374,
|
|
"learning_rate": 4.6920127019348556e-05,
|
|
"loss": 0.1432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14324375987052917,
|
|
"step": 1325,
|
|
"valid_targets_mean": 4316.1,
|
|
"valid_targets_min": 1018
|
|
},
|
|
{
|
|
"epoch": 4.195583596214511,
|
|
"grad_norm": 0.39674021617148886,
|
|
"learning_rate": 4.6609484496114256e-05,
|
|
"loss": 0.1384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13059474527835846,
|
|
"step": 1330,
|
|
"valid_targets_mean": 4042.8,
|
|
"valid_targets_min": 356
|
|
},
|
|
{
|
|
"epoch": 4.211356466876971,
|
|
"grad_norm": 0.42194240284178985,
|
|
"learning_rate": 4.629897339533771e-05,
|
|
"loss": 0.1371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11975990235805511,
|
|
"step": 1335,
|
|
"valid_targets_mean": 3509.7,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 4.2271293375394325,
|
|
"grad_norm": 0.4348331012114048,
|
|
"learning_rate": 4.598860575298575e-05,
|
|
"loss": 0.1429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14041069149971008,
|
|
"step": 1340,
|
|
"valid_targets_mean": 3980.3,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 4.242902208201893,
|
|
"grad_norm": 0.47949363282852,
|
|
"learning_rate": 4.5678393599464435e-05,
|
|
"loss": 0.1533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18775832653045654,
|
|
"step": 1345,
|
|
"valid_targets_mean": 3573.2,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 4.2586750788643535,
|
|
"grad_norm": 0.42941461444596235,
|
|
"learning_rate": 4.5368348959152864e-05,
|
|
"loss": 0.1555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13734444975852966,
|
|
"step": 1350,
|
|
"valid_targets_mean": 3916.9,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 4.274447949526814,
|
|
"grad_norm": 0.4550660502056618,
|
|
"learning_rate": 4.505848384993696e-05,
|
|
"loss": 0.1421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1418776959180832,
|
|
"step": 1355,
|
|
"valid_targets_mean": 3606.0,
|
|
"valid_targets_min": 815
|
|
},
|
|
{
|
|
"epoch": 4.290220820189274,
|
|
"grad_norm": 0.4330817089869141,
|
|
"learning_rate": 4.474881028274375e-05,
|
|
"loss": 0.1479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1301407814025879,
|
|
"step": 1360,
|
|
"valid_targets_mean": 3551.2,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 4.305993690851735,
|
|
"grad_norm": 0.414328135805903,
|
|
"learning_rate": 4.4439340261075716e-05,
|
|
"loss": 0.1459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14358800649642944,
|
|
"step": 1365,
|
|
"valid_targets_mean": 3952.6,
|
|
"valid_targets_min": 1010
|
|
},
|
|
{
|
|
"epoch": 4.321766561514195,
|
|
"grad_norm": 0.3935072929678093,
|
|
"learning_rate": 4.413008578054558e-05,
|
|
"loss": 0.1486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1338864117860794,
|
|
"step": 1370,
|
|
"valid_targets_mean": 4548.4,
|
|
"valid_targets_min": 1340
|
|
},
|
|
{
|
|
"epoch": 4.337539432176656,
|
|
"grad_norm": 0.43902808024930545,
|
|
"learning_rate": 4.3821058828411244e-05,
|
|
"loss": 0.1408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14350779354572296,
|
|
"step": 1375,
|
|
"valid_targets_mean": 3322.0,
|
|
"valid_targets_min": 1092
|
|
},
|
|
{
|
|
"epoch": 4.353312302839116,
|
|
"grad_norm": 0.47832433617177983,
|
|
"learning_rate": 4.35122713831113e-05,
|
|
"loss": 0.1453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16027812659740448,
|
|
"step": 1380,
|
|
"valid_targets_mean": 3451.2,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 4.369085173501578,
|
|
"grad_norm": 0.3780995897403291,
|
|
"learning_rate": 4.320373541380054e-05,
|
|
"loss": 0.1446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1329713761806488,
|
|
"step": 1385,
|
|
"valid_targets_mean": 4768.9,
|
|
"valid_targets_min": 374
|
|
},
|
|
{
|
|
"epoch": 4.384858044164038,
|
|
"grad_norm": 0.41367517391687664,
|
|
"learning_rate": 4.289546287988614e-05,
|
|
"loss": 0.1488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14861229062080383,
|
|
"step": 1390,
|
|
"valid_targets_mean": 4518.6,
|
|
"valid_targets_min": 1142
|
|
},
|
|
{
|
|
"epoch": 4.400630914826499,
|
|
"grad_norm": 0.38660135861653716,
|
|
"learning_rate": 4.258746573056401e-05,
|
|
"loss": 0.152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13534846901893616,
|
|
"step": 1395,
|
|
"valid_targets_mean": 4576.8,
|
|
"valid_targets_min": 1064
|
|
},
|
|
{
|
|
"epoch": 4.416403785488959,
|
|
"grad_norm": 0.43614094592909824,
|
|
"learning_rate": 4.2279755904355704e-05,
|
|
"loss": 0.1518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16031000018119812,
|
|
"step": 1400,
|
|
"valid_targets_mean": 4019.5,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 4.4321766561514195,
|
|
"grad_norm": 0.4121639292417491,
|
|
"learning_rate": 4.197234532864558e-05,
|
|
"loss": 0.138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1366066038608551,
|
|
"step": 1405,
|
|
"valid_targets_mean": 3927.2,
|
|
"valid_targets_min": 789
|
|
},
|
|
{
|
|
"epoch": 4.44794952681388,
|
|
"grad_norm": 0.46001162500530945,
|
|
"learning_rate": 4.1665245919218544e-05,
|
|
"loss": 0.1509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16069738566875458,
|
|
"step": 1410,
|
|
"valid_targets_mean": 3374.1,
|
|
"valid_targets_min": 1130
|
|
},
|
|
{
|
|
"epoch": 4.4637223974763405,
|
|
"grad_norm": 0.4415524300689523,
|
|
"learning_rate": 4.135846957979811e-05,
|
|
"loss": 0.1486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14812339842319489,
|
|
"step": 1415,
|
|
"valid_targets_mean": 3858.5,
|
|
"valid_targets_min": 976
|
|
},
|
|
{
|
|
"epoch": 4.479495268138801,
|
|
"grad_norm": 0.41572746327510646,
|
|
"learning_rate": 4.105202820158503e-05,
|
|
"loss": 0.148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14529840648174286,
|
|
"step": 1420,
|
|
"valid_targets_mean": 4642.3,
|
|
"valid_targets_min": 1302
|
|
},
|
|
{
|
|
"epoch": 4.495268138801261,
|
|
"grad_norm": 0.40567170422574816,
|
|
"learning_rate": 4.074593366279636e-05,
|
|
"loss": 0.1347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11761084198951721,
|
|
"step": 1425,
|
|
"valid_targets_mean": 3919.9,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 4.511041009463723,
|
|
"grad_norm": 0.43551327648741345,
|
|
"learning_rate": 4.044019782820505e-05,
|
|
"loss": 0.1656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16616562008857727,
|
|
"step": 1430,
|
|
"valid_targets_mean": 3753.2,
|
|
"valid_targets_min": 899
|
|
},
|
|
{
|
|
"epoch": 4.526813880126183,
|
|
"grad_norm": 0.4304253714649899,
|
|
"learning_rate": 4.0134832548680006e-05,
|
|
"loss": 0.1479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14028897881507874,
|
|
"step": 1435,
|
|
"valid_targets_mean": 3892.4,
|
|
"valid_targets_min": 981
|
|
},
|
|
{
|
|
"epoch": 4.542586750788644,
|
|
"grad_norm": 0.4300902798658754,
|
|
"learning_rate": 3.982984966072677e-05,
|
|
"loss": 0.1444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14914816617965698,
|
|
"step": 1440,
|
|
"valid_targets_mean": 3653.2,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 4.558359621451104,
|
|
"grad_norm": 0.4657374621310647,
|
|
"learning_rate": 3.952526098602873e-05,
|
|
"loss": 0.1512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14326679706573486,
|
|
"step": 1445,
|
|
"valid_targets_mean": 3503.7,
|
|
"valid_targets_min": 1060
|
|
},
|
|
{
|
|
"epoch": 4.574132492113565,
|
|
"grad_norm": 0.43365046360826953,
|
|
"learning_rate": 3.9221078330988806e-05,
|
|
"loss": 0.1397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13995081186294556,
|
|
"step": 1450,
|
|
"valid_targets_mean": 3612.9,
|
|
"valid_targets_min": 1266
|
|
},
|
|
{
|
|
"epoch": 4.589905362776025,
|
|
"grad_norm": 0.4727320068947522,
|
|
"learning_rate": 3.89173134862719e-05,
|
|
"loss": 0.1505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1454961597919464,
|
|
"step": 1455,
|
|
"valid_targets_mean": 3642.2,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 4.605678233438486,
|
|
"grad_norm": 0.4414031361451162,
|
|
"learning_rate": 3.861397822634784e-05,
|
|
"loss": 0.1515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15584497153759003,
|
|
"step": 1460,
|
|
"valid_targets_mean": 3764.9,
|
|
"valid_targets_min": 1082
|
|
},
|
|
{
|
|
"epoch": 4.621451104100946,
|
|
"grad_norm": 0.41187705436552824,
|
|
"learning_rate": 3.831108430903494e-05,
|
|
"loss": 0.14,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14060024917125702,
|
|
"step": 1465,
|
|
"valid_targets_mean": 4490.7,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 4.6372239747634065,
|
|
"grad_norm": 0.43315410457939646,
|
|
"learning_rate": 3.800864347504437e-05,
|
|
"loss": 0.1513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14828018844127655,
|
|
"step": 1470,
|
|
"valid_targets_mean": 3756.7,
|
|
"valid_targets_min": 895
|
|
},
|
|
{
|
|
"epoch": 4.652996845425868,
|
|
"grad_norm": 0.4315038585680116,
|
|
"learning_rate": 3.7706667447524876e-05,
|
|
"loss": 0.1524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16025035083293915,
|
|
"step": 1475,
|
|
"valid_targets_mean": 3776.8,
|
|
"valid_targets_min": 757
|
|
},
|
|
{
|
|
"epoch": 4.668769716088328,
|
|
"grad_norm": 0.4629700901684569,
|
|
"learning_rate": 3.740516793160855e-05,
|
|
"loss": 0.1465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1619722992181778,
|
|
"step": 1480,
|
|
"valid_targets_mean": 3953.1,
|
|
"valid_targets_min": 1047
|
|
},
|
|
{
|
|
"epoch": 4.684542586750789,
|
|
"grad_norm": 0.4506448009518266,
|
|
"learning_rate": 3.710415661395699e-05,
|
|
"loss": 0.1503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1594841182231903,
|
|
"step": 1485,
|
|
"valid_targets_mean": 3689.6,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 4.700315457413249,
|
|
"grad_norm": 0.4595891610524585,
|
|
"learning_rate": 3.6803645162308376e-05,
|
|
"loss": 0.1495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17818719148635864,
|
|
"step": 1490,
|
|
"valid_targets_mean": 3929.1,
|
|
"valid_targets_min": 940
|
|
},
|
|
{
|
|
"epoch": 4.71608832807571,
|
|
"grad_norm": 0.4150467966875533,
|
|
"learning_rate": 3.6503645225025175e-05,
|
|
"loss": 0.1606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16535528004169464,
|
|
"step": 1495,
|
|
"valid_targets_mean": 4194.8,
|
|
"valid_targets_min": 791
|
|
},
|
|
{
|
|
"epoch": 4.73186119873817,
|
|
"grad_norm": 0.4061399055435858,
|
|
"learning_rate": 3.620416843064266e-05,
|
|
"loss": 0.1449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13359515368938446,
|
|
"step": 1500,
|
|
"valid_targets_mean": 4139.6,
|
|
"valid_targets_min": 676
|
|
},
|
|
{
|
|
"epoch": 4.747634069400631,
|
|
"grad_norm": 0.4132881612276861,
|
|
"learning_rate": 3.5905226387418126e-05,
|
|
"loss": 0.1483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1458536684513092,
|
|
"step": 1505,
|
|
"valid_targets_mean": 3785.9,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 4.763406940063091,
|
|
"grad_norm": 0.4882855781127496,
|
|
"learning_rate": 3.5606830682880965e-05,
|
|
"loss": 0.1556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18403048813343048,
|
|
"step": 1510,
|
|
"valid_targets_mean": 3238.6,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 4.779179810725552,
|
|
"grad_norm": 0.48302912366147344,
|
|
"learning_rate": 3.530899288338352e-05,
|
|
"loss": 0.1563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15412986278533936,
|
|
"step": 1515,
|
|
"valid_targets_mean": 3244.3,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 4.794952681388013,
|
|
"grad_norm": 0.41004786154940126,
|
|
"learning_rate": 3.501172453365268e-05,
|
|
"loss": 0.1502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13483601808547974,
|
|
"step": 1520,
|
|
"valid_targets_mean": 4170.5,
|
|
"valid_targets_min": 1177
|
|
},
|
|
{
|
|
"epoch": 4.8107255520504735,
|
|
"grad_norm": 0.4225330709889332,
|
|
"learning_rate": 3.471503715634252e-05,
|
|
"loss": 0.1403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14184939861297607,
|
|
"step": 1525,
|
|
"valid_targets_mean": 3843.4,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 4.826498422712934,
|
|
"grad_norm": 0.37596497087636227,
|
|
"learning_rate": 3.44189422515875e-05,
|
|
"loss": 0.1482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15294434130191803,
|
|
"step": 1530,
|
|
"valid_targets_mean": 4488.2,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 4.842271293375394,
|
|
"grad_norm": 0.38087264895160944,
|
|
"learning_rate": 3.4123451296556845e-05,
|
|
"loss": 0.1532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13751477003097534,
|
|
"step": 1535,
|
|
"valid_targets_mean": 4411.7,
|
|
"valid_targets_min": 337
|
|
},
|
|
{
|
|
"epoch": 4.858044164037855,
|
|
"grad_norm": 0.39361476642320653,
|
|
"learning_rate": 3.382857574500957e-05,
|
|
"loss": 0.1443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13432013988494873,
|
|
"step": 1540,
|
|
"valid_targets_mean": 4315.6,
|
|
"valid_targets_min": 1197
|
|
},
|
|
{
|
|
"epoch": 4.873817034700315,
|
|
"grad_norm": 0.42718530839186636,
|
|
"learning_rate": 3.3534327026850574e-05,
|
|
"loss": 0.1521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16855821013450623,
|
|
"step": 1545,
|
|
"valid_targets_mean": 4262.7,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 4.889589905362776,
|
|
"grad_norm": 0.3917349041475647,
|
|
"learning_rate": 3.324071654768754e-05,
|
|
"loss": 0.1333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1179637610912323,
|
|
"step": 1550,
|
|
"valid_targets_mean": 3897.3,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 4.905362776025236,
|
|
"grad_norm": 0.4466560221892044,
|
|
"learning_rate": 3.2947755688388874e-05,
|
|
"loss": 0.1449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13815364241600037,
|
|
"step": 1555,
|
|
"valid_targets_mean": 3516.4,
|
|
"valid_targets_min": 641
|
|
},
|
|
{
|
|
"epoch": 4.921135646687697,
|
|
"grad_norm": 0.4240920687523467,
|
|
"learning_rate": 3.26554558046426e-05,
|
|
"loss": 0.1553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13758045434951782,
|
|
"step": 1560,
|
|
"valid_targets_mean": 4800.1,
|
|
"valid_targets_min": 666
|
|
},
|
|
{
|
|
"epoch": 4.936908517350158,
|
|
"grad_norm": 0.39942977491928344,
|
|
"learning_rate": 3.236382822651606e-05,
|
|
"loss": 0.149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12246360629796982,
|
|
"step": 1565,
|
|
"valid_targets_mean": 3751.0,
|
|
"valid_targets_min": 380
|
|
},
|
|
{
|
|
"epoch": 4.952681388012619,
|
|
"grad_norm": 0.40923515884045225,
|
|
"learning_rate": 3.207288425801689e-05,
|
|
"loss": 0.1398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13511720299720764,
|
|
"step": 1570,
|
|
"valid_targets_mean": 4188.2,
|
|
"valid_targets_min": 1496
|
|
},
|
|
{
|
|
"epoch": 4.968454258675079,
|
|
"grad_norm": 0.47290889467879393,
|
|
"learning_rate": 3.1782635176654764e-05,
|
|
"loss": 0.1568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17075365781784058,
|
|
"step": 1575,
|
|
"valid_targets_mean": 3425.8,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 4.9842271293375395,
|
|
"grad_norm": 0.44727850590756046,
|
|
"learning_rate": 3.149309223300428e-05,
|
|
"loss": 0.1456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16327592730522156,
|
|
"step": 1580,
|
|
"valid_targets_mean": 3940.4,
|
|
"valid_targets_min": 1439
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.4095300324275936,
|
|
"learning_rate": 3.120426665026891e-05,
|
|
"loss": 0.1485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14301863312721252,
|
|
"step": 1585,
|
|
"valid_targets_mean": 3949.0,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 5.0157728706624605,
|
|
"grad_norm": 0.4965073217628422,
|
|
"learning_rate": 3.091616962384587e-05,
|
|
"loss": 0.0813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06953192502260208,
|
|
"step": 1590,
|
|
"valid_targets_mean": 3511.0,
|
|
"valid_targets_min": 421
|
|
},
|
|
{
|
|
"epoch": 5.031545741324921,
|
|
"grad_norm": 0.39829247464224404,
|
|
"learning_rate": 3.06288123208923e-05,
|
|
"loss": 0.0813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07943619787693024,
|
|
"step": 1595,
|
|
"valid_targets_mean": 3653.5,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 5.047318611987381,
|
|
"grad_norm": 0.36709273418268795,
|
|
"learning_rate": 3.034220587989226e-05,
|
|
"loss": 0.0785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0734889879822731,
|
|
"step": 1600,
|
|
"valid_targets_mean": 4551.4,
|
|
"valid_targets_min": 757
|
|
},
|
|
{
|
|
"epoch": 5.063091482649842,
|
|
"grad_norm": 0.46061848051569354,
|
|
"learning_rate": 3.005636141022512e-05,
|
|
"loss": 0.0806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09356047958135605,
|
|
"step": 1605,
|
|
"valid_targets_mean": 4240.9,
|
|
"valid_targets_min": 1133
|
|
},
|
|
{
|
|
"epoch": 5.078864353312303,
|
|
"grad_norm": 0.3863919471144128,
|
|
"learning_rate": 2.977128999173482e-05,
|
|
"loss": 0.0793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07880861312150955,
|
|
"step": 1610,
|
|
"valid_targets_mean": 4272.3,
|
|
"valid_targets_min": 961
|
|
},
|
|
{
|
|
"epoch": 5.094637223974764,
|
|
"grad_norm": 0.4006873478397203,
|
|
"learning_rate": 2.948700267430049e-05,
|
|
"loss": 0.0754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07071110606193542,
|
|
"step": 1615,
|
|
"valid_targets_mean": 3531.6,
|
|
"valid_targets_min": 676
|
|
},
|
|
{
|
|
"epoch": 5.110410094637224,
|
|
"grad_norm": 0.35708727468781015,
|
|
"learning_rate": 2.920351047740808e-05,
|
|
"loss": 0.0773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07418598234653473,
|
|
"step": 1620,
|
|
"valid_targets_mean": 4278.7,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 5.126182965299685,
|
|
"grad_norm": 0.36066950208005905,
|
|
"learning_rate": 2.892082438972325e-05,
|
|
"loss": 0.0854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07705340534448624,
|
|
"step": 1625,
|
|
"valid_targets_mean": 4657.5,
|
|
"valid_targets_min": 1511
|
|
},
|
|
{
|
|
"epoch": 5.141955835962145,
|
|
"grad_norm": 0.3755353448825496,
|
|
"learning_rate": 2.863895536866541e-05,
|
|
"loss": 0.0779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0808149129152298,
|
|
"step": 1630,
|
|
"valid_targets_mean": 4137.3,
|
|
"valid_targets_min": 951
|
|
},
|
|
{
|
|
"epoch": 5.157728706624606,
|
|
"grad_norm": 0.38944321711541496,
|
|
"learning_rate": 2.835791433998301e-05,
|
|
"loss": 0.084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07152563333511353,
|
|
"step": 1635,
|
|
"valid_targets_mean": 3748.3,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 5.173501577287066,
|
|
"grad_norm": 0.3859159269519384,
|
|
"learning_rate": 2.807771219733004e-05,
|
|
"loss": 0.0783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07155505567789078,
|
|
"step": 1640,
|
|
"valid_targets_mean": 4150.2,
|
|
"valid_targets_min": 1198
|
|
},
|
|
{
|
|
"epoch": 5.1892744479495265,
|
|
"grad_norm": 0.40877023575458493,
|
|
"learning_rate": 2.7798359801843766e-05,
|
|
"loss": 0.0764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07761353999376297,
|
|
"step": 1645,
|
|
"valid_targets_mean": 3865.1,
|
|
"valid_targets_min": 1006
|
|
},
|
|
{
|
|
"epoch": 5.205047318611987,
|
|
"grad_norm": 0.4307421685519268,
|
|
"learning_rate": 2.7519867981723712e-05,
|
|
"loss": 0.0811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0829286277294159,
|
|
"step": 1650,
|
|
"valid_targets_mean": 3674.1,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 5.220820189274448,
|
|
"grad_norm": 0.3970534191384527,
|
|
"learning_rate": 2.724224753181197e-05,
|
|
"loss": 0.081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09701694548130035,
|
|
"step": 1655,
|
|
"valid_targets_mean": 3567.1,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 5.236593059936909,
|
|
"grad_norm": 0.34934506488749156,
|
|
"learning_rate": 2.6965509213174777e-05,
|
|
"loss": 0.0758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06851125508546829,
|
|
"step": 1660,
|
|
"valid_targets_mean": 4961.3,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 5.252365930599369,
|
|
"grad_norm": 0.39059286750879313,
|
|
"learning_rate": 2.6689663752685334e-05,
|
|
"loss": 0.0791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07081689685583115,
|
|
"step": 1665,
|
|
"valid_targets_mean": 3873.0,
|
|
"valid_targets_min": 442
|
|
},
|
|
{
|
|
"epoch": 5.26813880126183,
|
|
"grad_norm": 0.4374595810187239,
|
|
"learning_rate": 2.641472184260809e-05,
|
|
"loss": 0.0814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0809621810913086,
|
|
"step": 1670,
|
|
"valid_targets_mean": 3478.6,
|
|
"valid_targets_min": 1393
|
|
},
|
|
{
|
|
"epoch": 5.28391167192429,
|
|
"grad_norm": 0.42133117621400595,
|
|
"learning_rate": 2.614069414018428e-05,
|
|
"loss": 0.0758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08253119140863419,
|
|
"step": 1675,
|
|
"valid_targets_mean": 3928.3,
|
|
"valid_targets_min": 814
|
|
},
|
|
{
|
|
"epoch": 5.299684542586751,
|
|
"grad_norm": 0.4013385383890111,
|
|
"learning_rate": 2.5867591267218805e-05,
|
|
"loss": 0.0818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08884066343307495,
|
|
"step": 1680,
|
|
"valid_targets_mean": 4909.7,
|
|
"valid_targets_min": 1545
|
|
},
|
|
{
|
|
"epoch": 5.315457413249211,
|
|
"grad_norm": 0.39344189247215844,
|
|
"learning_rate": 2.5595423809668452e-05,
|
|
"loss": 0.076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07590728253126144,
|
|
"step": 1685,
|
|
"valid_targets_mean": 4204.9,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 5.331230283911672,
|
|
"grad_norm": 0.3542083009513637,
|
|
"learning_rate": 2.532420231723172e-05,
|
|
"loss": 0.078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06620984524488449,
|
|
"step": 1690,
|
|
"valid_targets_mean": 5040.6,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 5.347003154574132,
|
|
"grad_norm": 0.3955609190369812,
|
|
"learning_rate": 2.5053937302939767e-05,
|
|
"loss": 0.0831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07462330162525177,
|
|
"step": 1695,
|
|
"valid_targets_mean": 3349.1,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 5.3627760252365935,
|
|
"grad_norm": 0.40977897760118465,
|
|
"learning_rate": 2.4784639242748953e-05,
|
|
"loss": 0.0824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0794137567281723,
|
|
"step": 1700,
|
|
"valid_targets_mean": 3732.3,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 5.378548895899054,
|
|
"grad_norm": 0.3936043142614415,
|
|
"learning_rate": 2.451631857513472e-05,
|
|
"loss": 0.0821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0751621276140213,
|
|
"step": 1705,
|
|
"valid_targets_mean": 3926.6,
|
|
"valid_targets_min": 1082
|
|
},
|
|
{
|
|
"epoch": 5.394321766561514,
|
|
"grad_norm": 0.39725808754116887,
|
|
"learning_rate": 2.4248985700687084e-05,
|
|
"loss": 0.0891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0900493860244751,
|
|
"step": 1710,
|
|
"valid_targets_mean": 4563.2,
|
|
"valid_targets_min": 847
|
|
},
|
|
{
|
|
"epoch": 5.410094637223975,
|
|
"grad_norm": 0.38859670174776123,
|
|
"learning_rate": 2.39826509817074e-05,
|
|
"loss": 0.0751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06861039251089096,
|
|
"step": 1715,
|
|
"valid_targets_mean": 3246.6,
|
|
"valid_targets_min": 988
|
|
},
|
|
{
|
|
"epoch": 5.425867507886435,
|
|
"grad_norm": 0.3977406185047871,
|
|
"learning_rate": 2.3717324741806718e-05,
|
|
"loss": 0.0775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07713861018419266,
|
|
"step": 1720,
|
|
"valid_targets_mean": 4137.5,
|
|
"valid_targets_min": 827
|
|
},
|
|
{
|
|
"epoch": 5.441640378548896,
|
|
"grad_norm": 0.3880850246395261,
|
|
"learning_rate": 2.3453017265505673e-05,
|
|
"loss": 0.0856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07314358651638031,
|
|
"step": 1725,
|
|
"valid_targets_mean": 3994.6,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 5.457413249211356,
|
|
"grad_norm": 0.3673248293665118,
|
|
"learning_rate": 2.3189738797835708e-05,
|
|
"loss": 0.0788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07732352614402771,
|
|
"step": 1730,
|
|
"valid_targets_mean": 4492.2,
|
|
"valid_targets_min": 814
|
|
},
|
|
{
|
|
"epoch": 5.473186119873817,
|
|
"grad_norm": 0.39818019842028823,
|
|
"learning_rate": 2.292749954394216e-05,
|
|
"loss": 0.0767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09227725863456726,
|
|
"step": 1735,
|
|
"valid_targets_mean": 4683.8,
|
|
"valid_targets_min": 973
|
|
},
|
|
{
|
|
"epoch": 5.488958990536277,
|
|
"grad_norm": 0.440987757424753,
|
|
"learning_rate": 2.266630966868852e-05,
|
|
"loss": 0.0846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10240776091814041,
|
|
"step": 1740,
|
|
"valid_targets_mean": 3867.3,
|
|
"valid_targets_min": 926
|
|
},
|
|
{
|
|
"epoch": 5.504731861198739,
|
|
"grad_norm": 0.4048674586856181,
|
|
"learning_rate": 2.2406179296262453e-05,
|
|
"loss": 0.0759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0714288204908371,
|
|
"step": 1745,
|
|
"valid_targets_mean": 3286.0,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 5.520504731861199,
|
|
"grad_norm": 0.4145008180790608,
|
|
"learning_rate": 2.2147118509783445e-05,
|
|
"loss": 0.0821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08144058287143707,
|
|
"step": 1750,
|
|
"valid_targets_mean": 3699.2,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 5.5362776025236595,
|
|
"grad_norm": 0.40904048276023325,
|
|
"learning_rate": 2.1889137350911894e-05,
|
|
"loss": 0.0728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08297403901815414,
|
|
"step": 1755,
|
|
"valid_targets_mean": 3566.0,
|
|
"valid_targets_min": 983
|
|
},
|
|
{
|
|
"epoch": 5.55205047318612,
|
|
"grad_norm": 0.40162391892165394,
|
|
"learning_rate": 2.1632245819459913e-05,
|
|
"loss": 0.0814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07219391316175461,
|
|
"step": 1760,
|
|
"valid_targets_mean": 4101.7,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 5.5678233438485805,
|
|
"grad_norm": 0.3716162176336755,
|
|
"learning_rate": 2.1376453873003664e-05,
|
|
"loss": 0.0729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07064441591501236,
|
|
"step": 1765,
|
|
"valid_targets_mean": 4033.3,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 5.583596214511041,
|
|
"grad_norm": 0.38872515423031256,
|
|
"learning_rate": 2.112177142649746e-05,
|
|
"loss": 0.0825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07718878984451294,
|
|
"step": 1770,
|
|
"valid_targets_mean": 4244.6,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 5.599369085173501,
|
|
"grad_norm": 0.41942771920686045,
|
|
"learning_rate": 2.0868208351889402e-05,
|
|
"loss": 0.0767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08104390650987625,
|
|
"step": 1775,
|
|
"valid_targets_mean": 3666.8,
|
|
"valid_targets_min": 1210
|
|
},
|
|
{
|
|
"epoch": 5.615141955835962,
|
|
"grad_norm": 0.4011121159572857,
|
|
"learning_rate": 2.0615774477738738e-05,
|
|
"loss": 0.0829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09074635058641434,
|
|
"step": 1780,
|
|
"valid_targets_mean": 4433.3,
|
|
"valid_targets_min": 1053
|
|
},
|
|
{
|
|
"epoch": 5.630914826498422,
|
|
"grad_norm": 0.3756850116684875,
|
|
"learning_rate": 2.0364479588834835e-05,
|
|
"loss": 0.0808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07267570495605469,
|
|
"step": 1785,
|
|
"valid_targets_mean": 3606.8,
|
|
"valid_targets_min": 1072
|
|
},
|
|
{
|
|
"epoch": 5.646687697160884,
|
|
"grad_norm": 0.3809432143334385,
|
|
"learning_rate": 2.0114333425817993e-05,
|
|
"loss": 0.0884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07739441841840744,
|
|
"step": 1790,
|
|
"valid_targets_mean": 3818.2,
|
|
"valid_targets_min": 1093
|
|
},
|
|
{
|
|
"epoch": 5.662460567823344,
|
|
"grad_norm": 0.42215943694545005,
|
|
"learning_rate": 1.9865345684801846e-05,
|
|
"loss": 0.0787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08445312082767487,
|
|
"step": 1795,
|
|
"valid_targets_mean": 4031.2,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 5.678233438485805,
|
|
"grad_norm": 0.44133345444932687,
|
|
"learning_rate": 1.9617526016997486e-05,
|
|
"loss": 0.0819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09316886961460114,
|
|
"step": 1800,
|
|
"valid_targets_mean": 3595.4,
|
|
"valid_targets_min": 827
|
|
},
|
|
{
|
|
"epoch": 5.694006309148265,
|
|
"grad_norm": 0.4028659473927513,
|
|
"learning_rate": 1.937088402833943e-05,
|
|
"loss": 0.0753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07645547389984131,
|
|
"step": 1805,
|
|
"valid_targets_mean": 3580.8,
|
|
"valid_targets_min": 1354
|
|
},
|
|
{
|
|
"epoch": 5.709779179810726,
|
|
"grad_norm": 0.3688483280116813,
|
|
"learning_rate": 1.9125429279113173e-05,
|
|
"loss": 0.0753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07246281206607819,
|
|
"step": 1810,
|
|
"valid_targets_mean": 4382.7,
|
|
"valid_targets_min": 1097
|
|
},
|
|
{
|
|
"epoch": 5.725552050473186,
|
|
"grad_norm": 0.4214260085884563,
|
|
"learning_rate": 1.8881171283584752e-05,
|
|
"loss": 0.0773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07776229083538055,
|
|
"step": 1815,
|
|
"valid_targets_mean": 3844.8,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 5.7413249211356465,
|
|
"grad_norm": 0.4179569614889491,
|
|
"learning_rate": 1.8638119509631853e-05,
|
|
"loss": 0.0843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08015948534011841,
|
|
"step": 1820,
|
|
"valid_targets_mean": 3177.2,
|
|
"valid_targets_min": 1444
|
|
},
|
|
{
|
|
"epoch": 5.757097791798107,
|
|
"grad_norm": 0.4128154611167564,
|
|
"learning_rate": 1.839628337837686e-05,
|
|
"loss": 0.0798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0777365192770958,
|
|
"step": 1825,
|
|
"valid_targets_mean": 3667.2,
|
|
"valid_targets_min": 771
|
|
},
|
|
{
|
|
"epoch": 5.7728706624605675,
|
|
"grad_norm": 0.40473025860177675,
|
|
"learning_rate": 1.8155672263821666e-05,
|
|
"loss": 0.0791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08187016099691391,
|
|
"step": 1830,
|
|
"valid_targets_mean": 3946.0,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 5.788643533123029,
|
|
"grad_norm": 0.4071920066237988,
|
|
"learning_rate": 1.7916295492484315e-05,
|
|
"loss": 0.0823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08413612842559814,
|
|
"step": 1835,
|
|
"valid_targets_mean": 3472.1,
|
|
"valid_targets_min": 1127
|
|
},
|
|
{
|
|
"epoch": 5.804416403785489,
|
|
"grad_norm": 0.445650703253756,
|
|
"learning_rate": 1.7678162343037524e-05,
|
|
"loss": 0.0889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08886728435754776,
|
|
"step": 1840,
|
|
"valid_targets_mean": 3285.9,
|
|
"valid_targets_min": 828
|
|
},
|
|
{
|
|
"epoch": 5.82018927444795,
|
|
"grad_norm": 0.45763369485934957,
|
|
"learning_rate": 1.744128204594893e-05,
|
|
"loss": 0.0728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08573994040489197,
|
|
"step": 1845,
|
|
"valid_targets_mean": 3639.5,
|
|
"valid_targets_min": 1063
|
|
},
|
|
{
|
|
"epoch": 5.83596214511041,
|
|
"grad_norm": 0.40548342584984587,
|
|
"learning_rate": 1.7205663783123436e-05,
|
|
"loss": 0.083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07565011829137802,
|
|
"step": 1850,
|
|
"valid_targets_mean": 3692.7,
|
|
"valid_targets_min": 1781
|
|
},
|
|
{
|
|
"epoch": 5.851735015772871,
|
|
"grad_norm": 0.3967368097642185,
|
|
"learning_rate": 1.6971316687547213e-05,
|
|
"loss": 0.0759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07565818727016449,
|
|
"step": 1855,
|
|
"valid_targets_mean": 3699.7,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 5.867507886435331,
|
|
"grad_norm": 0.3858704903050818,
|
|
"learning_rate": 1.6738249842933697e-05,
|
|
"loss": 0.0804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07207715511322021,
|
|
"step": 1860,
|
|
"valid_targets_mean": 3578.0,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 5.883280757097792,
|
|
"grad_norm": 0.4145062078348231,
|
|
"learning_rate": 1.6506472283371527e-05,
|
|
"loss": 0.0753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0762171596288681,
|
|
"step": 1865,
|
|
"valid_targets_mean": 3325.8,
|
|
"valid_targets_min": 873
|
|
},
|
|
{
|
|
"epoch": 5.899053627760252,
|
|
"grad_norm": 0.4057325123132319,
|
|
"learning_rate": 1.6275992992974308e-05,
|
|
"loss": 0.0734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07129954546689987,
|
|
"step": 1870,
|
|
"valid_targets_mean": 3912.2,
|
|
"valid_targets_min": 1032
|
|
},
|
|
{
|
|
"epoch": 5.914826498422713,
|
|
"grad_norm": 0.3837253838222537,
|
|
"learning_rate": 1.604682090553243e-05,
|
|
"loss": 0.077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06977653503417969,
|
|
"step": 1875,
|
|
"valid_targets_mean": 3829.0,
|
|
"valid_targets_min": 1345
|
|
},
|
|
{
|
|
"epoch": 5.930599369085174,
|
|
"grad_norm": 0.3768656129657237,
|
|
"learning_rate": 1.5818964904166756e-05,
|
|
"loss": 0.0795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08876457065343857,
|
|
"step": 1880,
|
|
"valid_targets_mean": 4444.9,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 5.946372239747634,
|
|
"grad_norm": 0.42683847888241805,
|
|
"learning_rate": 1.55924338209843e-05,
|
|
"loss": 0.0806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09318177402019501,
|
|
"step": 1885,
|
|
"valid_targets_mean": 3591.9,
|
|
"valid_targets_min": 851
|
|
},
|
|
{
|
|
"epoch": 5.962145110410095,
|
|
"grad_norm": 0.4563886592811939,
|
|
"learning_rate": 1.536723643673582e-05,
|
|
"loss": 0.0822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0869009792804718,
|
|
"step": 1890,
|
|
"valid_targets_mean": 3317.4,
|
|
"valid_targets_min": 897
|
|
},
|
|
{
|
|
"epoch": 5.977917981072555,
|
|
"grad_norm": 0.4058601731267914,
|
|
"learning_rate": 1.5143381480475583e-05,
|
|
"loss": 0.0835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08817443996667862,
|
|
"step": 1895,
|
|
"valid_targets_mean": 4137.1,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 5.993690851735016,
|
|
"grad_norm": 0.42511998845878957,
|
|
"learning_rate": 1.49208776292229e-05,
|
|
"loss": 0.0805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08329896628856659,
|
|
"step": 1900,
|
|
"valid_targets_mean": 3452.4,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 6.009463722397476,
|
|
"grad_norm": 0.3423195765299723,
|
|
"learning_rate": 1.4699733507625862e-05,
|
|
"loss": 0.0539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04928043857216835,
|
|
"step": 1905,
|
|
"valid_targets_mean": 4499.2,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 6.025236593059937,
|
|
"grad_norm": 0.42895594904272516,
|
|
"learning_rate": 1.4479957687626933e-05,
|
|
"loss": 0.0445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04039411246776581,
|
|
"step": 1910,
|
|
"valid_targets_mean": 3946.0,
|
|
"valid_targets_min": 918
|
|
},
|
|
{
|
|
"epoch": 6.041009463722397,
|
|
"grad_norm": 0.3345370252031064,
|
|
"learning_rate": 1.4261558688130838e-05,
|
|
"loss": 0.04,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04135072976350784,
|
|
"step": 1915,
|
|
"valid_targets_mean": 3727.4,
|
|
"valid_targets_min": 944
|
|
},
|
|
{
|
|
"epoch": 6.056782334384858,
|
|
"grad_norm": 0.31310879384045104,
|
|
"learning_rate": 1.4044544974674246e-05,
|
|
"loss": 0.0408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03876585513353348,
|
|
"step": 1920,
|
|
"valid_targets_mean": 3372.8,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 6.072555205047319,
|
|
"grad_norm": 0.3557013457592405,
|
|
"learning_rate": 1.3828924959097612e-05,
|
|
"loss": 0.0375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04300154745578766,
|
|
"step": 1925,
|
|
"valid_targets_mean": 4150.1,
|
|
"valid_targets_min": 1266
|
|
},
|
|
{
|
|
"epoch": 6.0883280757097795,
|
|
"grad_norm": 0.3615210747778512,
|
|
"learning_rate": 1.3614706999219213e-05,
|
|
"loss": 0.0437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043791744858026505,
|
|
"step": 1930,
|
|
"valid_targets_mean": 3000.1,
|
|
"valid_targets_min": 828
|
|
},
|
|
{
|
|
"epoch": 6.10410094637224,
|
|
"grad_norm": 0.33923014031111215,
|
|
"learning_rate": 1.340189939851112e-05,
|
|
"loss": 0.0382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04425927996635437,
|
|
"step": 1935,
|
|
"valid_targets_mean": 3822.5,
|
|
"valid_targets_min": 883
|
|
},
|
|
{
|
|
"epoch": 6.1198738170347005,
|
|
"grad_norm": 0.30112996820412613,
|
|
"learning_rate": 1.3190510405777345e-05,
|
|
"loss": 0.0403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03676736354827881,
|
|
"step": 1940,
|
|
"valid_targets_mean": 4339.9,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 6.135646687697161,
|
|
"grad_norm": 0.35740080693164195,
|
|
"learning_rate": 1.2980548214834142e-05,
|
|
"loss": 0.0411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038491666316986084,
|
|
"step": 1945,
|
|
"valid_targets_mean": 4319.2,
|
|
"valid_targets_min": 968
|
|
},
|
|
{
|
|
"epoch": 6.151419558359621,
|
|
"grad_norm": 0.28103551068841687,
|
|
"learning_rate": 1.2772020964192316e-05,
|
|
"loss": 0.0381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.030903656035661697,
|
|
"step": 1950,
|
|
"valid_targets_mean": 4118.2,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 6.167192429022082,
|
|
"grad_norm": 0.37464024197138274,
|
|
"learning_rate": 1.2564936736741867e-05,
|
|
"loss": 0.0426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.048352181911468506,
|
|
"step": 1955,
|
|
"valid_targets_mean": 3616.3,
|
|
"valid_targets_min": 1179
|
|
},
|
|
{
|
|
"epoch": 6.182965299684542,
|
|
"grad_norm": 0.29473480639763844,
|
|
"learning_rate": 1.23593035594386e-05,
|
|
"loss": 0.0375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03645594045519829,
|
|
"step": 1960,
|
|
"valid_targets_mean": 4119.2,
|
|
"valid_targets_min": 1467
|
|
},
|
|
{
|
|
"epoch": 6.198738170347003,
|
|
"grad_norm": 0.31569412821264303,
|
|
"learning_rate": 1.215512940299305e-05,
|
|
"loss": 0.0379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.031205907464027405,
|
|
"step": 1965,
|
|
"valid_targets_mean": 3533.7,
|
|
"valid_targets_min": 883
|
|
},
|
|
{
|
|
"epoch": 6.214511041009464,
|
|
"grad_norm": 0.32535633424487764,
|
|
"learning_rate": 1.1952422181561424e-05,
|
|
"loss": 0.0399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043385475873947144,
|
|
"step": 1970,
|
|
"valid_targets_mean": 3445.0,
|
|
"valid_targets_min": 948
|
|
},
|
|
{
|
|
"epoch": 6.230283911671925,
|
|
"grad_norm": 0.3177206593761059,
|
|
"learning_rate": 1.1751189752438957e-05,
|
|
"loss": 0.0427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03753677383065224,
|
|
"step": 1975,
|
|
"valid_targets_mean": 4261.9,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 6.246056782334385,
|
|
"grad_norm": 0.3280457853812721,
|
|
"learning_rate": 1.1551439915755274e-05,
|
|
"loss": 0.0389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.034581586718559265,
|
|
"step": 1980,
|
|
"valid_targets_mean": 3294.8,
|
|
"valid_targets_min": 519
|
|
},
|
|
{
|
|
"epoch": 6.261829652996846,
|
|
"grad_norm": 0.35275018539696723,
|
|
"learning_rate": 1.135318041417207e-05,
|
|
"loss": 0.0428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04116068780422211,
|
|
"step": 1985,
|
|
"valid_targets_mean": 3912.3,
|
|
"valid_targets_min": 1342
|
|
},
|
|
{
|
|
"epoch": 6.277602523659306,
|
|
"grad_norm": 0.31938399505639403,
|
|
"learning_rate": 1.1156418932582941e-05,
|
|
"loss": 0.0388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03801509737968445,
|
|
"step": 1990,
|
|
"valid_targets_mean": 4323.4,
|
|
"valid_targets_min": 341
|
|
},
|
|
{
|
|
"epoch": 6.2933753943217665,
|
|
"grad_norm": 0.33917209363685086,
|
|
"learning_rate": 1.096116309781558e-05,
|
|
"loss": 0.0391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04274391382932663,
|
|
"step": 1995,
|
|
"valid_targets_mean": 4026.5,
|
|
"valid_targets_min": 1491
|
|
},
|
|
{
|
|
"epoch": 6.309148264984227,
|
|
"grad_norm": 0.3325282925211043,
|
|
"learning_rate": 1.0767420478336093e-05,
|
|
"loss": 0.0414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03704637289047241,
|
|
"step": 2000,
|
|
"valid_targets_mean": 4074.3,
|
|
"valid_targets_min": 1068
|
|
},
|
|
{
|
|
"epoch": 6.3249211356466875,
|
|
"grad_norm": 0.37053697656386647,
|
|
"learning_rate": 1.0575198583955698e-05,
|
|
"loss": 0.042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04724776744842529,
|
|
"step": 2005,
|
|
"valid_targets_mean": 3510.2,
|
|
"valid_targets_min": 356
|
|
},
|
|
{
|
|
"epoch": 6.340694006309148,
|
|
"grad_norm": 0.3031965212003397,
|
|
"learning_rate": 1.0384504865539497e-05,
|
|
"loss": 0.0418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043518148362636566,
|
|
"step": 2010,
|
|
"valid_targets_mean": 4591.8,
|
|
"valid_targets_min": 637
|
|
},
|
|
{
|
|
"epoch": 6.356466876971609,
|
|
"grad_norm": 0.3245643524022609,
|
|
"learning_rate": 1.0195346714717813e-05,
|
|
"loss": 0.0374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03330378234386444,
|
|
"step": 2015,
|
|
"valid_targets_mean": 3551.2,
|
|
"valid_targets_min": 1164
|
|
},
|
|
{
|
|
"epoch": 6.37223974763407,
|
|
"grad_norm": 0.35181283985023565,
|
|
"learning_rate": 1.0007731463599601e-05,
|
|
"loss": 0.0417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04024139419198036,
|
|
"step": 2020,
|
|
"valid_targets_mean": 3170.5,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 6.38801261829653,
|
|
"grad_norm": 0.34299218359203976,
|
|
"learning_rate": 9.82166638448827e-06,
|
|
"loss": 0.0404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039584726095199585,
|
|
"step": 2025,
|
|
"valid_targets_mean": 3978.4,
|
|
"valid_targets_min": 910
|
|
},
|
|
{
|
|
"epoch": 6.403785488958991,
|
|
"grad_norm": 0.33522795043617415,
|
|
"learning_rate": 9.637158689599746e-06,
|
|
"loss": 0.042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03583469241857529,
|
|
"step": 2030,
|
|
"valid_targets_mean": 3679.2,
|
|
"valid_targets_min": 814
|
|
},
|
|
{
|
|
"epoch": 6.419558359621451,
|
|
"grad_norm": 0.32363944460272953,
|
|
"learning_rate": 9.454215530782994e-06,
|
|
"loss": 0.0422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04818218946456909,
|
|
"step": 2035,
|
|
"valid_targets_mean": 4480.6,
|
|
"valid_targets_min": 1063
|
|
},
|
|
{
|
|
"epoch": 6.435331230283912,
|
|
"grad_norm": 0.30372102594833117,
|
|
"learning_rate": 9.272843999242736e-06,
|
|
"loss": 0.041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03335719555616379,
|
|
"step": 2040,
|
|
"valid_targets_mean": 3853.0,
|
|
"valid_targets_min": 1406
|
|
},
|
|
{
|
|
"epoch": 6.451104100946372,
|
|
"grad_norm": 0.3671546063188256,
|
|
"learning_rate": 9.093051125264623e-06,
|
|
"loss": 0.041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0437227264046669,
|
|
"step": 2045,
|
|
"valid_targets_mean": 3309.1,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 6.466876971608833,
|
|
"grad_norm": 0.3328681104217597,
|
|
"learning_rate": 8.91484387794267e-06,
|
|
"loss": 0.0395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03580016642808914,
|
|
"step": 2050,
|
|
"valid_targets_mean": 3748.2,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 6.482649842271293,
|
|
"grad_norm": 0.3662060321902368,
|
|
"learning_rate": 8.73822916490919e-06,
|
|
"loss": 0.0421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.055983349680900574,
|
|
"step": 2055,
|
|
"valid_targets_mean": 3675.6,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 6.498422712933754,
|
|
"grad_norm": 0.34580136378911885,
|
|
"learning_rate": 8.563213832067014e-06,
|
|
"loss": 0.0394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.037559874355793,
|
|
"step": 2060,
|
|
"valid_targets_mean": 3501.8,
|
|
"valid_targets_min": 1329
|
|
},
|
|
{
|
|
"epoch": 6.514195583596215,
|
|
"grad_norm": 0.345320359793692,
|
|
"learning_rate": 8.389804663324142e-06,
|
|
"loss": 0.0402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040657106786966324,
|
|
"step": 2065,
|
|
"valid_targets_mean": 3831.7,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 6.529968454258675,
|
|
"grad_norm": 0.3371252768221188,
|
|
"learning_rate": 8.218008380330723e-06,
|
|
"loss": 0.0421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04118881747126579,
|
|
"step": 2070,
|
|
"valid_targets_mean": 3966.3,
|
|
"valid_targets_min": 961
|
|
},
|
|
{
|
|
"epoch": 6.545741324921136,
|
|
"grad_norm": 0.3012902209870019,
|
|
"learning_rate": 8.047831642218611e-06,
|
|
"loss": 0.0423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03993421420454979,
|
|
"step": 2075,
|
|
"valid_targets_mean": 4332.0,
|
|
"valid_targets_min": 1194
|
|
},
|
|
{
|
|
"epoch": 6.561514195583596,
|
|
"grad_norm": 0.2765052846744795,
|
|
"learning_rate": 7.879281045343184e-06,
|
|
"loss": 0.0398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03392912447452545,
|
|
"step": 2080,
|
|
"valid_targets_mean": 5162.8,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 6.577287066246057,
|
|
"grad_norm": 0.3295787559358221,
|
|
"learning_rate": 7.712363123027678e-06,
|
|
"loss": 0.0399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03678988665342331,
|
|
"step": 2085,
|
|
"valid_targets_mean": 3544.0,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 6.593059936908517,
|
|
"grad_norm": 0.3372733751760112,
|
|
"learning_rate": 7.547084345309924e-06,
|
|
"loss": 0.0395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04084780439734459,
|
|
"step": 2090,
|
|
"valid_targets_mean": 3407.8,
|
|
"valid_targets_min": 1283
|
|
},
|
|
{
|
|
"epoch": 6.608832807570978,
|
|
"grad_norm": 0.33791441111307363,
|
|
"learning_rate": 7.383451118691576e-06,
|
|
"loss": 0.0381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.035366613417863846,
|
|
"step": 2095,
|
|
"valid_targets_mean": 3889.1,
|
|
"valid_targets_min": 1538
|
|
},
|
|
{
|
|
"epoch": 6.624605678233438,
|
|
"grad_norm": 0.33176711428490663,
|
|
"learning_rate": 7.221469785889784e-06,
|
|
"loss": 0.0399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041747353971004486,
|
|
"step": 2100,
|
|
"valid_targets_mean": 3948.8,
|
|
"valid_targets_min": 1238
|
|
},
|
|
{
|
|
"epoch": 6.6403785488958995,
|
|
"grad_norm": 0.5978543493140935,
|
|
"learning_rate": 7.061146625591331e-06,
|
|
"loss": 0.0397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.049385398626327515,
|
|
"step": 2105,
|
|
"valid_targets_mean": 3850.4,
|
|
"valid_targets_min": 809
|
|
},
|
|
{
|
|
"epoch": 6.65615141955836,
|
|
"grad_norm": 0.3454267755353442,
|
|
"learning_rate": 6.902487852209238e-06,
|
|
"loss": 0.0372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04414697363972664,
|
|
"step": 2110,
|
|
"valid_targets_mean": 4047.8,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 6.6719242902208205,
|
|
"grad_norm": 0.29185414383492303,
|
|
"learning_rate": 6.7454996156419485e-06,
|
|
"loss": 0.0364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.034449975937604904,
|
|
"step": 2115,
|
|
"valid_targets_mean": 4683.7,
|
|
"valid_targets_min": 1094
|
|
},
|
|
{
|
|
"epoch": 6.687697160883281,
|
|
"grad_norm": 0.34219816454313545,
|
|
"learning_rate": 6.590188001034864e-06,
|
|
"loss": 0.0377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04172135889530182,
|
|
"step": 2120,
|
|
"valid_targets_mean": 3884.6,
|
|
"valid_targets_min": 1086
|
|
},
|
|
{
|
|
"epoch": 6.703470031545741,
|
|
"grad_norm": 0.3369809868933638,
|
|
"learning_rate": 6.436559028544559e-06,
|
|
"loss": 0.0395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03876055032014847,
|
|
"step": 2125,
|
|
"valid_targets_mean": 3708.0,
|
|
"valid_targets_min": 977
|
|
},
|
|
{
|
|
"epoch": 6.719242902208202,
|
|
"grad_norm": 0.315311974215639,
|
|
"learning_rate": 6.284618653105328e-06,
|
|
"loss": 0.0391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03876154497265816,
|
|
"step": 2130,
|
|
"valid_targets_mean": 3923.0,
|
|
"valid_targets_min": 1289
|
|
},
|
|
{
|
|
"epoch": 6.735015772870662,
|
|
"grad_norm": 0.3373353918960485,
|
|
"learning_rate": 6.134372764198465e-06,
|
|
"loss": 0.0394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0393822006881237,
|
|
"step": 2135,
|
|
"valid_targets_mean": 3684.7,
|
|
"valid_targets_min": 925
|
|
},
|
|
{
|
|
"epoch": 6.750788643533123,
|
|
"grad_norm": 0.32392467184278273,
|
|
"learning_rate": 5.985827185623899e-06,
|
|
"loss": 0.0363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03698527812957764,
|
|
"step": 2140,
|
|
"valid_targets_mean": 3927.2,
|
|
"valid_targets_min": 238
|
|
},
|
|
{
|
|
"epoch": 6.766561514195583,
|
|
"grad_norm": 0.3443965456972307,
|
|
"learning_rate": 5.8389876752745045e-06,
|
|
"loss": 0.0402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038646962493658066,
|
|
"step": 2145,
|
|
"valid_targets_mean": 3486.0,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 6.782334384858045,
|
|
"grad_norm": 0.32713151736206264,
|
|
"learning_rate": 5.693859924912892e-06,
|
|
"loss": 0.0384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0415056087076664,
|
|
"step": 2150,
|
|
"valid_targets_mean": 4044.9,
|
|
"valid_targets_min": 1097
|
|
},
|
|
{
|
|
"epoch": 6.798107255520505,
|
|
"grad_norm": 0.30958861178255653,
|
|
"learning_rate": 5.550449559950755e-06,
|
|
"loss": 0.0373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03296856954693794,
|
|
"step": 2155,
|
|
"valid_targets_mean": 3729.6,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 6.813880126182966,
|
|
"grad_norm": 0.29552798234216193,
|
|
"learning_rate": 5.408762139230888e-06,
|
|
"loss": 0.0389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03640144318342209,
|
|
"step": 2160,
|
|
"valid_targets_mean": 4315.6,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 6.829652996845426,
|
|
"grad_norm": 0.34460179052643625,
|
|
"learning_rate": 5.268803154811669e-06,
|
|
"loss": 0.0359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03762969374656677,
|
|
"step": 2165,
|
|
"valid_targets_mean": 3706.4,
|
|
"valid_targets_min": 1109
|
|
},
|
|
{
|
|
"epoch": 6.8454258675078865,
|
|
"grad_norm": 0.31528346088217085,
|
|
"learning_rate": 5.1305780317541855e-06,
|
|
"loss": 0.0414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040562450885772705,
|
|
"step": 2170,
|
|
"valid_targets_mean": 4305.6,
|
|
"valid_targets_min": 1330
|
|
},
|
|
{
|
|
"epoch": 6.861198738170347,
|
|
"grad_norm": 0.3122264620036574,
|
|
"learning_rate": 4.99409212791192e-06,
|
|
"loss": 0.0393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040793538093566895,
|
|
"step": 2175,
|
|
"valid_targets_mean": 4401.1,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 6.8769716088328074,
|
|
"grad_norm": 0.31459386735001565,
|
|
"learning_rate": 4.8593507337231666e-06,
|
|
"loss": 0.0419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0354890376329422,
|
|
"step": 2180,
|
|
"valid_targets_mean": 3697.2,
|
|
"valid_targets_min": 825
|
|
},
|
|
{
|
|
"epoch": 6.892744479495268,
|
|
"grad_norm": 0.32466893557156645,
|
|
"learning_rate": 4.726359072005859e-06,
|
|
"loss": 0.0429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03945270925760269,
|
|
"step": 2185,
|
|
"valid_targets_mean": 3481.8,
|
|
"valid_targets_min": 1082
|
|
},
|
|
{
|
|
"epoch": 6.908517350157728,
|
|
"grad_norm": 0.3333960510985479,
|
|
"learning_rate": 4.5951222977551444e-06,
|
|
"loss": 0.0414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05161520838737488,
|
|
"step": 2190,
|
|
"valid_targets_mean": 3759.1,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 6.92429022082019,
|
|
"grad_norm": 0.2667648088422186,
|
|
"learning_rate": 4.465645497943621e-06,
|
|
"loss": 0.0359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03310135751962662,
|
|
"step": 2195,
|
|
"valid_targets_mean": 4283.0,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 6.94006309148265,
|
|
"grad_norm": 0.35169135244926236,
|
|
"learning_rate": 4.337933691324109e-06,
|
|
"loss": 0.0361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04485854506492615,
|
|
"step": 2200,
|
|
"valid_targets_mean": 3564.5,
|
|
"valid_targets_min": 802
|
|
},
|
|
{
|
|
"epoch": 6.955835962145111,
|
|
"grad_norm": 0.3454019187343065,
|
|
"learning_rate": 4.21199182823514e-06,
|
|
"loss": 0.0359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039734575897455215,
|
|
"step": 2205,
|
|
"valid_targets_mean": 4191.7,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 6.971608832807571,
|
|
"grad_norm": 0.3400328431593489,
|
|
"learning_rate": 4.08782479040905e-06,
|
|
"loss": 0.0413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03961391746997833,
|
|
"step": 2210,
|
|
"valid_targets_mean": 3994.3,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 6.987381703470032,
|
|
"grad_norm": 0.3445618162748776,
|
|
"learning_rate": 3.9654373907827665e-06,
|
|
"loss": 0.0391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04041258245706558,
|
|
"step": 2215,
|
|
"valid_targets_mean": 3696.0,
|
|
"valid_targets_min": 1033
|
|
},
|
|
{
|
|
"epoch": 7.003154574132492,
|
|
"grad_norm": 0.24808709523615394,
|
|
"learning_rate": 3.844834373311257e-06,
|
|
"loss": 0.0368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.025598838925361633,
|
|
"step": 2220,
|
|
"valid_targets_mean": 4450.8,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 7.018927444794953,
|
|
"grad_norm": 0.20311641174529407,
|
|
"learning_rate": 3.7260204127836316e-06,
|
|
"loss": 0.0241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02267434261739254,
|
|
"step": 2225,
|
|
"valid_targets_mean": 4686.9,
|
|
"valid_targets_min": 1093
|
|
},
|
|
{
|
|
"epoch": 7.034700315457413,
|
|
"grad_norm": 0.2913507276372168,
|
|
"learning_rate": 3.609000114641964e-06,
|
|
"loss": 0.0222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.023478612303733826,
|
|
"step": 2230,
|
|
"valid_targets_mean": 4037.2,
|
|
"valid_targets_min": 1238
|
|
},
|
|
{
|
|
"epoch": 7.0504731861198735,
|
|
"grad_norm": 0.25188372175946383,
|
|
"learning_rate": 3.4937780148027344e-06,
|
|
"loss": 0.021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.020064473152160645,
|
|
"step": 2235,
|
|
"valid_targets_mean": 3790.7,
|
|
"valid_targets_min": 1525
|
|
},
|
|
{
|
|
"epoch": 7.066246056782334,
|
|
"grad_norm": 0.2799468731885234,
|
|
"learning_rate": 3.3803585794810466e-06,
|
|
"loss": 0.0219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.021671026945114136,
|
|
"step": 2240,
|
|
"valid_targets_mean": 3482.3,
|
|
"valid_targets_min": 1149
|
|
},
|
|
{
|
|
"epoch": 7.082018927444795,
|
|
"grad_norm": 0.22901515937297012,
|
|
"learning_rate": 3.2687462050175034e-06,
|
|
"loss": 0.0233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.022264134138822556,
|
|
"step": 2245,
|
|
"valid_targets_mean": 4568.3,
|
|
"valid_targets_min": 442
|
|
},
|
|
{
|
|
"epoch": 7.097791798107256,
|
|
"grad_norm": 0.21965871988446903,
|
|
"learning_rate": 3.1589452177077815e-06,
|
|
"loss": 0.0239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.020710987970232964,
|
|
"step": 2250,
|
|
"valid_targets_mean": 4003.0,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 7.113564668769716,
|
|
"grad_norm": 0.2846193782550197,
|
|
"learning_rate": 3.0509598736349343e-06,
|
|
"loss": 0.0232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.027990806847810745,
|
|
"step": 2255,
|
|
"valid_targets_mean": 3473.4,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 7.129337539432177,
|
|
"grad_norm": 0.25342807787240196,
|
|
"learning_rate": 2.9447943585044545e-06,
|
|
"loss": 0.0209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01980901136994362,
|
|
"step": 2260,
|
|
"valid_targets_mean": 3686.9,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 7.145110410094637,
|
|
"grad_norm": 0.22599689314876098,
|
|
"learning_rate": 2.840452787481979e-06,
|
|
"loss": 0.0229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.022050093859434128,
|
|
"step": 2265,
|
|
"valid_targets_mean": 4373.3,
|
|
"valid_targets_min": 1060
|
|
},
|
|
{
|
|
"epoch": 7.160883280757098,
|
|
"grad_norm": 0.28769966313988754,
|
|
"learning_rate": 2.7379392050338236e-06,
|
|
"loss": 0.0226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02312508039176464,
|
|
"step": 2270,
|
|
"valid_targets_mean": 3584.4,
|
|
"valid_targets_min": 1148
|
|
},
|
|
{
|
|
"epoch": 7.176656151419558,
|
|
"grad_norm": 0.260860431305003,
|
|
"learning_rate": 2.63725758477017e-06,
|
|
"loss": 0.0226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02554130181670189,
|
|
"step": 2275,
|
|
"valid_targets_mean": 3841.9,
|
|
"valid_targets_min": 666
|
|
},
|
|
{
|
|
"epoch": 7.192429022082019,
|
|
"grad_norm": 0.24635859173031086,
|
|
"learning_rate": 2.5384118292910818e-06,
|
|
"loss": 0.0226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.021089503541588783,
|
|
"step": 2280,
|
|
"valid_targets_mean": 3992.6,
|
|
"valid_targets_min": 1190
|
|
},
|
|
{
|
|
"epoch": 7.208201892744479,
|
|
"grad_norm": 0.2333594074028827,
|
|
"learning_rate": 2.4414057700351934e-06,
|
|
"loss": 0.0212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.020846040919423103,
|
|
"step": 2285,
|
|
"valid_targets_mean": 3938.8,
|
|
"valid_targets_min": 830
|
|
},
|
|
{
|
|
"epoch": 7.2239747634069404,
|
|
"grad_norm": 0.25966767492658177,
|
|
"learning_rate": 2.34624316713124e-06,
|
|
"loss": 0.0229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.020981695502996445,
|
|
"step": 2290,
|
|
"valid_targets_mean": 3504.3,
|
|
"valid_targets_min": 1031
|
|
},
|
|
{
|
|
"epoch": 7.239747634069401,
|
|
"grad_norm": 0.22153240765470747,
|
|
"learning_rate": 2.2529277092522503e-06,
|
|
"loss": 0.0226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.020532969385385513,
|
|
"step": 2295,
|
|
"valid_targets_mean": 4253.0,
|
|
"valid_targets_min": 972
|
|
},
|
|
{
|
|
"epoch": 7.255520504731861,
|
|
"grad_norm": 0.2559811792469891,
|
|
"learning_rate": 2.1614630134726367e-06,
|
|
"loss": 0.0197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.022255118936300278,
|
|
"step": 2300,
|
|
"valid_targets_mean": 4342.4,
|
|
"valid_targets_min": 978
|
|
},
|
|
{
|
|
"epoch": 7.271293375394322,
|
|
"grad_norm": 0.22249476827291575,
|
|
"learning_rate": 2.0718526251279346e-06,
|
|
"loss": 0.0214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.020419348031282425,
|
|
"step": 2305,
|
|
"valid_targets_mean": 4521.8,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 7.287066246056782,
|
|
"grad_norm": 0.22883166477771633,
|
|
"learning_rate": 1.9841000176774148e-06,
|
|
"loss": 0.0214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02150961011648178,
|
|
"step": 2310,
|
|
"valid_targets_mean": 4188.1,
|
|
"valid_targets_min": 1191
|
|
},
|
|
{
|
|
"epoch": 7.302839116719243,
|
|
"grad_norm": 0.26440175569833124,
|
|
"learning_rate": 1.898208592569406e-06,
|
|
"loss": 0.021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.022997155785560608,
|
|
"step": 2315,
|
|
"valid_targets_mean": 3923.1,
|
|
"valid_targets_min": 1363
|
|
},
|
|
{
|
|
"epoch": 7.318611987381703,
|
|
"grad_norm": 0.24646636085476814,
|
|
"learning_rate": 1.8141816791095e-06,
|
|
"loss": 0.0236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02459106408059597,
|
|
"step": 2320,
|
|
"valid_targets_mean": 4077.3,
|
|
"valid_targets_min": 1260
|
|
},
|
|
{
|
|
"epoch": 7.334384858044164,
|
|
"grad_norm": 0.2474218671338873,
|
|
"learning_rate": 1.7320225343314566e-06,
|
|
"loss": 0.0211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01931830868124962,
|
|
"step": 2325,
|
|
"valid_targets_mean": 3470.6,
|
|
"valid_targets_min": 771
|
|
},
|
|
{
|
|
"epoch": 7.350157728706624,
|
|
"grad_norm": 0.2615793701812396,
|
|
"learning_rate": 1.6517343428709975e-06,
|
|
"loss": 0.0238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.022896194830536842,
|
|
"step": 2330,
|
|
"valid_targets_mean": 3343.2,
|
|
"valid_targets_min": 1106
|
|
},
|
|
{
|
|
"epoch": 7.365930599369086,
|
|
"grad_norm": 0.24866049646271576,
|
|
"learning_rate": 1.5733202168423055e-06,
|
|
"loss": 0.0218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.022526774555444717,
|
|
"step": 2335,
|
|
"valid_targets_mean": 3570.8,
|
|
"valid_targets_min": 1111
|
|
},
|
|
{
|
|
"epoch": 7.381703470031546,
|
|
"grad_norm": 0.24398100838951153,
|
|
"learning_rate": 1.4967831957174606e-06,
|
|
"loss": 0.0215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.019462652504444122,
|
|
"step": 2340,
|
|
"valid_targets_mean": 4364.1,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 7.3974763406940065,
|
|
"grad_norm": 0.27051410831935213,
|
|
"learning_rate": 1.4221262462085715e-06,
|
|
"loss": 0.0251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.025676360353827477,
|
|
"step": 2345,
|
|
"valid_targets_mean": 3603.3,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 7.413249211356467,
|
|
"grad_norm": 0.3096679579306045,
|
|
"learning_rate": 1.3493522621528088e-06,
|
|
"loss": 0.0244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.024824511259794235,
|
|
"step": 2350,
|
|
"valid_targets_mean": 3219.4,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 7.429022082018927,
|
|
"grad_norm": 0.252879366286297,
|
|
"learning_rate": 1.2784640644002366e-06,
|
|
"loss": 0.0214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.022230399772524834,
|
|
"step": 2355,
|
|
"valid_targets_mean": 3287.1,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 7.444794952681388,
|
|
"grad_norm": 0.20998147562039438,
|
|
"learning_rate": 1.209464400704452e-06,
|
|
"loss": 0.0206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01767810247838497,
|
|
"step": 2360,
|
|
"valid_targets_mean": 4389.2,
|
|
"valid_targets_min": 927
|
|
},
|
|
{
|
|
"epoch": 7.460567823343848,
|
|
"grad_norm": 0.27043175393365976,
|
|
"learning_rate": 1.1423559456160803e-06,
|
|
"loss": 0.0223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02215229906141758,
|
|
"step": 2365,
|
|
"valid_targets_mean": 3194.0,
|
|
"valid_targets_min": 925
|
|
},
|
|
{
|
|
"epoch": 7.476340694006309,
|
|
"grad_norm": 0.2564017112706904,
|
|
"learning_rate": 1.0771413003791253e-06,
|
|
"loss": 0.0202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.020249977707862854,
|
|
"step": 2370,
|
|
"valid_targets_mean": 3954.9,
|
|
"valid_targets_min": 937
|
|
},
|
|
{
|
|
"epoch": 7.492113564668769,
|
|
"grad_norm": 0.22802210655502397,
|
|
"learning_rate": 1.0138229928301212e-06,
|
|
"loss": 0.0212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.018750024959445,
|
|
"step": 2375,
|
|
"valid_targets_mean": 3741.9,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 7.50788643533123,
|
|
"grad_norm": 0.22394765960486246,
|
|
"learning_rate": 9.524034773001511e-07,
|
|
"loss": 0.0193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.016100244596600533,
|
|
"step": 2380,
|
|
"valid_targets_mean": 3365.4,
|
|
"valid_targets_min": 948
|
|
},
|
|
{
|
|
"epoch": 7.523659305993691,
|
|
"grad_norm": 0.23405243563989528,
|
|
"learning_rate": 8.928851345197165e-07,
|
|
"loss": 0.0229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02146870270371437,
|
|
"step": 2385,
|
|
"valid_targets_mean": 3963.8,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 7.539432176656152,
|
|
"grad_norm": 0.22220117881236737,
|
|
"learning_rate": 8.352702715264726e-07,
|
|
"loss": 0.022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.018834460526704788,
|
|
"step": 2390,
|
|
"valid_targets_mean": 4035.9,
|
|
"valid_targets_min": 1400
|
|
},
|
|
{
|
|
"epoch": 7.555205047318612,
|
|
"grad_norm": 0.2824422608959941,
|
|
"learning_rate": 7.795611215757615e-07,
|
|
"loss": 0.0229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02203229069709778,
|
|
"step": 2395,
|
|
"valid_targets_mean": 3184.9,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 7.570977917981073,
|
|
"grad_norm": 0.24152675863727685,
|
|
"learning_rate": 7.257598440540802e-07,
|
|
"loss": 0.025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.020647384226322174,
|
|
"step": 2400,
|
|
"valid_targets_mean": 3745.3,
|
|
"valid_targets_min": 1344
|
|
},
|
|
{
|
|
"epoch": 7.586750788643533,
|
|
"grad_norm": 0.25427069001772656,
|
|
"learning_rate": 6.738685243953769e-07,
|
|
"loss": 0.0245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.022360961884260178,
|
|
"step": 2405,
|
|
"valid_targets_mean": 4033.4,
|
|
"valid_targets_min": 1509
|
|
},
|
|
{
|
|
"epoch": 7.6025236593059935,
|
|
"grad_norm": 0.2447796849963383,
|
|
"learning_rate": 6.238891740002195e-07,
|
|
"loss": 0.0213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02267741598188877,
|
|
"step": 2410,
|
|
"valid_targets_mean": 4396.5,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 7.618296529968454,
|
|
"grad_norm": 0.1947016798039757,
|
|
"learning_rate": 5.758237301577874e-07,
|
|
"loss": 0.0218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.017979349941015244,
|
|
"step": 2415,
|
|
"valid_targets_mean": 4253.2,
|
|
"valid_targets_min": 961
|
|
},
|
|
{
|
|
"epoch": 7.634069400630915,
|
|
"grad_norm": 0.26242474747372385,
|
|
"learning_rate": 5.296740559708413e-07,
|
|
"loss": 0.0224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.025327244773507118,
|
|
"step": 2420,
|
|
"valid_targets_mean": 4355.0,
|
|
"valid_targets_min": 1385
|
|
},
|
|
{
|
|
"epoch": 7.649842271293375,
|
|
"grad_norm": 0.2873187770068635,
|
|
"learning_rate": 4.854419402834709e-07,
|
|
"loss": 0.0231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029193349182605743,
|
|
"step": 2425,
|
|
"valid_targets_mean": 3987.2,
|
|
"valid_targets_min": 1301
|
|
},
|
|
{
|
|
"epoch": 7.665615141955836,
|
|
"grad_norm": 0.25080676062406615,
|
|
"learning_rate": 4.431290976117497e-07,
|
|
"loss": 0.0224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02236909046769142,
|
|
"step": 2430,
|
|
"valid_targets_mean": 3526.8,
|
|
"valid_targets_min": 825
|
|
},
|
|
{
|
|
"epoch": 7.681388012618297,
|
|
"grad_norm": 0.2679068276217551,
|
|
"learning_rate": 4.0273716807731067e-07,
|
|
"loss": 0.0223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02555161528289318,
|
|
"step": 2435,
|
|
"valid_targets_mean": 3574.3,
|
|
"valid_targets_min": 1514
|
|
},
|
|
{
|
|
"epoch": 7.697160883280757,
|
|
"grad_norm": 0.2527799820599618,
|
|
"learning_rate": 3.642677173437137e-07,
|
|
"loss": 0.023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.022830359637737274,
|
|
"step": 2440,
|
|
"valid_targets_mean": 3984.7,
|
|
"valid_targets_min": 1429
|
|
},
|
|
{
|
|
"epoch": 7.712933753943218,
|
|
"grad_norm": 0.24723303602718177,
|
|
"learning_rate": 3.2772223655583857e-07,
|
|
"loss": 0.0221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.022257965058088303,
|
|
"step": 2445,
|
|
"valid_targets_mean": 4166.3,
|
|
"valid_targets_min": 1078
|
|
},
|
|
{
|
|
"epoch": 7.728706624605678,
|
|
"grad_norm": 0.24609326590877015,
|
|
"learning_rate": 2.9310214228202013e-07,
|
|
"loss": 0.0235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01870781183242798,
|
|
"step": 2450,
|
|
"valid_targets_mean": 3395.5,
|
|
"valid_targets_min": 1175
|
|
},
|
|
{
|
|
"epoch": 7.744479495268139,
|
|
"grad_norm": 0.2455031161070336,
|
|
"learning_rate": 2.604087764591534e-07,
|
|
"loss": 0.0219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.021903114393353462,
|
|
"step": 2455,
|
|
"valid_targets_mean": 4218.6,
|
|
"valid_targets_min": 867
|
|
},
|
|
{
|
|
"epoch": 7.760252365930599,
|
|
"grad_norm": 0.2800194629999433,
|
|
"learning_rate": 2.2964340634069603e-07,
|
|
"loss": 0.0232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.022704198956489563,
|
|
"step": 2460,
|
|
"valid_targets_mean": 3092.3,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 7.7760252365930596,
|
|
"grad_norm": 0.2584271088036407,
|
|
"learning_rate": 2.0080722444754118e-07,
|
|
"loss": 0.0234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.021550491452217102,
|
|
"step": 2465,
|
|
"valid_targets_mean": 3626.9,
|
|
"valid_targets_min": 380
|
|
},
|
|
{
|
|
"epoch": 7.79179810725552,
|
|
"grad_norm": 0.22787140593944627,
|
|
"learning_rate": 1.7390134852177664e-07,
|
|
"loss": 0.0258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01774200238287449,
|
|
"step": 2470,
|
|
"valid_targets_mean": 3878.1,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 7.807570977917981,
|
|
"grad_norm": 0.30621669971194165,
|
|
"learning_rate": 1.48926821483375e-07,
|
|
"loss": 0.0249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0292455293238163,
|
|
"step": 2475,
|
|
"valid_targets_mean": 3496.4,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 7.823343848580442,
|
|
"grad_norm": 0.22754985262911764,
|
|
"learning_rate": 1.2588461138977604e-07,
|
|
"loss": 0.0205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.020763764157891273,
|
|
"step": 2480,
|
|
"valid_targets_mean": 4017.9,
|
|
"valid_targets_min": 1265
|
|
},
|
|
{
|
|
"epoch": 7.839116719242902,
|
|
"grad_norm": 0.24205972437180406,
|
|
"learning_rate": 1.0477561139832781e-07,
|
|
"loss": 0.024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02504826709628105,
|
|
"step": 2485,
|
|
"valid_targets_mean": 4303.2,
|
|
"valid_targets_min": 882
|
|
},
|
|
{
|
|
"epoch": 7.854889589905363,
|
|
"grad_norm": 0.2525430850860654,
|
|
"learning_rate": 8.560063973171439e-08,
|
|
"loss": 0.0246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.027818750590085983,
|
|
"step": 2490,
|
|
"valid_targets_mean": 3742.2,
|
|
"valid_targets_min": 763
|
|
},
|
|
{
|
|
"epoch": 7.870662460567823,
|
|
"grad_norm": 0.21015295804895337,
|
|
"learning_rate": 6.836043964620342e-08,
|
|
"loss": 0.0213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.017499791458249092,
|
|
"step": 2495,
|
|
"valid_targets_mean": 4126.9,
|
|
"valid_targets_min": 1097
|
|
},
|
|
{
|
|
"epoch": 7.886435331230284,
|
|
"grad_norm": 0.23969974208310643,
|
|
"learning_rate": 5.3055679402846946e-08,
|
|
"loss": 0.0222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.022856732830405235,
|
|
"step": 2500,
|
|
"valid_targets_mean": 4013.2,
|
|
"valid_targets_min": 1041
|
|
},
|
|
{
|
|
"epoch": 7.902208201892744,
|
|
"grad_norm": 0.26209668108487033,
|
|
"learning_rate": 3.968695224158547e-08,
|
|
"loss": 0.0221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.024923603981733322,
|
|
"step": 2505,
|
|
"valid_targets_mean": 3558.2,
|
|
"valid_targets_min": 1009
|
|
},
|
|
{
|
|
"epoch": 7.917981072555205,
|
|
"grad_norm": 0.258239974989192,
|
|
"learning_rate": 2.8254776358238588e-08,
|
|
"loss": 0.0232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.022169824689626694,
|
|
"step": 2510,
|
|
"valid_targets_mean": 4173.0,
|
|
"valid_targets_min": 1379
|
|
},
|
|
{
|
|
"epoch": 7.933753943217665,
|
|
"grad_norm": 0.2103350701053391,
|
|
"learning_rate": 1.8759594884443233e-08,
|
|
"loss": 0.0214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.017467107623815536,
|
|
"step": 2515,
|
|
"valid_targets_mean": 4391.8,
|
|
"valid_targets_min": 1271
|
|
},
|
|
{
|
|
"epoch": 7.9495268138801265,
|
|
"grad_norm": 0.25141140585193267,
|
|
"learning_rate": 1.1201775870445242e-08,
|
|
"loss": 0.0208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02066458761692047,
|
|
"step": 2520,
|
|
"valid_targets_mean": 4108.5,
|
|
"valid_targets_min": 1098
|
|
},
|
|
{
|
|
"epoch": 7.965299684542587,
|
|
"grad_norm": 0.2245523196062546,
|
|
"learning_rate": 5.581612270855186e-09,
|
|
"loss": 0.0223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.018932700157165527,
|
|
"step": 2525,
|
|
"valid_targets_mean": 4325.3,
|
|
"valid_targets_min": 1171
|
|
},
|
|
{
|
|
"epoch": 7.981072555205047,
|
|
"grad_norm": 0.271744707250328,
|
|
"learning_rate": 1.8993219332907877e-09,
|
|
"loss": 0.0214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.023682236671447754,
|
|
"step": 2530,
|
|
"valid_targets_mean": 3315.0,
|
|
"valid_targets_min": 757
|
|
},
|
|
{
|
|
"epoch": 7.996845425867508,
|
|
"grad_norm": 0.26771159667476085,
|
|
"learning_rate": 1.5504758992257451e-10,
|
|
"loss": 0.0225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02129640057682991,
|
|
"step": 2535,
|
|
"valid_targets_mean": 3254.4,
|
|
"valid_targets_min": 993
|
|
},
|
|
{
|
|
"epoch": 8.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.016571061685681343,
|
|
"step": 2536,
|
|
"total_flos": 1378268975529984.0,
|
|
"train_loss": 0.16813087239582386,
|
|
"train_runtime": 37774.5792,
|
|
"train_samples_per_second": 2.146,
|
|
"train_steps_per_second": 0.067,
|
|
"valid_targets_mean": 4493.3,
|
|
"valid_targets_min": 905
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 2536,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 8,
|
|
"save_steps": 100,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1378268975529984.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|