12188 lines
339 KiB
JSON
12188 lines
339 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 5523,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0063371356147021544,
|
|
"grad_norm": 16.687779250877437,
|
|
"learning_rate": 2.8933092224231465e-07,
|
|
"loss": 0.6603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30958038568496704,
|
|
"step": 5,
|
|
"valid_targets_mean": 7389.0,
|
|
"valid_targets_min": 5627
|
|
},
|
|
{
|
|
"epoch": 0.012674271229404309,
|
|
"grad_norm": 16.08040156682522,
|
|
"learning_rate": 6.509945750452081e-07,
|
|
"loss": 0.6992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23575212061405182,
|
|
"step": 10,
|
|
"valid_targets_mean": 1294.8,
|
|
"valid_targets_min": 422
|
|
},
|
|
{
|
|
"epoch": 0.019011406844106463,
|
|
"grad_norm": 13.943626997077212,
|
|
"learning_rate": 1.0126582278481013e-06,
|
|
"loss": 0.6715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31899070739746094,
|
|
"step": 15,
|
|
"valid_targets_mean": 8539.9,
|
|
"valid_targets_min": 6761
|
|
},
|
|
{
|
|
"epoch": 0.025348542458808618,
|
|
"grad_norm": 10.444278882643168,
|
|
"learning_rate": 1.3743218806509947e-06,
|
|
"loss": 0.5937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2755453288555145,
|
|
"step": 20,
|
|
"valid_targets_mean": 7579.0,
|
|
"valid_targets_min": 6174
|
|
},
|
|
{
|
|
"epoch": 0.031685678073510776,
|
|
"grad_norm": 6.971880049112938,
|
|
"learning_rate": 1.735985533453888e-06,
|
|
"loss": 0.5812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27520060539245605,
|
|
"step": 25,
|
|
"valid_targets_mean": 7328.2,
|
|
"valid_targets_min": 4839
|
|
},
|
|
{
|
|
"epoch": 0.03802281368821293,
|
|
"grad_norm": 5.668247985731343,
|
|
"learning_rate": 2.0976491862567814e-06,
|
|
"loss": 0.567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29798322916030884,
|
|
"step": 30,
|
|
"valid_targets_mean": 6838.2,
|
|
"valid_targets_min": 5034
|
|
},
|
|
{
|
|
"epoch": 0.044359949302915085,
|
|
"grad_norm": 4.926436594109964,
|
|
"learning_rate": 2.4593128390596747e-06,
|
|
"loss": 0.5404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2653484046459198,
|
|
"step": 35,
|
|
"valid_targets_mean": 7248.2,
|
|
"valid_targets_min": 5567
|
|
},
|
|
{
|
|
"epoch": 0.050697084917617236,
|
|
"grad_norm": 4.308750459017782,
|
|
"learning_rate": 2.820976491862568e-06,
|
|
"loss": 0.4813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22271956503391266,
|
|
"step": 40,
|
|
"valid_targets_mean": 6234.0,
|
|
"valid_targets_min": 5603
|
|
},
|
|
{
|
|
"epoch": 0.057034220532319393,
|
|
"grad_norm": 3.2766970074639636,
|
|
"learning_rate": 3.1826401446654614e-06,
|
|
"loss": 0.4639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10529494285583496,
|
|
"step": 45,
|
|
"valid_targets_mean": 602.0,
|
|
"valid_targets_min": 152
|
|
},
|
|
{
|
|
"epoch": 0.06337135614702155,
|
|
"grad_norm": 1.3032490380885975,
|
|
"learning_rate": 3.544303797468355e-06,
|
|
"loss": 0.4148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20372122526168823,
|
|
"step": 50,
|
|
"valid_targets_mean": 7299.9,
|
|
"valid_targets_min": 6158
|
|
},
|
|
{
|
|
"epoch": 0.0697084917617237,
|
|
"grad_norm": 0.9950198425401783,
|
|
"learning_rate": 3.905967450271248e-06,
|
|
"loss": 0.411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19291269779205322,
|
|
"step": 55,
|
|
"valid_targets_mean": 6140.0,
|
|
"valid_targets_min": 4648
|
|
},
|
|
{
|
|
"epoch": 0.07604562737642585,
|
|
"grad_norm": 0.8552394578845425,
|
|
"learning_rate": 4.267631103074141e-06,
|
|
"loss": 0.3955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19710980355739594,
|
|
"step": 60,
|
|
"valid_targets_mean": 6603.0,
|
|
"valid_targets_min": 4882
|
|
},
|
|
{
|
|
"epoch": 0.08238276299112801,
|
|
"grad_norm": 0.6562321811768683,
|
|
"learning_rate": 4.6292947558770344e-06,
|
|
"loss": 0.376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19212953746318817,
|
|
"step": 65,
|
|
"valid_targets_mean": 8015.0,
|
|
"valid_targets_min": 6344
|
|
},
|
|
{
|
|
"epoch": 0.08871989860583017,
|
|
"grad_norm": 0.6811860249841902,
|
|
"learning_rate": 4.990958408679928e-06,
|
|
"loss": 0.3639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17592543363571167,
|
|
"step": 70,
|
|
"valid_targets_mean": 6305.2,
|
|
"valid_targets_min": 4622
|
|
},
|
|
{
|
|
"epoch": 0.09505703422053231,
|
|
"grad_norm": 0.5786171401407759,
|
|
"learning_rate": 5.352622061482822e-06,
|
|
"loss": 0.3667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1864241361618042,
|
|
"step": 75,
|
|
"valid_targets_mean": 7890.8,
|
|
"valid_targets_min": 5485
|
|
},
|
|
{
|
|
"epoch": 0.10139416983523447,
|
|
"grad_norm": 0.6092273547906826,
|
|
"learning_rate": 5.7142857142857145e-06,
|
|
"loss": 0.3606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1631605625152588,
|
|
"step": 80,
|
|
"valid_targets_mean": 7016.6,
|
|
"valid_targets_min": 4960
|
|
},
|
|
{
|
|
"epoch": 0.10773130544993663,
|
|
"grad_norm": 0.5511389431539211,
|
|
"learning_rate": 6.075949367088608e-06,
|
|
"loss": 0.3468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1630638986825943,
|
|
"step": 85,
|
|
"valid_targets_mean": 6527.1,
|
|
"valid_targets_min": 5399
|
|
},
|
|
{
|
|
"epoch": 0.11406844106463879,
|
|
"grad_norm": 0.619424308726675,
|
|
"learning_rate": 6.437613019891501e-06,
|
|
"loss": 0.3466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1726437658071518,
|
|
"step": 90,
|
|
"valid_targets_mean": 4995.0,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 0.12040557667934093,
|
|
"grad_norm": 0.513427274619886,
|
|
"learning_rate": 6.799276672694395e-06,
|
|
"loss": 0.356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17758122086524963,
|
|
"step": 95,
|
|
"valid_targets_mean": 7219.4,
|
|
"valid_targets_min": 5764
|
|
},
|
|
{
|
|
"epoch": 0.1267427122940431,
|
|
"grad_norm": 0.4975948357932382,
|
|
"learning_rate": 7.160940325497288e-06,
|
|
"loss": 0.3359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17672663927078247,
|
|
"step": 100,
|
|
"valid_targets_mean": 7998.5,
|
|
"valid_targets_min": 5902
|
|
},
|
|
{
|
|
"epoch": 0.13307984790874525,
|
|
"grad_norm": 0.43201041704232973,
|
|
"learning_rate": 7.522603978300181e-06,
|
|
"loss": 0.3081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1371014416217804,
|
|
"step": 105,
|
|
"valid_targets_mean": 7377.4,
|
|
"valid_targets_min": 5015
|
|
},
|
|
{
|
|
"epoch": 0.1394169835234474,
|
|
"grad_norm": 0.45715194107776114,
|
|
"learning_rate": 7.884267631103075e-06,
|
|
"loss": 0.3114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13644663989543915,
|
|
"step": 110,
|
|
"valid_targets_mean": 6794.0,
|
|
"valid_targets_min": 5283
|
|
},
|
|
{
|
|
"epoch": 0.14575411913814956,
|
|
"grad_norm": 0.5243729936428787,
|
|
"learning_rate": 8.245931283905967e-06,
|
|
"loss": 0.3242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1827109158039093,
|
|
"step": 115,
|
|
"valid_targets_mean": 6656.4,
|
|
"valid_targets_min": 4364
|
|
},
|
|
{
|
|
"epoch": 0.1520912547528517,
|
|
"grad_norm": 0.47137333104801876,
|
|
"learning_rate": 8.607594936708861e-06,
|
|
"loss": 0.3124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1481418013572693,
|
|
"step": 120,
|
|
"valid_targets_mean": 7178.4,
|
|
"valid_targets_min": 4448
|
|
},
|
|
{
|
|
"epoch": 0.15842839036755388,
|
|
"grad_norm": 0.4989545606306773,
|
|
"learning_rate": 8.969258589511754e-06,
|
|
"loss": 0.3078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17527635395526886,
|
|
"step": 125,
|
|
"valid_targets_mean": 7619.8,
|
|
"valid_targets_min": 5737
|
|
},
|
|
{
|
|
"epoch": 0.16476552598225602,
|
|
"grad_norm": 0.4422069030331628,
|
|
"learning_rate": 9.330922242314648e-06,
|
|
"loss": 0.2717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13178931176662445,
|
|
"step": 130,
|
|
"valid_targets_mean": 8754.4,
|
|
"valid_targets_min": 5752
|
|
},
|
|
{
|
|
"epoch": 0.17110266159695817,
|
|
"grad_norm": 0.5400895472352757,
|
|
"learning_rate": 9.69258589511754e-06,
|
|
"loss": 0.2701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1462893933057785,
|
|
"step": 135,
|
|
"valid_targets_mean": 6341.0,
|
|
"valid_targets_min": 4543
|
|
},
|
|
{
|
|
"epoch": 0.17743979721166034,
|
|
"grad_norm": 0.48662890438692497,
|
|
"learning_rate": 1.0054249547920433e-05,
|
|
"loss": 0.2599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14835844933986664,
|
|
"step": 140,
|
|
"valid_targets_mean": 7181.9,
|
|
"valid_targets_min": 4563
|
|
},
|
|
{
|
|
"epoch": 0.18377693282636248,
|
|
"grad_norm": 1.0225487686400567,
|
|
"learning_rate": 1.0415913200723329e-05,
|
|
"loss": 0.3182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11732099950313568,
|
|
"step": 145,
|
|
"valid_targets_mean": 1104.4,
|
|
"valid_targets_min": 161
|
|
},
|
|
{
|
|
"epoch": 0.19011406844106463,
|
|
"grad_norm": 0.4490404889711451,
|
|
"learning_rate": 1.0777576853526221e-05,
|
|
"loss": 0.301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1315346360206604,
|
|
"step": 150,
|
|
"valid_targets_mean": 7018.2,
|
|
"valid_targets_min": 5424
|
|
},
|
|
{
|
|
"epoch": 0.1964512040557668,
|
|
"grad_norm": 0.46505911804592187,
|
|
"learning_rate": 1.1139240506329114e-05,
|
|
"loss": 0.3035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15561750531196594,
|
|
"step": 155,
|
|
"valid_targets_mean": 7738.4,
|
|
"valid_targets_min": 5329
|
|
},
|
|
{
|
|
"epoch": 0.20278833967046894,
|
|
"grad_norm": 0.486012878863244,
|
|
"learning_rate": 1.150090415913201e-05,
|
|
"loss": 0.31,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15027301013469696,
|
|
"step": 160,
|
|
"valid_targets_mean": 6426.6,
|
|
"valid_targets_min": 5560
|
|
},
|
|
{
|
|
"epoch": 0.20912547528517111,
|
|
"grad_norm": 0.5075687496876032,
|
|
"learning_rate": 1.1862567811934902e-05,
|
|
"loss": 0.3007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15287069976329803,
|
|
"step": 165,
|
|
"valid_targets_mean": 6169.9,
|
|
"valid_targets_min": 3446
|
|
},
|
|
{
|
|
"epoch": 0.21546261089987326,
|
|
"grad_norm": 0.4561677268078216,
|
|
"learning_rate": 1.2224231464737795e-05,
|
|
"loss": 0.2868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1477714329957962,
|
|
"step": 170,
|
|
"valid_targets_mean": 7204.6,
|
|
"valid_targets_min": 5381
|
|
},
|
|
{
|
|
"epoch": 0.2217997465145754,
|
|
"grad_norm": 0.44672336473490154,
|
|
"learning_rate": 1.2585895117540687e-05,
|
|
"loss": 0.2913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14919322729110718,
|
|
"step": 175,
|
|
"valid_targets_mean": 7600.4,
|
|
"valid_targets_min": 5241
|
|
},
|
|
{
|
|
"epoch": 0.22813688212927757,
|
|
"grad_norm": 0.5045152753505999,
|
|
"learning_rate": 1.2947558770343582e-05,
|
|
"loss": 0.2825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1546531617641449,
|
|
"step": 180,
|
|
"valid_targets_mean": 7098.5,
|
|
"valid_targets_min": 5607
|
|
},
|
|
{
|
|
"epoch": 0.23447401774397972,
|
|
"grad_norm": 0.5210585689385613,
|
|
"learning_rate": 1.3309222423146476e-05,
|
|
"loss": 0.2878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1501704454421997,
|
|
"step": 185,
|
|
"valid_targets_mean": 6955.1,
|
|
"valid_targets_min": 5530
|
|
},
|
|
{
|
|
"epoch": 0.24081115335868186,
|
|
"grad_norm": 0.49490644500617753,
|
|
"learning_rate": 1.3670886075949368e-05,
|
|
"loss": 0.2825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14456501603126526,
|
|
"step": 190,
|
|
"valid_targets_mean": 6748.1,
|
|
"valid_targets_min": 5364
|
|
},
|
|
{
|
|
"epoch": 0.24714828897338403,
|
|
"grad_norm": 0.527660534437476,
|
|
"learning_rate": 1.403254972875226e-05,
|
|
"loss": 0.2862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13962307572364807,
|
|
"step": 195,
|
|
"valid_targets_mean": 6273.9,
|
|
"valid_targets_min": 5438
|
|
},
|
|
{
|
|
"epoch": 0.2534854245880862,
|
|
"grad_norm": 0.46228329415923425,
|
|
"learning_rate": 1.4394213381555155e-05,
|
|
"loss": 0.2862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1392287313938141,
|
|
"step": 200,
|
|
"valid_targets_mean": 6419.5,
|
|
"valid_targets_min": 4879
|
|
},
|
|
{
|
|
"epoch": 0.2598225602027883,
|
|
"grad_norm": 0.47785018673575774,
|
|
"learning_rate": 1.4755877034358048e-05,
|
|
"loss": 0.2819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13847406208515167,
|
|
"step": 205,
|
|
"valid_targets_mean": 6483.4,
|
|
"valid_targets_min": 4309
|
|
},
|
|
{
|
|
"epoch": 0.2661596958174905,
|
|
"grad_norm": 1.2760138683458349,
|
|
"learning_rate": 1.5117540687160942e-05,
|
|
"loss": 0.2782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15761423110961914,
|
|
"step": 210,
|
|
"valid_targets_mean": 6825.0,
|
|
"valid_targets_min": 5205
|
|
},
|
|
{
|
|
"epoch": 0.27249683143219267,
|
|
"grad_norm": 0.5050832529336614,
|
|
"learning_rate": 1.5479204339963836e-05,
|
|
"loss": 0.2749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12686027586460114,
|
|
"step": 215,
|
|
"valid_targets_mean": 6729.4,
|
|
"valid_targets_min": 5200
|
|
},
|
|
{
|
|
"epoch": 0.2788339670468948,
|
|
"grad_norm": 0.5043700191158442,
|
|
"learning_rate": 1.584086799276673e-05,
|
|
"loss": 0.272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13390758633613586,
|
|
"step": 220,
|
|
"valid_targets_mean": 6094.0,
|
|
"valid_targets_min": 5302
|
|
},
|
|
{
|
|
"epoch": 0.28517110266159695,
|
|
"grad_norm": 0.520358035102514,
|
|
"learning_rate": 1.620253164556962e-05,
|
|
"loss": 0.2758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14343518018722534,
|
|
"step": 225,
|
|
"valid_targets_mean": 7140.0,
|
|
"valid_targets_min": 5270
|
|
},
|
|
{
|
|
"epoch": 0.2915082382762991,
|
|
"grad_norm": 0.4429566926478989,
|
|
"learning_rate": 1.6564195298372515e-05,
|
|
"loss": 0.2657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1271364688873291,
|
|
"step": 230,
|
|
"valid_targets_mean": 7425.0,
|
|
"valid_targets_min": 5281
|
|
},
|
|
{
|
|
"epoch": 0.29784537389100124,
|
|
"grad_norm": 0.505101220338255,
|
|
"learning_rate": 1.692585895117541e-05,
|
|
"loss": 0.273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1665816605091095,
|
|
"step": 235,
|
|
"valid_targets_mean": 7063.4,
|
|
"valid_targets_min": 5386
|
|
},
|
|
{
|
|
"epoch": 0.3041825095057034,
|
|
"grad_norm": 0.49028711034349926,
|
|
"learning_rate": 1.72875226039783e-05,
|
|
"loss": 0.2725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14317858219146729,
|
|
"step": 240,
|
|
"valid_targets_mean": 6827.0,
|
|
"valid_targets_min": 4547
|
|
},
|
|
{
|
|
"epoch": 0.3105196451204056,
|
|
"grad_norm": 0.8617191733636155,
|
|
"learning_rate": 1.7649186256781194e-05,
|
|
"loss": 0.252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08684775978326797,
|
|
"step": 245,
|
|
"valid_targets_mean": 2047.0,
|
|
"valid_targets_min": 165
|
|
},
|
|
{
|
|
"epoch": 0.31685678073510776,
|
|
"grad_norm": 0.4600625120519638,
|
|
"learning_rate": 1.801084990958409e-05,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13209614157676697,
|
|
"step": 250,
|
|
"valid_targets_mean": 6754.1,
|
|
"valid_targets_min": 5196
|
|
},
|
|
{
|
|
"epoch": 0.3231939163498099,
|
|
"grad_norm": 0.5329593528531548,
|
|
"learning_rate": 1.8372513562386983e-05,
|
|
"loss": 0.2627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1329403817653656,
|
|
"step": 255,
|
|
"valid_targets_mean": 5985.6,
|
|
"valid_targets_min": 5028
|
|
},
|
|
{
|
|
"epoch": 0.32953105196451205,
|
|
"grad_norm": 0.5536812928321088,
|
|
"learning_rate": 1.8734177215189874e-05,
|
|
"loss": 0.268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12135657668113708,
|
|
"step": 260,
|
|
"valid_targets_mean": 5963.9,
|
|
"valid_targets_min": 5237
|
|
},
|
|
{
|
|
"epoch": 0.3358681875792142,
|
|
"grad_norm": 0.9728766912933849,
|
|
"learning_rate": 1.9095840867992768e-05,
|
|
"loss": 0.2624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1371271312236786,
|
|
"step": 265,
|
|
"valid_targets_mean": 6319.0,
|
|
"valid_targets_min": 5412
|
|
},
|
|
{
|
|
"epoch": 0.34220532319391633,
|
|
"grad_norm": 0.46061806461836113,
|
|
"learning_rate": 1.9457504520795662e-05,
|
|
"loss": 0.2489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10672719776630402,
|
|
"step": 270,
|
|
"valid_targets_mean": 7125.8,
|
|
"valid_targets_min": 5080
|
|
},
|
|
{
|
|
"epoch": 0.3485424588086185,
|
|
"grad_norm": 0.4970372445457504,
|
|
"learning_rate": 1.9819168173598556e-05,
|
|
"loss": 0.2628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1304190456867218,
|
|
"step": 275,
|
|
"valid_targets_mean": 6123.5,
|
|
"valid_targets_min": 4521
|
|
},
|
|
{
|
|
"epoch": 0.3548795944233207,
|
|
"grad_norm": 1.4908668824306905,
|
|
"learning_rate": 2.0180831826401447e-05,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11352495104074478,
|
|
"step": 280,
|
|
"valid_targets_mean": 2616.6,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 0.3612167300380228,
|
|
"grad_norm": 1.0121403390533505,
|
|
"learning_rate": 2.054249547920434e-05,
|
|
"loss": 0.2483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10224316269159317,
|
|
"step": 285,
|
|
"valid_targets_mean": 1793.2,
|
|
"valid_targets_min": 324
|
|
},
|
|
{
|
|
"epoch": 0.36755386565272496,
|
|
"grad_norm": 0.6773924502980209,
|
|
"learning_rate": 2.0904159132007232e-05,
|
|
"loss": 0.1859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09892472624778748,
|
|
"step": 290,
|
|
"valid_targets_mean": 2705.1,
|
|
"valid_targets_min": 1064
|
|
},
|
|
{
|
|
"epoch": 0.37389100126742714,
|
|
"grad_norm": 0.668056158892766,
|
|
"learning_rate": 2.1265822784810126e-05,
|
|
"loss": 0.1717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10009296238422394,
|
|
"step": 295,
|
|
"valid_targets_mean": 3501.9,
|
|
"valid_targets_min": 2479
|
|
},
|
|
{
|
|
"epoch": 0.38022813688212925,
|
|
"grad_norm": 0.5529030242099016,
|
|
"learning_rate": 2.1627486437613024e-05,
|
|
"loss": 0.1859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.062157098203897476,
|
|
"step": 300,
|
|
"valid_targets_mean": 3338.6,
|
|
"valid_targets_min": 1223
|
|
},
|
|
{
|
|
"epoch": 0.3865652724968314,
|
|
"grad_norm": 0.592728401112065,
|
|
"learning_rate": 2.1989150090415915e-05,
|
|
"loss": 0.1726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08736437559127808,
|
|
"step": 305,
|
|
"valid_targets_mean": 2920.2,
|
|
"valid_targets_min": 1431
|
|
},
|
|
{
|
|
"epoch": 0.3929024081115336,
|
|
"grad_norm": 1.1251653347337707,
|
|
"learning_rate": 2.235081374321881e-05,
|
|
"loss": 0.2366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15301838517189026,
|
|
"step": 310,
|
|
"valid_targets_mean": 1602.8,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 0.39923954372623577,
|
|
"grad_norm": 0.5618919980069048,
|
|
"learning_rate": 2.2712477396021703e-05,
|
|
"loss": 0.2096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08041463792324066,
|
|
"step": 315,
|
|
"valid_targets_mean": 3130.4,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 0.4055766793409379,
|
|
"grad_norm": 0.6209500543695404,
|
|
"learning_rate": 2.3074141048824594e-05,
|
|
"loss": 0.1807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08987985551357269,
|
|
"step": 320,
|
|
"valid_targets_mean": 2692.9,
|
|
"valid_targets_min": 846
|
|
},
|
|
{
|
|
"epoch": 0.41191381495564006,
|
|
"grad_norm": 0.7790453861468684,
|
|
"learning_rate": 2.3435804701627488e-05,
|
|
"loss": 0.2209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1271807849407196,
|
|
"step": 325,
|
|
"valid_targets_mean": 3976.4,
|
|
"valid_targets_min": 3502
|
|
},
|
|
{
|
|
"epoch": 0.41825095057034223,
|
|
"grad_norm": 0.47622112724649873,
|
|
"learning_rate": 2.379746835443038e-05,
|
|
"loss": 0.1284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0746094286441803,
|
|
"step": 330,
|
|
"valid_targets_mean": 2932.5,
|
|
"valid_targets_min": 754
|
|
},
|
|
{
|
|
"epoch": 0.42458808618504434,
|
|
"grad_norm": 0.7422287277154004,
|
|
"learning_rate": 2.4159132007233276e-05,
|
|
"loss": 0.1544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09356822818517685,
|
|
"step": 335,
|
|
"valid_targets_mean": 2507.9,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 0.4309252217997465,
|
|
"grad_norm": 0.5535498682394768,
|
|
"learning_rate": 2.452079566003617e-05,
|
|
"loss": 0.1521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08713088184595108,
|
|
"step": 340,
|
|
"valid_targets_mean": 3089.9,
|
|
"valid_targets_min": 754
|
|
},
|
|
{
|
|
"epoch": 0.4372623574144487,
|
|
"grad_norm": 0.8405405848687924,
|
|
"learning_rate": 2.488245931283906e-05,
|
|
"loss": 0.2094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2348252832889557,
|
|
"step": 345,
|
|
"valid_targets_mean": 2035.1,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 0.4435994930291508,
|
|
"grad_norm": 0.5059511106793528,
|
|
"learning_rate": 2.5244122965641956e-05,
|
|
"loss": 0.1449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07829329371452332,
|
|
"step": 350,
|
|
"valid_targets_mean": 3800.2,
|
|
"valid_targets_min": 2672
|
|
},
|
|
{
|
|
"epoch": 0.449936628643853,
|
|
"grad_norm": 0.5695026440878165,
|
|
"learning_rate": 2.5605786618444847e-05,
|
|
"loss": 0.1554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10142242163419724,
|
|
"step": 355,
|
|
"valid_targets_mean": 3504.6,
|
|
"valid_targets_min": 2988
|
|
},
|
|
{
|
|
"epoch": 0.45627376425855515,
|
|
"grad_norm": 0.5616316715270487,
|
|
"learning_rate": 2.596745027124774e-05,
|
|
"loss": 0.1428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06939315795898438,
|
|
"step": 360,
|
|
"valid_targets_mean": 3088.0,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 0.46261089987325726,
|
|
"grad_norm": 0.5886877508128575,
|
|
"learning_rate": 2.6329113924050635e-05,
|
|
"loss": 0.1505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08969832956790924,
|
|
"step": 365,
|
|
"valid_targets_mean": 3463.5,
|
|
"valid_targets_min": 2189
|
|
},
|
|
{
|
|
"epoch": 0.46894803548795944,
|
|
"grad_norm": 0.4840700587450476,
|
|
"learning_rate": 2.669077757685353e-05,
|
|
"loss": 0.1572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05652599781751633,
|
|
"step": 370,
|
|
"valid_targets_mean": 3875.4,
|
|
"valid_targets_min": 3245
|
|
},
|
|
{
|
|
"epoch": 0.4752851711026616,
|
|
"grad_norm": 0.7631872217261343,
|
|
"learning_rate": 2.7052441229656423e-05,
|
|
"loss": 0.1728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08616983145475388,
|
|
"step": 375,
|
|
"valid_targets_mean": 2261.9,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 0.4816223067173637,
|
|
"grad_norm": 0.6615166299776591,
|
|
"learning_rate": 2.7414104882459318e-05,
|
|
"loss": 0.1682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06612023711204529,
|
|
"step": 380,
|
|
"valid_targets_mean": 1559.6,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 0.4879594423320659,
|
|
"grad_norm": 0.7477698556756042,
|
|
"learning_rate": 2.777576853526221e-05,
|
|
"loss": 0.1939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1275586187839508,
|
|
"step": 385,
|
|
"valid_targets_mean": 3198.4,
|
|
"valid_targets_min": 1652
|
|
},
|
|
{
|
|
"epoch": 0.49429657794676807,
|
|
"grad_norm": 0.5010837404032417,
|
|
"learning_rate": 2.8137432188065103e-05,
|
|
"loss": 0.166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07833996415138245,
|
|
"step": 390,
|
|
"valid_targets_mean": 3583.8,
|
|
"valid_targets_min": 786
|
|
},
|
|
{
|
|
"epoch": 0.5006337135614702,
|
|
"grad_norm": 0.6892982836194295,
|
|
"learning_rate": 2.8499095840867993e-05,
|
|
"loss": 0.1417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07762724161148071,
|
|
"step": 395,
|
|
"valid_targets_mean": 3100.1,
|
|
"valid_targets_min": 971
|
|
},
|
|
{
|
|
"epoch": 0.5069708491761724,
|
|
"grad_norm": 0.5348790602545861,
|
|
"learning_rate": 2.8860759493670888e-05,
|
|
"loss": 0.2036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07110585272312164,
|
|
"step": 400,
|
|
"valid_targets_mean": 3255.4,
|
|
"valid_targets_min": 2552
|
|
},
|
|
{
|
|
"epoch": 0.5133079847908745,
|
|
"grad_norm": 0.5501131684452417,
|
|
"learning_rate": 2.9222423146473785e-05,
|
|
"loss": 0.1732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05221695452928543,
|
|
"step": 405,
|
|
"valid_targets_mean": 2862.5,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 0.5196451204055766,
|
|
"grad_norm": 0.781823804149313,
|
|
"learning_rate": 2.9584086799276676e-05,
|
|
"loss": 0.1576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06272034347057343,
|
|
"step": 410,
|
|
"valid_targets_mean": 1282.6,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 0.5259822560202788,
|
|
"grad_norm": 0.6672197781445346,
|
|
"learning_rate": 2.994575045207957e-05,
|
|
"loss": 0.3925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17038753628730774,
|
|
"step": 415,
|
|
"valid_targets_mean": 2626.9,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 0.532319391634981,
|
|
"grad_norm": 0.7718008136407154,
|
|
"learning_rate": 3.030741410488246e-05,
|
|
"loss": 0.1632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.060038816183805466,
|
|
"step": 420,
|
|
"valid_targets_mean": 1323.9,
|
|
"valid_targets_min": 329
|
|
},
|
|
{
|
|
"epoch": 0.5386565272496832,
|
|
"grad_norm": 0.8466539099984428,
|
|
"learning_rate": 3.0669077757685355e-05,
|
|
"loss": 0.156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09081956744194031,
|
|
"step": 425,
|
|
"valid_targets_mean": 1575.5,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 0.5449936628643853,
|
|
"grad_norm": 0.5919532273711182,
|
|
"learning_rate": 3.1030741410488246e-05,
|
|
"loss": 0.2632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11148825287818909,
|
|
"step": 430,
|
|
"valid_targets_mean": 4513.2,
|
|
"valid_targets_min": 3556
|
|
},
|
|
{
|
|
"epoch": 0.5513307984790875,
|
|
"grad_norm": 0.38906564216632955,
|
|
"learning_rate": 3.1392405063291144e-05,
|
|
"loss": 0.1297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.035884298384189606,
|
|
"step": 435,
|
|
"valid_targets_mean": 4744.9,
|
|
"valid_targets_min": 3529
|
|
},
|
|
{
|
|
"epoch": 0.5576679340937896,
|
|
"grad_norm": 0.494682790574575,
|
|
"learning_rate": 3.1754068716094034e-05,
|
|
"loss": 0.1525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07888288050889969,
|
|
"step": 440,
|
|
"valid_targets_mean": 4400.1,
|
|
"valid_targets_min": 2336
|
|
},
|
|
{
|
|
"epoch": 0.5640050697084917,
|
|
"grad_norm": 0.4723048903896541,
|
|
"learning_rate": 3.211573236889693e-05,
|
|
"loss": 0.1488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0863257348537445,
|
|
"step": 445,
|
|
"valid_targets_mean": 3865.4,
|
|
"valid_targets_min": 2161
|
|
},
|
|
{
|
|
"epoch": 0.5703422053231939,
|
|
"grad_norm": 0.4810518983791002,
|
|
"learning_rate": 3.247739602169982e-05,
|
|
"loss": 0.14,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07077671587467194,
|
|
"step": 450,
|
|
"valid_targets_mean": 3094.0,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 0.5766793409378961,
|
|
"grad_norm": 0.5462029368038006,
|
|
"learning_rate": 3.2839059674502714e-05,
|
|
"loss": 0.134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06326122581958771,
|
|
"step": 455,
|
|
"valid_targets_mean": 2178.4,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 0.5830164765525983,
|
|
"grad_norm": 0.5434408078744161,
|
|
"learning_rate": 3.320072332730561e-05,
|
|
"loss": 0.1391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06453520804643631,
|
|
"step": 460,
|
|
"valid_targets_mean": 2657.1,
|
|
"valid_targets_min": 898
|
|
},
|
|
{
|
|
"epoch": 0.5893536121673004,
|
|
"grad_norm": 0.40451144860252547,
|
|
"learning_rate": 3.35623869801085e-05,
|
|
"loss": 0.1285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06007637083530426,
|
|
"step": 465,
|
|
"valid_targets_mean": 3808.5,
|
|
"valid_targets_min": 3278
|
|
},
|
|
{
|
|
"epoch": 0.5956907477820025,
|
|
"grad_norm": 0.6465800124046714,
|
|
"learning_rate": 3.392405063291139e-05,
|
|
"loss": 0.1416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07329286634922028,
|
|
"step": 470,
|
|
"valid_targets_mean": 1776.9,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 0.6020278833967047,
|
|
"grad_norm": 0.7417444339038128,
|
|
"learning_rate": 3.4285714285714284e-05,
|
|
"loss": 0.183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08900757879018784,
|
|
"step": 475,
|
|
"valid_targets_mean": 1609.5,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 0.6083650190114068,
|
|
"grad_norm": 0.7072872051331359,
|
|
"learning_rate": 3.464737793851718e-05,
|
|
"loss": 0.1179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058485619723796844,
|
|
"step": 480,
|
|
"valid_targets_mean": 1540.4,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 0.614702154626109,
|
|
"grad_norm": 0.6108021781787403,
|
|
"learning_rate": 3.500904159132008e-05,
|
|
"loss": 0.1354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07599474489688873,
|
|
"step": 485,
|
|
"valid_targets_mean": 2733.9,
|
|
"valid_targets_min": 930
|
|
},
|
|
{
|
|
"epoch": 0.6210392902408112,
|
|
"grad_norm": 0.5463942211108099,
|
|
"learning_rate": 3.537070524412297e-05,
|
|
"loss": 0.1239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06756268441677094,
|
|
"step": 490,
|
|
"valid_targets_mean": 2824.5,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 0.6273764258555133,
|
|
"grad_norm": 0.5017225479947749,
|
|
"learning_rate": 3.573236889692586e-05,
|
|
"loss": 0.2062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0748659074306488,
|
|
"step": 495,
|
|
"valid_targets_mean": 3541.8,
|
|
"valid_targets_min": 2795
|
|
},
|
|
{
|
|
"epoch": 0.6337135614702155,
|
|
"grad_norm": 0.560680260783109,
|
|
"learning_rate": 3.609403254972876e-05,
|
|
"loss": 0.1463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08683539181947708,
|
|
"step": 500,
|
|
"valid_targets_mean": 3088.2,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 0.6400506970849176,
|
|
"grad_norm": 0.5011783296667225,
|
|
"learning_rate": 3.645569620253165e-05,
|
|
"loss": 0.1655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06616389751434326,
|
|
"step": 505,
|
|
"valid_targets_mean": 3878.9,
|
|
"valid_targets_min": 3279
|
|
},
|
|
{
|
|
"epoch": 0.6463878326996197,
|
|
"grad_norm": 0.6255229221527177,
|
|
"learning_rate": 3.681735985533454e-05,
|
|
"loss": 0.1532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06197888031601906,
|
|
"step": 510,
|
|
"valid_targets_mean": 1184.8,
|
|
"valid_targets_min": 536
|
|
},
|
|
{
|
|
"epoch": 0.6527249683143219,
|
|
"grad_norm": 0.6037293923013698,
|
|
"learning_rate": 3.717902350813744e-05,
|
|
"loss": 0.2094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13245804607868195,
|
|
"step": 515,
|
|
"valid_targets_mean": 3220.4,
|
|
"valid_targets_min": 1084
|
|
},
|
|
{
|
|
"epoch": 0.6590621039290241,
|
|
"grad_norm": 0.6026134094069487,
|
|
"learning_rate": 3.754068716094033e-05,
|
|
"loss": 0.1465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06798072159290314,
|
|
"step": 520,
|
|
"valid_targets_mean": 3219.5,
|
|
"valid_targets_min": 2342
|
|
},
|
|
{
|
|
"epoch": 0.6653992395437263,
|
|
"grad_norm": 0.8240289487549451,
|
|
"learning_rate": 3.7902350813743226e-05,
|
|
"loss": 0.1847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0881635844707489,
|
|
"step": 525,
|
|
"valid_targets_mean": 1672.2,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 0.6717363751584284,
|
|
"grad_norm": 0.7162977860365454,
|
|
"learning_rate": 3.8264014466546117e-05,
|
|
"loss": 0.1415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05302521213889122,
|
|
"step": 530,
|
|
"valid_targets_mean": 1181.4,
|
|
"valid_targets_min": 449
|
|
},
|
|
{
|
|
"epoch": 0.6780735107731305,
|
|
"grad_norm": 0.42372922690771314,
|
|
"learning_rate": 3.862567811934901e-05,
|
|
"loss": 0.1441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07375142723321915,
|
|
"step": 535,
|
|
"valid_targets_mean": 3762.2,
|
|
"valid_targets_min": 931
|
|
},
|
|
{
|
|
"epoch": 0.6844106463878327,
|
|
"grad_norm": 0.7298057530062158,
|
|
"learning_rate": 3.89873417721519e-05,
|
|
"loss": 0.1486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11180619150400162,
|
|
"step": 540,
|
|
"valid_targets_mean": 2832.2,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 0.6907477820025348,
|
|
"grad_norm": 0.4776410882517414,
|
|
"learning_rate": 3.9349005424954796e-05,
|
|
"loss": 0.1178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051171526312828064,
|
|
"step": 545,
|
|
"valid_targets_mean": 3865.2,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 0.697084917617237,
|
|
"grad_norm": 0.4909439756472234,
|
|
"learning_rate": 3.9710669077757687e-05,
|
|
"loss": 0.1204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07672526687383652,
|
|
"step": 550,
|
|
"valid_targets_mean": 3961.6,
|
|
"valid_targets_min": 2252
|
|
},
|
|
{
|
|
"epoch": 0.7034220532319392,
|
|
"grad_norm": 0.5832708994667134,
|
|
"learning_rate": 3.999999600435447e-05,
|
|
"loss": 0.133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07603627443313599,
|
|
"step": 555,
|
|
"valid_targets_mean": 2066.0,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 0.7097591888466414,
|
|
"grad_norm": 0.5186153063777634,
|
|
"learning_rate": 3.9999856156928475e-05,
|
|
"loss": 0.159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0634511411190033,
|
|
"step": 560,
|
|
"valid_targets_mean": 1982.2,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 0.7160963244613435,
|
|
"grad_norm": 0.5024794569197574,
|
|
"learning_rate": 3.99995165288224e-05,
|
|
"loss": 0.1459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0765785276889801,
|
|
"step": 565,
|
|
"valid_targets_mean": 3687.4,
|
|
"valid_targets_min": 3024
|
|
},
|
|
{
|
|
"epoch": 0.7224334600760456,
|
|
"grad_norm": 0.4306736847398378,
|
|
"learning_rate": 3.9998977123428806e-05,
|
|
"loss": 0.1331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07399982213973999,
|
|
"step": 570,
|
|
"valid_targets_mean": 3177.5,
|
|
"valid_targets_min": 2366
|
|
},
|
|
{
|
|
"epoch": 0.7287705956907478,
|
|
"grad_norm": 0.49071712801296014,
|
|
"learning_rate": 3.9998237946135884e-05,
|
|
"loss": 0.1272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0577738918364048,
|
|
"step": 575,
|
|
"valid_targets_mean": 1100.5,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 0.7351077313054499,
|
|
"grad_norm": 0.5309836462521161,
|
|
"learning_rate": 3.999729900432735e-05,
|
|
"loss": 0.1546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06829934567213058,
|
|
"step": 580,
|
|
"valid_targets_mean": 3067.5,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 0.7414448669201521,
|
|
"grad_norm": 0.4964511014734685,
|
|
"learning_rate": 3.999616030738239e-05,
|
|
"loss": 0.1137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06064262613654137,
|
|
"step": 585,
|
|
"valid_targets_mean": 4000.5,
|
|
"valid_targets_min": 3301
|
|
},
|
|
{
|
|
"epoch": 0.7477820025348543,
|
|
"grad_norm": 0.48252303553054554,
|
|
"learning_rate": 3.999482186667557e-05,
|
|
"loss": 0.1414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058770228177309036,
|
|
"step": 590,
|
|
"valid_targets_mean": 2841.2,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 0.7541191381495564,
|
|
"grad_norm": 0.4687678051113767,
|
|
"learning_rate": 3.999328369557672e-05,
|
|
"loss": 0.1466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05852432921528816,
|
|
"step": 595,
|
|
"valid_targets_mean": 2733.4,
|
|
"valid_targets_min": 1085
|
|
},
|
|
{
|
|
"epoch": 0.7604562737642585,
|
|
"grad_norm": 0.784698819730372,
|
|
"learning_rate": 3.999154580945079e-05,
|
|
"loss": 0.156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09218577295541763,
|
|
"step": 600,
|
|
"valid_targets_mean": 1322.0,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 0.7667934093789607,
|
|
"grad_norm": 0.6429286687902588,
|
|
"learning_rate": 3.998960822565771e-05,
|
|
"loss": 0.1391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05806161090731621,
|
|
"step": 605,
|
|
"valid_targets_mean": 3263.9,
|
|
"valid_targets_min": 914
|
|
},
|
|
{
|
|
"epoch": 0.7731305449936628,
|
|
"grad_norm": 0.8063877471689734,
|
|
"learning_rate": 3.998747096355221e-05,
|
|
"loss": 0.1476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09889532625675201,
|
|
"step": 610,
|
|
"valid_targets_mean": 1634.0,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 0.779467680608365,
|
|
"grad_norm": 0.5216638283015348,
|
|
"learning_rate": 3.998513404448363e-05,
|
|
"loss": 0.1354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07127735018730164,
|
|
"step": 615,
|
|
"valid_targets_mean": 2644.0,
|
|
"valid_targets_min": 1117
|
|
},
|
|
{
|
|
"epoch": 0.7858048162230672,
|
|
"grad_norm": 0.45791373692829307,
|
|
"learning_rate": 3.9982597491795695e-05,
|
|
"loss": 0.1641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05955201014876366,
|
|
"step": 620,
|
|
"valid_targets_mean": 3539.0,
|
|
"valid_targets_min": 1388
|
|
},
|
|
{
|
|
"epoch": 0.7921419518377694,
|
|
"grad_norm": 0.43127088760547827,
|
|
"learning_rate": 3.9979861330826295e-05,
|
|
"loss": 0.1338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05608126521110535,
|
|
"step": 625,
|
|
"valid_targets_mean": 2930.5,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 0.7984790874524715,
|
|
"grad_norm": 0.5160986120903414,
|
|
"learning_rate": 3.997692558890725e-05,
|
|
"loss": 0.1246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05514921993017197,
|
|
"step": 630,
|
|
"valid_targets_mean": 1992.5,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 0.8048162230671736,
|
|
"grad_norm": 0.544850828814503,
|
|
"learning_rate": 3.997379029536397e-05,
|
|
"loss": 0.1508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07077699154615402,
|
|
"step": 635,
|
|
"valid_targets_mean": 2636.1,
|
|
"valid_targets_min": 342
|
|
},
|
|
{
|
|
"epoch": 0.8111533586818758,
|
|
"grad_norm": 0.6164217488331163,
|
|
"learning_rate": 3.997045548151526e-05,
|
|
"loss": 0.1434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05384323000907898,
|
|
"step": 640,
|
|
"valid_targets_mean": 3755.8,
|
|
"valid_targets_min": 3251
|
|
},
|
|
{
|
|
"epoch": 0.8174904942965779,
|
|
"grad_norm": 0.6484979538744611,
|
|
"learning_rate": 3.9966921180672916e-05,
|
|
"loss": 0.1215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07219064235687256,
|
|
"step": 645,
|
|
"valid_targets_mean": 2723.0,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 0.8238276299112801,
|
|
"grad_norm": 0.3759069079026419,
|
|
"learning_rate": 3.9963187428141445e-05,
|
|
"loss": 0.1269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06962897628545761,
|
|
"step": 650,
|
|
"valid_targets_mean": 4528.9,
|
|
"valid_targets_min": 2555
|
|
},
|
|
{
|
|
"epoch": 0.8301647655259823,
|
|
"grad_norm": 0.3621918048532189,
|
|
"learning_rate": 3.99592542612177e-05,
|
|
"loss": 0.1186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04644259065389633,
|
|
"step": 655,
|
|
"valid_targets_mean": 3859.6,
|
|
"valid_targets_min": 837
|
|
},
|
|
{
|
|
"epoch": 0.8365019011406845,
|
|
"grad_norm": 0.5013406377730187,
|
|
"learning_rate": 3.9955121719190485e-05,
|
|
"loss": 0.1083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05708400905132294,
|
|
"step": 660,
|
|
"valid_targets_mean": 2657.5,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 0.8428390367553865,
|
|
"grad_norm": 0.4309134741244911,
|
|
"learning_rate": 3.995078984334023e-05,
|
|
"loss": 0.141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.062330760061740875,
|
|
"step": 665,
|
|
"valid_targets_mean": 4010.0,
|
|
"valid_targets_min": 3213
|
|
},
|
|
{
|
|
"epoch": 0.8491761723700887,
|
|
"grad_norm": 0.49692618791012066,
|
|
"learning_rate": 3.994625867693847e-05,
|
|
"loss": 0.1369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15090230107307434,
|
|
"step": 670,
|
|
"valid_targets_mean": 2696.0,
|
|
"valid_targets_min": 1536
|
|
},
|
|
{
|
|
"epoch": 0.8555133079847909,
|
|
"grad_norm": 0.47206903676106016,
|
|
"learning_rate": 3.994152826524753e-05,
|
|
"loss": 0.139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05803271383047104,
|
|
"step": 675,
|
|
"valid_targets_mean": 2255.1,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 0.861850443599493,
|
|
"grad_norm": 0.39080581265927034,
|
|
"learning_rate": 3.993659865551998e-05,
|
|
"loss": 0.1364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041734904050827026,
|
|
"step": 680,
|
|
"valid_targets_mean": 3199.2,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 0.8681875792141952,
|
|
"grad_norm": 0.4430711593569562,
|
|
"learning_rate": 3.9931469896998205e-05,
|
|
"loss": 0.1611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05955768749117851,
|
|
"step": 685,
|
|
"valid_targets_mean": 3062.2,
|
|
"valid_targets_min": 1811
|
|
},
|
|
{
|
|
"epoch": 0.8745247148288974,
|
|
"grad_norm": 0.4299466190345843,
|
|
"learning_rate": 3.992614204091393e-05,
|
|
"loss": 0.1199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06108292192220688,
|
|
"step": 690,
|
|
"valid_targets_mean": 2574.4,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 0.8808618504435995,
|
|
"grad_norm": 0.7379925251854682,
|
|
"learning_rate": 3.992061514048769e-05,
|
|
"loss": 0.1437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08898892253637314,
|
|
"step": 695,
|
|
"valid_targets_mean": 2292.9,
|
|
"valid_targets_min": 833
|
|
},
|
|
{
|
|
"epoch": 0.8871989860583016,
|
|
"grad_norm": 0.3916163928541908,
|
|
"learning_rate": 3.991488925092824e-05,
|
|
"loss": 0.1239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04258294031023979,
|
|
"step": 700,
|
|
"valid_targets_mean": 3488.0,
|
|
"valid_targets_min": 1533
|
|
},
|
|
{
|
|
"epoch": 0.8935361216730038,
|
|
"grad_norm": 0.6380340455635237,
|
|
"learning_rate": 3.9908964429432126e-05,
|
|
"loss": 0.1821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08150316774845123,
|
|
"step": 705,
|
|
"valid_targets_mean": 2133.2,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 0.899873257287706,
|
|
"grad_norm": 0.6404228737164719,
|
|
"learning_rate": 3.990284073518301e-05,
|
|
"loss": 0.1734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10210031270980835,
|
|
"step": 710,
|
|
"valid_targets_mean": 1668.8,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 0.9062103929024081,
|
|
"grad_norm": 0.4748467931076169,
|
|
"learning_rate": 3.989651822935111e-05,
|
|
"loss": 0.1297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06763602793216705,
|
|
"step": 715,
|
|
"valid_targets_mean": 3523.0,
|
|
"valid_targets_min": 2784
|
|
},
|
|
{
|
|
"epoch": 0.9125475285171103,
|
|
"grad_norm": 0.4321203224303213,
|
|
"learning_rate": 3.988999697509262e-05,
|
|
"loss": 0.1299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05824748054146767,
|
|
"step": 720,
|
|
"valid_targets_mean": 3526.4,
|
|
"valid_targets_min": 2525
|
|
},
|
|
{
|
|
"epoch": 0.9188846641318125,
|
|
"grad_norm": 0.4401072333806534,
|
|
"learning_rate": 3.9883277037549033e-05,
|
|
"loss": 0.1277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05185583978891373,
|
|
"step": 725,
|
|
"valid_targets_mean": 2854.1,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 0.9252217997465145,
|
|
"grad_norm": 0.626061010776256,
|
|
"learning_rate": 3.9876358483846526e-05,
|
|
"loss": 0.1242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07002581655979156,
|
|
"step": 730,
|
|
"valid_targets_mean": 1380.8,
|
|
"valid_targets_min": 817
|
|
},
|
|
{
|
|
"epoch": 0.9315589353612167,
|
|
"grad_norm": 0.8461403206055458,
|
|
"learning_rate": 3.9869241383095255e-05,
|
|
"loss": 0.1617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11466317623853683,
|
|
"step": 735,
|
|
"valid_targets_mean": 1306.1,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 0.9378960709759189,
|
|
"grad_norm": 0.4509291663658056,
|
|
"learning_rate": 3.986192580638868e-05,
|
|
"loss": 0.1126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0591486394405365,
|
|
"step": 740,
|
|
"valid_targets_mean": 2619.8,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 0.944233206590621,
|
|
"grad_norm": 0.37662760290509084,
|
|
"learning_rate": 3.98544118268029e-05,
|
|
"loss": 0.1198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.060177143663167953,
|
|
"step": 745,
|
|
"valid_targets_mean": 3353.8,
|
|
"valid_targets_min": 1606
|
|
},
|
|
{
|
|
"epoch": 0.9505703422053232,
|
|
"grad_norm": 0.43267420051400346,
|
|
"learning_rate": 3.984669951939583e-05,
|
|
"loss": 0.13,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05944909527897835,
|
|
"step": 750,
|
|
"valid_targets_mean": 3196.2,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 0.9569074778200254,
|
|
"grad_norm": 0.3624868433486839,
|
|
"learning_rate": 3.9838788961206533e-05,
|
|
"loss": 0.1324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06078897789120674,
|
|
"step": 755,
|
|
"valid_targets_mean": 5146.8,
|
|
"valid_targets_min": 1078
|
|
},
|
|
{
|
|
"epoch": 0.9632446134347274,
|
|
"grad_norm": 0.3320400948174765,
|
|
"learning_rate": 3.983068023125442e-05,
|
|
"loss": 0.1143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06072650104761124,
|
|
"step": 760,
|
|
"valid_targets_mean": 3559.5,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 0.9695817490494296,
|
|
"grad_norm": 0.6460539010804871,
|
|
"learning_rate": 3.982237341053843e-05,
|
|
"loss": 0.13,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06736043840646744,
|
|
"step": 765,
|
|
"valid_targets_mean": 1290.6,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 0.9759188846641318,
|
|
"grad_norm": 0.6567889757310337,
|
|
"learning_rate": 3.98138685820363e-05,
|
|
"loss": 0.1476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11269399523735046,
|
|
"step": 770,
|
|
"valid_targets_mean": 2540.9,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 0.982256020278834,
|
|
"grad_norm": 0.4247858344801813,
|
|
"learning_rate": 3.9805165830703643e-05,
|
|
"loss": 0.2101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058636635541915894,
|
|
"step": 775,
|
|
"valid_targets_mean": 3372.6,
|
|
"valid_targets_min": 2612
|
|
},
|
|
{
|
|
"epoch": 0.9885931558935361,
|
|
"grad_norm": 0.4076197265353508,
|
|
"learning_rate": 3.979626524347318e-05,
|
|
"loss": 0.1266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07509380578994751,
|
|
"step": 780,
|
|
"valid_targets_mean": 4056.1,
|
|
"valid_targets_min": 3620
|
|
},
|
|
{
|
|
"epoch": 0.9949302915082383,
|
|
"grad_norm": 0.413580116658778,
|
|
"learning_rate": 3.978716690925381e-05,
|
|
"loss": 0.1221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0685562714934349,
|
|
"step": 785,
|
|
"valid_targets_mean": 3928.2,
|
|
"valid_targets_min": 3389
|
|
},
|
|
{
|
|
"epoch": 1.0012674271229405,
|
|
"grad_norm": 0.5489296946509659,
|
|
"learning_rate": 3.977787091892975e-05,
|
|
"loss": 0.2068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1382235437631607,
|
|
"step": 790,
|
|
"valid_targets_mean": 8215.2,
|
|
"valid_targets_min": 6163
|
|
},
|
|
{
|
|
"epoch": 1.0076045627376427,
|
|
"grad_norm": 0.45850373661386445,
|
|
"learning_rate": 3.976837736535964e-05,
|
|
"loss": 0.2815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14155279099941254,
|
|
"step": 795,
|
|
"valid_targets_mean": 8594.4,
|
|
"valid_targets_min": 6006
|
|
},
|
|
{
|
|
"epoch": 1.0139416983523448,
|
|
"grad_norm": 0.562880350392844,
|
|
"learning_rate": 3.975868634337559e-05,
|
|
"loss": 0.2745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12423060089349747,
|
|
"step": 800,
|
|
"valid_targets_mean": 3578.4,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 1.020278833967047,
|
|
"grad_norm": 0.4432197016416057,
|
|
"learning_rate": 3.974879794978224e-05,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1308540254831314,
|
|
"step": 805,
|
|
"valid_targets_mean": 7019.9,
|
|
"valid_targets_min": 4985
|
|
},
|
|
{
|
|
"epoch": 1.026615969581749,
|
|
"grad_norm": 0.42978151981584634,
|
|
"learning_rate": 3.97387122833558e-05,
|
|
"loss": 0.2428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12700623273849487,
|
|
"step": 810,
|
|
"valid_targets_mean": 6956.6,
|
|
"valid_targets_min": 1457
|
|
},
|
|
{
|
|
"epoch": 1.0329531051964511,
|
|
"grad_norm": 0.4408046824660545,
|
|
"learning_rate": 3.972842944484307e-05,
|
|
"loss": 0.2501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11905914545059204,
|
|
"step": 815,
|
|
"valid_targets_mean": 7361.6,
|
|
"valid_targets_min": 4961
|
|
},
|
|
{
|
|
"epoch": 1.0392902408111533,
|
|
"grad_norm": 0.46864760063436145,
|
|
"learning_rate": 3.971794953696041e-05,
|
|
"loss": 0.2669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15778818726539612,
|
|
"step": 820,
|
|
"valid_targets_mean": 7058.5,
|
|
"valid_targets_min": 5985
|
|
},
|
|
{
|
|
"epoch": 1.0456273764258555,
|
|
"grad_norm": 0.4277688685933005,
|
|
"learning_rate": 3.9707272664392724e-05,
|
|
"loss": 0.263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1414439082145691,
|
|
"step": 825,
|
|
"valid_targets_mean": 7664.2,
|
|
"valid_targets_min": 5169
|
|
},
|
|
{
|
|
"epoch": 1.0519645120405576,
|
|
"grad_norm": 0.4125904220475861,
|
|
"learning_rate": 3.969639893379242e-05,
|
|
"loss": 0.2486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12562593817710876,
|
|
"step": 830,
|
|
"valid_targets_mean": 7111.8,
|
|
"valid_targets_min": 5348
|
|
},
|
|
{
|
|
"epoch": 1.0583016476552598,
|
|
"grad_norm": 0.48879024947329763,
|
|
"learning_rate": 3.9685328453778346e-05,
|
|
"loss": 0.2454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11587627232074738,
|
|
"step": 835,
|
|
"valid_targets_mean": 4238.9,
|
|
"valid_targets_min": 152
|
|
},
|
|
{
|
|
"epoch": 1.064638783269962,
|
|
"grad_norm": 0.43096035022585283,
|
|
"learning_rate": 3.96740613349347e-05,
|
|
"loss": 0.2409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1272483617067337,
|
|
"step": 840,
|
|
"valid_targets_mean": 7199.4,
|
|
"valid_targets_min": 4258
|
|
},
|
|
{
|
|
"epoch": 1.0709759188846641,
|
|
"grad_norm": 0.40792334267117636,
|
|
"learning_rate": 3.966259768980993e-05,
|
|
"loss": 0.2435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11864976584911346,
|
|
"step": 845,
|
|
"valid_targets_mean": 7243.4,
|
|
"valid_targets_min": 5831
|
|
},
|
|
{
|
|
"epoch": 1.0773130544993663,
|
|
"grad_norm": 0.5318594155805494,
|
|
"learning_rate": 3.965093763291559e-05,
|
|
"loss": 0.2396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1191026121377945,
|
|
"step": 850,
|
|
"valid_targets_mean": 6950.6,
|
|
"valid_targets_min": 3862
|
|
},
|
|
{
|
|
"epoch": 1.0836501901140685,
|
|
"grad_norm": 0.437082809765885,
|
|
"learning_rate": 3.9639081280725224e-05,
|
|
"loss": 0.2371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12826035916805267,
|
|
"step": 855,
|
|
"valid_targets_mean": 6991.9,
|
|
"valid_targets_min": 5696
|
|
},
|
|
{
|
|
"epoch": 1.0899873257287707,
|
|
"grad_norm": 0.4077976853387214,
|
|
"learning_rate": 3.962702875167321e-05,
|
|
"loss": 0.2303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1282118260860443,
|
|
"step": 860,
|
|
"valid_targets_mean": 6963.5,
|
|
"valid_targets_min": 4947
|
|
},
|
|
{
|
|
"epoch": 1.0963244613434728,
|
|
"grad_norm": 0.41203525406959246,
|
|
"learning_rate": 3.96147801661535e-05,
|
|
"loss": 0.2343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11119857430458069,
|
|
"step": 865,
|
|
"valid_targets_mean": 6436.9,
|
|
"valid_targets_min": 4988
|
|
},
|
|
{
|
|
"epoch": 1.102661596958175,
|
|
"grad_norm": 0.4382183183159476,
|
|
"learning_rate": 3.960233564651853e-05,
|
|
"loss": 0.2378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11853908002376556,
|
|
"step": 870,
|
|
"valid_targets_mean": 7518.0,
|
|
"valid_targets_min": 5476
|
|
},
|
|
{
|
|
"epoch": 1.1089987325728772,
|
|
"grad_norm": 0.38316185027397626,
|
|
"learning_rate": 3.958969531707793e-05,
|
|
"loss": 0.2294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10954572260379791,
|
|
"step": 875,
|
|
"valid_targets_mean": 7422.6,
|
|
"valid_targets_min": 5226
|
|
},
|
|
{
|
|
"epoch": 1.1153358681875791,
|
|
"grad_norm": 0.44528703515303325,
|
|
"learning_rate": 3.957685930409729e-05,
|
|
"loss": 0.251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1536170244216919,
|
|
"step": 880,
|
|
"valid_targets_mean": 7080.0,
|
|
"valid_targets_min": 3753
|
|
},
|
|
{
|
|
"epoch": 1.1216730038022813,
|
|
"grad_norm": 0.5041613753372751,
|
|
"learning_rate": 3.9563827735796884e-05,
|
|
"loss": 0.262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13451546430587769,
|
|
"step": 885,
|
|
"valid_targets_mean": 6064.6,
|
|
"valid_targets_min": 5161
|
|
},
|
|
{
|
|
"epoch": 1.1280101394169835,
|
|
"grad_norm": 0.42210178249629177,
|
|
"learning_rate": 3.955060074235045e-05,
|
|
"loss": 0.2185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11367177963256836,
|
|
"step": 890,
|
|
"valid_targets_mean": 7388.1,
|
|
"valid_targets_min": 5795
|
|
},
|
|
{
|
|
"epoch": 1.1343472750316856,
|
|
"grad_norm": 0.3783578110084366,
|
|
"learning_rate": 3.95371784558838e-05,
|
|
"loss": 0.2235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10694952309131622,
|
|
"step": 895,
|
|
"valid_targets_mean": 7696.0,
|
|
"valid_targets_min": 4658
|
|
},
|
|
{
|
|
"epoch": 1.1406844106463878,
|
|
"grad_norm": 0.45252779954172345,
|
|
"learning_rate": 3.952356101047359e-05,
|
|
"loss": 0.23,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1260455846786499,
|
|
"step": 900,
|
|
"valid_targets_mean": 7799.1,
|
|
"valid_targets_min": 5508
|
|
},
|
|
{
|
|
"epoch": 1.14702154626109,
|
|
"grad_norm": 0.43341813143612007,
|
|
"learning_rate": 3.950974854214593e-05,
|
|
"loss": 0.2416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11695165932178497,
|
|
"step": 905,
|
|
"valid_targets_mean": 7341.6,
|
|
"valid_targets_min": 6454
|
|
},
|
|
{
|
|
"epoch": 1.1533586818757922,
|
|
"grad_norm": 0.40614351064917253,
|
|
"learning_rate": 3.949574118887502e-05,
|
|
"loss": 0.2325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10340000689029694,
|
|
"step": 910,
|
|
"valid_targets_mean": 6818.0,
|
|
"valid_targets_min": 4392
|
|
},
|
|
{
|
|
"epoch": 1.1596958174904943,
|
|
"grad_norm": 0.39467560967034576,
|
|
"learning_rate": 3.948153909058179e-05,
|
|
"loss": 0.2313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10298674553632736,
|
|
"step": 915,
|
|
"valid_targets_mean": 7857.6,
|
|
"valid_targets_min": 5394
|
|
},
|
|
{
|
|
"epoch": 1.1660329531051965,
|
|
"grad_norm": 0.36251805086421274,
|
|
"learning_rate": 3.94671423891325e-05,
|
|
"loss": 0.2038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09373708069324493,
|
|
"step": 920,
|
|
"valid_targets_mean": 8314.1,
|
|
"valid_targets_min": 4837
|
|
},
|
|
{
|
|
"epoch": 1.1723700887198987,
|
|
"grad_norm": 0.3807416937580724,
|
|
"learning_rate": 3.945255122833734e-05,
|
|
"loss": 0.2053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08730170130729675,
|
|
"step": 925,
|
|
"valid_targets_mean": 7368.2,
|
|
"valid_targets_min": 6027
|
|
},
|
|
{
|
|
"epoch": 1.1787072243346008,
|
|
"grad_norm": 0.4450758017699842,
|
|
"learning_rate": 3.9437765753948954e-05,
|
|
"loss": 0.2152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11698249727487564,
|
|
"step": 930,
|
|
"valid_targets_mean": 6435.0,
|
|
"valid_targets_min": 4238
|
|
},
|
|
{
|
|
"epoch": 1.1850443599493028,
|
|
"grad_norm": 1.9778349682496845,
|
|
"learning_rate": 3.9422786113661e-05,
|
|
"loss": 0.2354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09871332347393036,
|
|
"step": 935,
|
|
"valid_targets_mean": 247.8,
|
|
"valid_targets_min": 137
|
|
},
|
|
{
|
|
"epoch": 1.1913814955640052,
|
|
"grad_norm": 0.430133639803173,
|
|
"learning_rate": 3.940761245710671e-05,
|
|
"loss": 0.2419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11071701347827911,
|
|
"step": 940,
|
|
"valid_targets_mean": 6807.0,
|
|
"valid_targets_min": 5165
|
|
},
|
|
{
|
|
"epoch": 1.1977186311787071,
|
|
"grad_norm": 0.4673633533308296,
|
|
"learning_rate": 3.939224493585733e-05,
|
|
"loss": 0.2456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12750844657421112,
|
|
"step": 945,
|
|
"valid_targets_mean": 6989.9,
|
|
"valid_targets_min": 5104
|
|
},
|
|
{
|
|
"epoch": 1.2040557667934093,
|
|
"grad_norm": 0.4155908346134065,
|
|
"learning_rate": 3.937668370342065e-05,
|
|
"loss": 0.2438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1312229335308075,
|
|
"step": 950,
|
|
"valid_targets_mean": 6692.2,
|
|
"valid_targets_min": 5068
|
|
},
|
|
{
|
|
"epoch": 1.2103929024081115,
|
|
"grad_norm": 0.4899780530594897,
|
|
"learning_rate": 3.9360928915239484e-05,
|
|
"loss": 0.2367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14193138480186462,
|
|
"step": 955,
|
|
"valid_targets_mean": 7419.6,
|
|
"valid_targets_min": 5578
|
|
},
|
|
{
|
|
"epoch": 1.2167300380228137,
|
|
"grad_norm": 0.39492453631426117,
|
|
"learning_rate": 3.934498072869008e-05,
|
|
"loss": 0.2258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11551227420568466,
|
|
"step": 960,
|
|
"valid_targets_mean": 7536.5,
|
|
"valid_targets_min": 5075
|
|
},
|
|
{
|
|
"epoch": 1.2230671736375158,
|
|
"grad_norm": 0.380612536360672,
|
|
"learning_rate": 3.9328839303080554e-05,
|
|
"loss": 0.2348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10635116696357727,
|
|
"step": 965,
|
|
"valid_targets_mean": 7221.5,
|
|
"valid_targets_min": 5841
|
|
},
|
|
{
|
|
"epoch": 1.229404309252218,
|
|
"grad_norm": 0.3920026103403897,
|
|
"learning_rate": 3.9312504799649305e-05,
|
|
"loss": 0.2282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11276505887508392,
|
|
"step": 970,
|
|
"valid_targets_mean": 7287.5,
|
|
"valid_targets_min": 5151
|
|
},
|
|
{
|
|
"epoch": 1.2357414448669202,
|
|
"grad_norm": 0.4084734723430415,
|
|
"learning_rate": 3.9295977381563435e-05,
|
|
"loss": 0.2389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11081689596176147,
|
|
"step": 975,
|
|
"valid_targets_mean": 6738.4,
|
|
"valid_targets_min": 4042
|
|
},
|
|
{
|
|
"epoch": 1.2420785804816223,
|
|
"grad_norm": 0.4682757027432899,
|
|
"learning_rate": 3.927925721391707e-05,
|
|
"loss": 0.2297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12866833806037903,
|
|
"step": 980,
|
|
"valid_targets_mean": 6331.0,
|
|
"valid_targets_min": 5496
|
|
},
|
|
{
|
|
"epoch": 1.2484157160963245,
|
|
"grad_norm": 0.45533980696136295,
|
|
"learning_rate": 3.926234446372972e-05,
|
|
"loss": 0.2327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11390513926744461,
|
|
"step": 985,
|
|
"valid_targets_mean": 6271.1,
|
|
"valid_targets_min": 4695
|
|
},
|
|
{
|
|
"epoch": 1.2547528517110267,
|
|
"grad_norm": 0.4560293327439194,
|
|
"learning_rate": 3.9245239299944655e-05,
|
|
"loss": 0.2324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11997664719820023,
|
|
"step": 990,
|
|
"valid_targets_mean": 5790.8,
|
|
"valid_targets_min": 5021
|
|
},
|
|
{
|
|
"epoch": 1.2610899873257289,
|
|
"grad_norm": 0.5282480435035835,
|
|
"learning_rate": 3.922794189342715e-05,
|
|
"loss": 0.2238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09170438349246979,
|
|
"step": 995,
|
|
"valid_targets_mean": 3386.8,
|
|
"valid_targets_min": 2279
|
|
},
|
|
{
|
|
"epoch": 1.2674271229404308,
|
|
"grad_norm": 0.4601756561583352,
|
|
"learning_rate": 3.921045241696284e-05,
|
|
"loss": 0.2233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1120515838265419,
|
|
"step": 1000,
|
|
"valid_targets_mean": 6240.5,
|
|
"valid_targets_min": 5093
|
|
},
|
|
{
|
|
"epoch": 1.2737642585551332,
|
|
"grad_norm": 0.4310870335232501,
|
|
"learning_rate": 3.9192771045255957e-05,
|
|
"loss": 0.2253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11720719933509827,
|
|
"step": 1005,
|
|
"valid_targets_mean": 6329.5,
|
|
"valid_targets_min": 4589
|
|
},
|
|
{
|
|
"epoch": 1.2801013941698351,
|
|
"grad_norm": 0.4552042328897915,
|
|
"learning_rate": 3.9174897954927586e-05,
|
|
"loss": 0.2241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12053968757390976,
|
|
"step": 1010,
|
|
"valid_targets_mean": 7589.2,
|
|
"valid_targets_min": 4963
|
|
},
|
|
{
|
|
"epoch": 1.2864385297845373,
|
|
"grad_norm": 0.40121352364617846,
|
|
"learning_rate": 3.915683332451392e-05,
|
|
"loss": 0.225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09680202603340149,
|
|
"step": 1015,
|
|
"valid_targets_mean": 5577.9,
|
|
"valid_targets_min": 4458
|
|
},
|
|
{
|
|
"epoch": 1.2927756653992395,
|
|
"grad_norm": 0.5170643810410182,
|
|
"learning_rate": 3.913857733446448e-05,
|
|
"loss": 0.2233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10695898532867432,
|
|
"step": 1020,
|
|
"valid_targets_mean": 5432.5,
|
|
"valid_targets_min": 4433
|
|
},
|
|
{
|
|
"epoch": 1.2991128010139417,
|
|
"grad_norm": 0.4367734112834069,
|
|
"learning_rate": 3.912013016714027e-05,
|
|
"loss": 0.2276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11636387556791306,
|
|
"step": 1025,
|
|
"valid_targets_mean": 6361.8,
|
|
"valid_targets_min": 5201
|
|
},
|
|
{
|
|
"epoch": 1.3054499366286438,
|
|
"grad_norm": 0.43288258923668327,
|
|
"learning_rate": 3.910149200681199e-05,
|
|
"loss": 0.2288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10549534857273102,
|
|
"step": 1030,
|
|
"valid_targets_mean": 5824.4,
|
|
"valid_targets_min": 5129
|
|
},
|
|
{
|
|
"epoch": 1.311787072243346,
|
|
"grad_norm": 0.8939173905732886,
|
|
"learning_rate": 3.908266303965821e-05,
|
|
"loss": 0.2011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1556260883808136,
|
|
"step": 1035,
|
|
"valid_targets_mean": 1637.6,
|
|
"valid_targets_min": 137
|
|
},
|
|
{
|
|
"epoch": 1.3181242078580482,
|
|
"grad_norm": 0.4332017161825981,
|
|
"learning_rate": 3.9063643453763465e-05,
|
|
"loss": 0.2233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11248978227376938,
|
|
"step": 1040,
|
|
"valid_targets_mean": 6474.6,
|
|
"valid_targets_min": 4751
|
|
},
|
|
{
|
|
"epoch": 1.3244613434727504,
|
|
"grad_norm": 0.44855460942898745,
|
|
"learning_rate": 3.904443343911642e-05,
|
|
"loss": 0.2202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.121623694896698,
|
|
"step": 1045,
|
|
"valid_targets_mean": 7186.2,
|
|
"valid_targets_min": 5358
|
|
},
|
|
{
|
|
"epoch": 1.3307984790874525,
|
|
"grad_norm": 0.40854498920842724,
|
|
"learning_rate": 3.902503318760794e-05,
|
|
"loss": 0.2268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11384068429470062,
|
|
"step": 1050,
|
|
"valid_targets_mean": 7126.8,
|
|
"valid_targets_min": 4078
|
|
},
|
|
{
|
|
"epoch": 1.3371356147021547,
|
|
"grad_norm": 0.4387309121548213,
|
|
"learning_rate": 3.90054428930292e-05,
|
|
"loss": 0.2186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10983128845691681,
|
|
"step": 1055,
|
|
"valid_targets_mean": 7022.1,
|
|
"valid_targets_min": 4940
|
|
},
|
|
{
|
|
"epoch": 1.3434727503168569,
|
|
"grad_norm": 0.45313165611175116,
|
|
"learning_rate": 3.898566275106971e-05,
|
|
"loss": 0.2129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11376477777957916,
|
|
"step": 1060,
|
|
"valid_targets_mean": 5900.6,
|
|
"valid_targets_min": 4779
|
|
},
|
|
{
|
|
"epoch": 1.3498098859315588,
|
|
"grad_norm": 0.42225292825835004,
|
|
"learning_rate": 3.8965692959315415e-05,
|
|
"loss": 0.223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11450430750846863,
|
|
"step": 1065,
|
|
"valid_targets_mean": 6290.1,
|
|
"valid_targets_min": 4620
|
|
},
|
|
{
|
|
"epoch": 1.3561470215462612,
|
|
"grad_norm": 0.6593353783779575,
|
|
"learning_rate": 3.894553371724667e-05,
|
|
"loss": 0.1995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12455026805400848,
|
|
"step": 1070,
|
|
"valid_targets_mean": 1545.8,
|
|
"valid_targets_min": 333
|
|
},
|
|
{
|
|
"epoch": 1.3624841571609632,
|
|
"grad_norm": 0.44603618338106876,
|
|
"learning_rate": 3.892518522623628e-05,
|
|
"loss": 0.1383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07141514122486115,
|
|
"step": 1075,
|
|
"valid_targets_mean": 3339.4,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 1.3688212927756653,
|
|
"grad_norm": 0.5590758136549838,
|
|
"learning_rate": 3.890464768954748e-05,
|
|
"loss": 0.1246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05872485786676407,
|
|
"step": 1080,
|
|
"valid_targets_mean": 2061.8,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 1.3751584283903675,
|
|
"grad_norm": 0.48259751659957684,
|
|
"learning_rate": 3.888392131233189e-05,
|
|
"loss": 0.1279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05127622187137604,
|
|
"step": 1085,
|
|
"valid_targets_mean": 2967.0,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 1.3814955640050697,
|
|
"grad_norm": 0.48353146828206217,
|
|
"learning_rate": 3.88630063016275e-05,
|
|
"loss": 0.1174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06223687529563904,
|
|
"step": 1090,
|
|
"valid_targets_mean": 2676.2,
|
|
"valid_targets_min": 818
|
|
},
|
|
{
|
|
"epoch": 1.3878326996197718,
|
|
"grad_norm": 0.44058175503994296,
|
|
"learning_rate": 3.884190286635655e-05,
|
|
"loss": 0.1247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.059681497514247894,
|
|
"step": 1095,
|
|
"valid_targets_mean": 2992.4,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 1.394169835234474,
|
|
"grad_norm": 0.7675653292532986,
|
|
"learning_rate": 3.882061121732349e-05,
|
|
"loss": 0.1788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08780942112207413,
|
|
"step": 1100,
|
|
"valid_targets_mean": 1533.4,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 1.4005069708491762,
|
|
"grad_norm": 0.42044327014852134,
|
|
"learning_rate": 3.879913156721287e-05,
|
|
"loss": 0.1381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.057666681706905365,
|
|
"step": 1105,
|
|
"valid_targets_mean": 3398.4,
|
|
"valid_targets_min": 1133
|
|
},
|
|
{
|
|
"epoch": 1.4068441064638784,
|
|
"grad_norm": 0.5246273030520889,
|
|
"learning_rate": 3.877746413058718e-05,
|
|
"loss": 0.1328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05388721823692322,
|
|
"step": 1110,
|
|
"valid_targets_mean": 2008.1,
|
|
"valid_targets_min": 859
|
|
},
|
|
{
|
|
"epoch": 1.4131812420785805,
|
|
"grad_norm": 0.3105247319258976,
|
|
"learning_rate": 3.875560912388474e-05,
|
|
"loss": 0.1624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04339855909347534,
|
|
"step": 1115,
|
|
"valid_targets_mean": 4961.9,
|
|
"valid_targets_min": 962
|
|
},
|
|
{
|
|
"epoch": 1.4195183776932827,
|
|
"grad_norm": 0.36564617043426784,
|
|
"learning_rate": 3.873356676541751e-05,
|
|
"loss": 0.1066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06978192925453186,
|
|
"step": 1120,
|
|
"valid_targets_mean": 4281.1,
|
|
"valid_targets_min": 2091
|
|
},
|
|
{
|
|
"epoch": 1.4258555133079849,
|
|
"grad_norm": 0.3905871342582646,
|
|
"learning_rate": 3.8711337275368965e-05,
|
|
"loss": 0.1155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.055003076791763306,
|
|
"step": 1125,
|
|
"valid_targets_mean": 3922.4,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 1.4321926489226868,
|
|
"grad_norm": 0.36323557564965253,
|
|
"learning_rate": 3.868892087579182e-05,
|
|
"loss": 0.117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056727658957242966,
|
|
"step": 1130,
|
|
"valid_targets_mean": 2884.8,
|
|
"valid_targets_min": 518
|
|
},
|
|
{
|
|
"epoch": 1.4385297845373892,
|
|
"grad_norm": 0.4828461218977691,
|
|
"learning_rate": 3.866631779060587e-05,
|
|
"loss": 0.1695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07414934039115906,
|
|
"step": 1135,
|
|
"valid_targets_mean": 2802.8,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 1.4448669201520912,
|
|
"grad_norm": 0.382876580969531,
|
|
"learning_rate": 3.864352824559571e-05,
|
|
"loss": 0.1059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04398588091135025,
|
|
"step": 1140,
|
|
"valid_targets_mean": 3904.2,
|
|
"valid_targets_min": 3170
|
|
},
|
|
{
|
|
"epoch": 1.4512040557667933,
|
|
"grad_norm": 0.6452143252281195,
|
|
"learning_rate": 3.8620552468408545e-05,
|
|
"loss": 0.1314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06592268496751785,
|
|
"step": 1145,
|
|
"valid_targets_mean": 1567.1,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 1.4575411913814955,
|
|
"grad_norm": 0.3545072918249643,
|
|
"learning_rate": 3.8597390688551826e-05,
|
|
"loss": 0.101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03272652626037598,
|
|
"step": 1150,
|
|
"valid_targets_mean": 3412.6,
|
|
"valid_targets_min": 2490
|
|
},
|
|
{
|
|
"epoch": 1.4638783269961977,
|
|
"grad_norm": 0.4695292279516299,
|
|
"learning_rate": 3.8574043137391026e-05,
|
|
"loss": 0.1261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06077080965042114,
|
|
"step": 1155,
|
|
"valid_targets_mean": 2430.0,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 1.4702154626108999,
|
|
"grad_norm": 0.6002484037293271,
|
|
"learning_rate": 3.85505100481473e-05,
|
|
"loss": 0.137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0801704078912735,
|
|
"step": 1160,
|
|
"valid_targets_mean": 1596.5,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 1.476552598225602,
|
|
"grad_norm": 0.4640781686093506,
|
|
"learning_rate": 3.852679165589518e-05,
|
|
"loss": 0.1343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0865936130285263,
|
|
"step": 1165,
|
|
"valid_targets_mean": 3337.4,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 1.4828897338403042,
|
|
"grad_norm": 0.4152863047425929,
|
|
"learning_rate": 3.850288819756019e-05,
|
|
"loss": 0.1337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06747367978096008,
|
|
"step": 1170,
|
|
"valid_targets_mean": 2088.1,
|
|
"valid_targets_min": 1008
|
|
},
|
|
{
|
|
"epoch": 1.4892268694550064,
|
|
"grad_norm": 0.5607432646092826,
|
|
"learning_rate": 3.847879991191651e-05,
|
|
"loss": 0.1823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04371974989771843,
|
|
"step": 1175,
|
|
"valid_targets_mean": 3485.2,
|
|
"valid_targets_min": 2819
|
|
},
|
|
{
|
|
"epoch": 1.4955640050697085,
|
|
"grad_norm": 0.4618959198786898,
|
|
"learning_rate": 3.845452703958456e-05,
|
|
"loss": 0.11,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.044032417237758636,
|
|
"step": 1180,
|
|
"valid_targets_mean": 4105.1,
|
|
"valid_targets_min": 3927
|
|
},
|
|
{
|
|
"epoch": 1.5019011406844105,
|
|
"grad_norm": 0.6619928633121251,
|
|
"learning_rate": 3.843006982302865e-05,
|
|
"loss": 0.1288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09652753919363022,
|
|
"step": 1185,
|
|
"valid_targets_mean": 1385.8,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 1.508238276299113,
|
|
"grad_norm": 0.4379135430527294,
|
|
"learning_rate": 3.8405428506554495e-05,
|
|
"loss": 0.1581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05338441580533981,
|
|
"step": 1190,
|
|
"valid_targets_mean": 2748.1,
|
|
"valid_targets_min": 754
|
|
},
|
|
{
|
|
"epoch": 1.5145754119138148,
|
|
"grad_norm": 0.38511517186145733,
|
|
"learning_rate": 3.838060333630681e-05,
|
|
"loss": 0.1425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06358373165130615,
|
|
"step": 1195,
|
|
"valid_targets_mean": 3963.2,
|
|
"valid_targets_min": 3178
|
|
},
|
|
{
|
|
"epoch": 1.5209125475285172,
|
|
"grad_norm": 0.6970184738825207,
|
|
"learning_rate": 3.835559456026686e-05,
|
|
"loss": 0.1646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14463447034358978,
|
|
"step": 1200,
|
|
"valid_targets_mean": 1719.4,
|
|
"valid_targets_min": 920
|
|
},
|
|
{
|
|
"epoch": 1.5272496831432192,
|
|
"grad_norm": 0.5140655599531024,
|
|
"learning_rate": 3.8330402428249945e-05,
|
|
"loss": 0.3122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06294423341751099,
|
|
"step": 1205,
|
|
"valid_targets_mean": 3288.6,
|
|
"valid_targets_min": 2176
|
|
},
|
|
{
|
|
"epoch": 1.5335868187579216,
|
|
"grad_norm": 0.46734965741971396,
|
|
"learning_rate": 3.8305027191902945e-05,
|
|
"loss": 0.1335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04987592250108719,
|
|
"step": 1210,
|
|
"valid_targets_mean": 2379.0,
|
|
"valid_targets_min": 833
|
|
},
|
|
{
|
|
"epoch": 1.5399239543726235,
|
|
"grad_norm": 0.46672749696948496,
|
|
"learning_rate": 3.827946910470178e-05,
|
|
"loss": 0.1317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07748211920261383,
|
|
"step": 1215,
|
|
"valid_targets_mean": 3301.8,
|
|
"valid_targets_min": 1212
|
|
},
|
|
{
|
|
"epoch": 1.5462610899873257,
|
|
"grad_norm": 0.4363201313052662,
|
|
"learning_rate": 3.825372842194888e-05,
|
|
"loss": 0.2322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0853273868560791,
|
|
"step": 1220,
|
|
"valid_targets_mean": 4058.1,
|
|
"valid_targets_min": 2258
|
|
},
|
|
{
|
|
"epoch": 1.5525982256020279,
|
|
"grad_norm": 0.27881729477463774,
|
|
"learning_rate": 3.8227805400770665e-05,
|
|
"loss": 0.0962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05209629237651825,
|
|
"step": 1225,
|
|
"valid_targets_mean": 3983.5,
|
|
"valid_targets_min": 2664
|
|
},
|
|
{
|
|
"epoch": 1.55893536121673,
|
|
"grad_norm": 0.35921933752907975,
|
|
"learning_rate": 3.820170030011493e-05,
|
|
"loss": 0.1303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05065471678972244,
|
|
"step": 1230,
|
|
"valid_targets_mean": 3440.0,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 1.5652724968314322,
|
|
"grad_norm": 0.399625980945358,
|
|
"learning_rate": 3.8175413380748285e-05,
|
|
"loss": 0.131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06763225793838501,
|
|
"step": 1235,
|
|
"valid_targets_mean": 3540.6,
|
|
"valid_targets_min": 1615
|
|
},
|
|
{
|
|
"epoch": 1.5716096324461344,
|
|
"grad_norm": 0.3655057096264632,
|
|
"learning_rate": 3.814894490525356e-05,
|
|
"loss": 0.1088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04089631885290146,
|
|
"step": 1240,
|
|
"valid_targets_mean": 3660.2,
|
|
"valid_targets_min": 2257
|
|
},
|
|
{
|
|
"epoch": 1.5779467680608366,
|
|
"grad_norm": 0.423450075101986,
|
|
"learning_rate": 3.812229513802714e-05,
|
|
"loss": 0.1081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05158841609954834,
|
|
"step": 1245,
|
|
"valid_targets_mean": 3504.1,
|
|
"valid_targets_min": 886
|
|
},
|
|
{
|
|
"epoch": 1.5842839036755385,
|
|
"grad_norm": 0.3897234600664158,
|
|
"learning_rate": 3.809546434527639e-05,
|
|
"loss": 0.1153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05948162078857422,
|
|
"step": 1250,
|
|
"valid_targets_mean": 3628.4,
|
|
"valid_targets_min": 2409
|
|
},
|
|
{
|
|
"epoch": 1.590621039290241,
|
|
"grad_norm": 0.7084274474809971,
|
|
"learning_rate": 3.806845279501693e-05,
|
|
"loss": 0.1054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04768751189112663,
|
|
"step": 1255,
|
|
"valid_targets_mean": 3437.6,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 1.5969581749049429,
|
|
"grad_norm": 0.5873788335053718,
|
|
"learning_rate": 3.804126075707e-05,
|
|
"loss": 0.1639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13429638743400574,
|
|
"step": 1260,
|
|
"valid_targets_mean": 2623.4,
|
|
"valid_targets_min": 925
|
|
},
|
|
{
|
|
"epoch": 1.6032953105196452,
|
|
"grad_norm": 0.3477330293932954,
|
|
"learning_rate": 3.801388850305974e-05,
|
|
"loss": 0.1036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04172452539205551,
|
|
"step": 1265,
|
|
"valid_targets_mean": 3695.5,
|
|
"valid_targets_min": 2877
|
|
},
|
|
{
|
|
"epoch": 1.6096324461343472,
|
|
"grad_norm": 0.6131820772290664,
|
|
"learning_rate": 3.798633630641049e-05,
|
|
"loss": 0.1067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08282414823770523,
|
|
"step": 1270,
|
|
"valid_targets_mean": 2015.2,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 1.6159695817490496,
|
|
"grad_norm": 0.3328774837480846,
|
|
"learning_rate": 3.7958604442344055e-05,
|
|
"loss": 0.0999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04383821040391922,
|
|
"step": 1275,
|
|
"valid_targets_mean": 3648.8,
|
|
"valid_targets_min": 2353
|
|
},
|
|
{
|
|
"epoch": 1.6223067173637515,
|
|
"grad_norm": 0.5774975138870986,
|
|
"learning_rate": 3.7930693187876976e-05,
|
|
"loss": 0.1048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06436209380626678,
|
|
"step": 1280,
|
|
"valid_targets_mean": 2543.6,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 1.6286438529784537,
|
|
"grad_norm": 0.36844004015991066,
|
|
"learning_rate": 3.790260282181772e-05,
|
|
"loss": 0.1739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04883971065282822,
|
|
"step": 1285,
|
|
"valid_targets_mean": 3863.6,
|
|
"valid_targets_min": 3340
|
|
},
|
|
{
|
|
"epoch": 1.6349809885931559,
|
|
"grad_norm": 0.42445140202426634,
|
|
"learning_rate": 3.787433362476393e-05,
|
|
"loss": 0.1355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0897354930639267,
|
|
"step": 1290,
|
|
"valid_targets_mean": 2883.1,
|
|
"valid_targets_min": 960
|
|
},
|
|
{
|
|
"epoch": 1.641318124207858,
|
|
"grad_norm": 0.4235282192699392,
|
|
"learning_rate": 3.78458858790996e-05,
|
|
"loss": 0.1244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05497494712471962,
|
|
"step": 1295,
|
|
"valid_targets_mean": 1581.1,
|
|
"valid_targets_min": 804
|
|
},
|
|
{
|
|
"epoch": 1.6476552598225602,
|
|
"grad_norm": 0.8249484056729068,
|
|
"learning_rate": 3.781725986899227e-05,
|
|
"loss": 0.1476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12709879875183105,
|
|
"step": 1300,
|
|
"valid_targets_mean": 1356.8,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 1.6539923954372624,
|
|
"grad_norm": 0.5949375324550535,
|
|
"learning_rate": 3.7788455880390205e-05,
|
|
"loss": 0.1702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09040827304124832,
|
|
"step": 1305,
|
|
"valid_targets_mean": 2382.9,
|
|
"valid_targets_min": 910
|
|
},
|
|
{
|
|
"epoch": 1.6603295310519646,
|
|
"grad_norm": 0.5345868645730363,
|
|
"learning_rate": 3.775947420101948e-05,
|
|
"loss": 0.1174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05591303110122681,
|
|
"step": 1310,
|
|
"valid_targets_mean": 2193.6,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 1.6666666666666665,
|
|
"grad_norm": 0.38458074031995787,
|
|
"learning_rate": 3.773031512038116e-05,
|
|
"loss": 0.1472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06099797040224075,
|
|
"step": 1315,
|
|
"valid_targets_mean": 3755.8,
|
|
"valid_targets_min": 2024
|
|
},
|
|
{
|
|
"epoch": 1.673003802281369,
|
|
"grad_norm": 0.642860447792019,
|
|
"learning_rate": 3.770097892974838e-05,
|
|
"loss": 0.1294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09495936334133148,
|
|
"step": 1320,
|
|
"valid_targets_mean": 1787.9,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 1.6793409378960709,
|
|
"grad_norm": 0.37717334524708235,
|
|
"learning_rate": 3.767146592216347e-05,
|
|
"loss": 0.1077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04648321866989136,
|
|
"step": 1325,
|
|
"valid_targets_mean": 2625.9,
|
|
"valid_targets_min": 468
|
|
},
|
|
{
|
|
"epoch": 1.6856780735107733,
|
|
"grad_norm": 0.26951963264468887,
|
|
"learning_rate": 3.764177639243498e-05,
|
|
"loss": 0.1161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.030064940452575684,
|
|
"step": 1330,
|
|
"valid_targets_mean": 5125.8,
|
|
"valid_targets_min": 1195
|
|
},
|
|
{
|
|
"epoch": 1.6920152091254752,
|
|
"grad_norm": 0.42190050061457707,
|
|
"learning_rate": 3.761191063713476e-05,
|
|
"loss": 0.1053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04663790762424469,
|
|
"step": 1335,
|
|
"valid_targets_mean": 3718.0,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 1.6983523447401776,
|
|
"grad_norm": 0.33627608213720384,
|
|
"learning_rate": 3.758186895459501e-05,
|
|
"loss": 0.1047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0536949522793293,
|
|
"step": 1340,
|
|
"valid_targets_mean": 3669.1,
|
|
"valid_targets_min": 981
|
|
},
|
|
{
|
|
"epoch": 1.7046894803548795,
|
|
"grad_norm": 0.5114885130556783,
|
|
"learning_rate": 3.755165164490527e-05,
|
|
"loss": 0.1164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056188516318798065,
|
|
"step": 1345,
|
|
"valid_targets_mean": 1833.0,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 1.7110266159695817,
|
|
"grad_norm": 0.41460416274298745,
|
|
"learning_rate": 3.752125900990944e-05,
|
|
"loss": 0.128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.054467666894197464,
|
|
"step": 1350,
|
|
"valid_targets_mean": 3618.4,
|
|
"valid_targets_min": 2777
|
|
},
|
|
{
|
|
"epoch": 1.717363751584284,
|
|
"grad_norm": 0.4219514139109562,
|
|
"learning_rate": 3.7490691353202783e-05,
|
|
"loss": 0.1223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05756935477256775,
|
|
"step": 1355,
|
|
"valid_targets_mean": 3884.2,
|
|
"valid_targets_min": 3061
|
|
},
|
|
{
|
|
"epoch": 1.723700887198986,
|
|
"grad_norm": 0.31744171859354087,
|
|
"learning_rate": 3.745994898012884e-05,
|
|
"loss": 0.1067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04886405169963837,
|
|
"step": 1360,
|
|
"valid_targets_mean": 3460.1,
|
|
"valid_targets_min": 1099
|
|
},
|
|
{
|
|
"epoch": 1.7300380228136882,
|
|
"grad_norm": 0.434500399978404,
|
|
"learning_rate": 3.7429032197776445e-05,
|
|
"loss": 0.1108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0489216074347496,
|
|
"step": 1365,
|
|
"valid_targets_mean": 2901.4,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 1.7363751584283904,
|
|
"grad_norm": 0.34917926971119323,
|
|
"learning_rate": 3.73979413149766e-05,
|
|
"loss": 0.1258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0524873360991478,
|
|
"step": 1370,
|
|
"valid_targets_mean": 3648.0,
|
|
"valid_targets_min": 2600
|
|
},
|
|
{
|
|
"epoch": 1.7427122940430926,
|
|
"grad_norm": 0.31459336070635135,
|
|
"learning_rate": 3.736667664229943e-05,
|
|
"loss": 0.096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04770734906196594,
|
|
"step": 1375,
|
|
"valid_targets_mean": 3957.9,
|
|
"valid_targets_min": 3718
|
|
},
|
|
{
|
|
"epoch": 1.7490494296577945,
|
|
"grad_norm": 0.4111092782440353,
|
|
"learning_rate": 3.733523849205105e-05,
|
|
"loss": 0.1227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.050176799297332764,
|
|
"step": 1380,
|
|
"valid_targets_mean": 2735.0,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 1.755386565272497,
|
|
"grad_norm": 0.37710096363510465,
|
|
"learning_rate": 3.730362717827049e-05,
|
|
"loss": 0.1164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042826227843761444,
|
|
"step": 1385,
|
|
"valid_targets_mean": 3689.2,
|
|
"valid_targets_min": 2587
|
|
},
|
|
{
|
|
"epoch": 1.7617237008871989,
|
|
"grad_norm": 0.40871845256384803,
|
|
"learning_rate": 3.727184301672649e-05,
|
|
"loss": 0.1373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05368582904338837,
|
|
"step": 1390,
|
|
"valid_targets_mean": 3088.9,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 1.7680608365019013,
|
|
"grad_norm": 0.4683011135863312,
|
|
"learning_rate": 3.723988632491441e-05,
|
|
"loss": 0.1191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06313968449831009,
|
|
"step": 1395,
|
|
"valid_targets_mean": 2951.0,
|
|
"valid_targets_min": 885
|
|
},
|
|
{
|
|
"epoch": 1.7743979721166032,
|
|
"grad_norm": 0.7116826194149133,
|
|
"learning_rate": 3.720775742205302e-05,
|
|
"loss": 0.1308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07318995147943497,
|
|
"step": 1400,
|
|
"valid_targets_mean": 1399.6,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 1.7807351077313056,
|
|
"grad_norm": 0.5619882168617812,
|
|
"learning_rate": 3.717545662908135e-05,
|
|
"loss": 0.1199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06628923118114471,
|
|
"step": 1405,
|
|
"valid_targets_mean": 1881.5,
|
|
"valid_targets_min": 708
|
|
},
|
|
{
|
|
"epoch": 1.7870722433460076,
|
|
"grad_norm": 0.43617132381901585,
|
|
"learning_rate": 3.714298426865542e-05,
|
|
"loss": 0.1308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05978765711188316,
|
|
"step": 1410,
|
|
"valid_targets_mean": 2534.2,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 1.7934093789607097,
|
|
"grad_norm": 0.4598548586068117,
|
|
"learning_rate": 3.711034066514509e-05,
|
|
"loss": 0.1156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06336797773838043,
|
|
"step": 1415,
|
|
"valid_targets_mean": 2726.6,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 1.799746514575412,
|
|
"grad_norm": 0.6212340752677732,
|
|
"learning_rate": 3.7077526144630764e-05,
|
|
"loss": 0.1099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07128962874412537,
|
|
"step": 1420,
|
|
"valid_targets_mean": 1503.4,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 1.806083650190114,
|
|
"grad_norm": 0.6868292069516538,
|
|
"learning_rate": 3.704454103490016e-05,
|
|
"loss": 0.1388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12385895103216171,
|
|
"step": 1425,
|
|
"valid_targets_mean": 1759.2,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 1.8124207858048162,
|
|
"grad_norm": 0.30594603802339987,
|
|
"learning_rate": 3.7011385665445036e-05,
|
|
"loss": 0.1037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029884271323680878,
|
|
"step": 1430,
|
|
"valid_targets_mean": 3772.2,
|
|
"valid_targets_min": 3360
|
|
},
|
|
{
|
|
"epoch": 1.8187579214195184,
|
|
"grad_norm": 0.3607546589841509,
|
|
"learning_rate": 3.697806036745788e-05,
|
|
"loss": 0.1065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04510767012834549,
|
|
"step": 1435,
|
|
"valid_targets_mean": 3916.1,
|
|
"valid_targets_min": 1773
|
|
},
|
|
{
|
|
"epoch": 1.8250950570342206,
|
|
"grad_norm": 0.39232699042876845,
|
|
"learning_rate": 3.694456547382863e-05,
|
|
"loss": 0.1126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04957287758588791,
|
|
"step": 1440,
|
|
"valid_targets_mean": 2122.4,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 1.8314321926489225,
|
|
"grad_norm": 0.39958638881121333,
|
|
"learning_rate": 3.691090131914131e-05,
|
|
"loss": 0.099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07139233499765396,
|
|
"step": 1445,
|
|
"valid_targets_mean": 5008.0,
|
|
"valid_targets_min": 4222
|
|
},
|
|
{
|
|
"epoch": 1.837769328263625,
|
|
"grad_norm": 0.3701844730327437,
|
|
"learning_rate": 3.687706823967073e-05,
|
|
"loss": 0.0942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05855526775121689,
|
|
"step": 1450,
|
|
"valid_targets_mean": 4978.8,
|
|
"valid_targets_min": 4104
|
|
},
|
|
{
|
|
"epoch": 1.8441064638783269,
|
|
"grad_norm": 0.40150261501501977,
|
|
"learning_rate": 3.6843066573379106e-05,
|
|
"loss": 0.1201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.048229560256004333,
|
|
"step": 1455,
|
|
"valid_targets_mean": 2777.0,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 1.8504435994930293,
|
|
"grad_norm": 0.7611712527881811,
|
|
"learning_rate": 3.680889665991269e-05,
|
|
"loss": 0.1308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06210283935070038,
|
|
"step": 1460,
|
|
"valid_targets_mean": 3734.4,
|
|
"valid_targets_min": 3352
|
|
},
|
|
{
|
|
"epoch": 1.8567807351077312,
|
|
"grad_norm": 0.40968325884898316,
|
|
"learning_rate": 3.677455884059835e-05,
|
|
"loss": 0.1113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06243611127138138,
|
|
"step": 1465,
|
|
"valid_targets_mean": 3456.0,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 1.8631178707224336,
|
|
"grad_norm": 0.3669430418838595,
|
|
"learning_rate": 3.67400534584402e-05,
|
|
"loss": 0.1168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0550384521484375,
|
|
"step": 1470,
|
|
"valid_targets_mean": 2187.9,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 1.8694550063371356,
|
|
"grad_norm": 0.33644080829651224,
|
|
"learning_rate": 3.670538085811616e-05,
|
|
"loss": 0.136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0416230782866478,
|
|
"step": 1475,
|
|
"valid_targets_mean": 2997.9,
|
|
"valid_targets_min": 485
|
|
},
|
|
{
|
|
"epoch": 1.8757921419518377,
|
|
"grad_norm": 0.43239344990807466,
|
|
"learning_rate": 3.6670541385974496e-05,
|
|
"loss": 0.1081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05289187282323837,
|
|
"step": 1480,
|
|
"valid_targets_mean": 3161.9,
|
|
"valid_targets_min": 1048
|
|
},
|
|
{
|
|
"epoch": 1.88212927756654,
|
|
"grad_norm": 0.4207202795075579,
|
|
"learning_rate": 3.663553539003039e-05,
|
|
"loss": 0.1186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.047880709171295166,
|
|
"step": 1485,
|
|
"valid_targets_mean": 2517.6,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 1.888466413181242,
|
|
"grad_norm": 0.478021143403731,
|
|
"learning_rate": 3.6600363219962444e-05,
|
|
"loss": 0.1167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05736742168664932,
|
|
"step": 1490,
|
|
"valid_targets_mean": 2190.5,
|
|
"valid_targets_min": 865
|
|
},
|
|
{
|
|
"epoch": 1.8948035487959443,
|
|
"grad_norm": 0.595890535731268,
|
|
"learning_rate": 3.656502522710917e-05,
|
|
"loss": 0.1633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05245162546634674,
|
|
"step": 1495,
|
|
"valid_targets_mean": 1784.4,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 1.9011406844106464,
|
|
"grad_norm": 0.40941080337595115,
|
|
"learning_rate": 3.6529521764465545e-05,
|
|
"loss": 0.137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06454703211784363,
|
|
"step": 1500,
|
|
"valid_targets_mean": 3771.6,
|
|
"valid_targets_min": 2811
|
|
},
|
|
{
|
|
"epoch": 1.9074778200253486,
|
|
"grad_norm": 0.46404118409277323,
|
|
"learning_rate": 3.64938531866794e-05,
|
|
"loss": 0.1112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04912545531988144,
|
|
"step": 1505,
|
|
"valid_targets_mean": 1885.5,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 1.9138149556400506,
|
|
"grad_norm": 0.4078025497491413,
|
|
"learning_rate": 3.645801985004793e-05,
|
|
"loss": 0.1101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056172389537096024,
|
|
"step": 1510,
|
|
"valid_targets_mean": 3215.2,
|
|
"valid_targets_min": 1591
|
|
},
|
|
{
|
|
"epoch": 1.920152091254753,
|
|
"grad_norm": 0.3826936252768697,
|
|
"learning_rate": 3.642202211251414e-05,
|
|
"loss": 0.1091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04339641332626343,
|
|
"step": 1515,
|
|
"valid_targets_mean": 2493.0,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 1.926489226869455,
|
|
"grad_norm": 0.3499838915319426,
|
|
"learning_rate": 3.6385860333663236e-05,
|
|
"loss": 0.1037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05503877252340317,
|
|
"step": 1520,
|
|
"valid_targets_mean": 3232.9,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 1.9328263624841573,
|
|
"grad_norm": 0.4205733595692656,
|
|
"learning_rate": 3.634953487471905e-05,
|
|
"loss": 0.1382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05913494527339935,
|
|
"step": 1525,
|
|
"valid_targets_mean": 3239.5,
|
|
"valid_targets_min": 878
|
|
},
|
|
{
|
|
"epoch": 1.9391634980988592,
|
|
"grad_norm": 0.35056673512919967,
|
|
"learning_rate": 3.631304609854044e-05,
|
|
"loss": 0.0971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04289105534553528,
|
|
"step": 1530,
|
|
"valid_targets_mean": 3656.8,
|
|
"valid_targets_min": 1252
|
|
},
|
|
{
|
|
"epoch": 1.9455006337135616,
|
|
"grad_norm": 0.4986484086349854,
|
|
"learning_rate": 3.6276394369617654e-05,
|
|
"loss": 0.1122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06117460876703262,
|
|
"step": 1535,
|
|
"valid_targets_mean": 2134.0,
|
|
"valid_targets_min": 945
|
|
},
|
|
{
|
|
"epoch": 1.9518377693282636,
|
|
"grad_norm": 0.4546352543793496,
|
|
"learning_rate": 3.6239580054068684e-05,
|
|
"loss": 0.1126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07200893759727478,
|
|
"step": 1540,
|
|
"valid_targets_mean": 3470.4,
|
|
"valid_targets_min": 2573
|
|
},
|
|
{
|
|
"epoch": 1.9581749049429658,
|
|
"grad_norm": 0.36003990592394647,
|
|
"learning_rate": 3.620260351963564e-05,
|
|
"loss": 0.111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06171542406082153,
|
|
"step": 1545,
|
|
"valid_targets_mean": 3838.4,
|
|
"valid_targets_min": 2529
|
|
},
|
|
{
|
|
"epoch": 1.964512040557668,
|
|
"grad_norm": 0.3812022004127798,
|
|
"learning_rate": 3.616546513568102e-05,
|
|
"loss": 0.096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06386801600456238,
|
|
"step": 1550,
|
|
"valid_targets_mean": 3975.0,
|
|
"valid_targets_min": 892
|
|
},
|
|
{
|
|
"epoch": 1.97084917617237,
|
|
"grad_norm": 0.32941651904737446,
|
|
"learning_rate": 3.6128165273184085e-05,
|
|
"loss": 0.1097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04119953513145447,
|
|
"step": 1555,
|
|
"valid_targets_mean": 4497.8,
|
|
"valid_targets_min": 1657
|
|
},
|
|
{
|
|
"epoch": 1.9771863117870723,
|
|
"grad_norm": 0.5673815082201125,
|
|
"learning_rate": 3.6090704304737105e-05,
|
|
"loss": 0.1537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12394223362207413,
|
|
"step": 1560,
|
|
"valid_targets_mean": 2070.2,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 1.9835234474017744,
|
|
"grad_norm": 0.3213756492692534,
|
|
"learning_rate": 3.605308260454166e-05,
|
|
"loss": 0.1583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03999151289463043,
|
|
"step": 1565,
|
|
"valid_targets_mean": 3756.6,
|
|
"valid_targets_min": 2131
|
|
},
|
|
{
|
|
"epoch": 1.9898605830164766,
|
|
"grad_norm": 0.35162310648646944,
|
|
"learning_rate": 3.60153005484049e-05,
|
|
"loss": 0.114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05818267539143562,
|
|
"step": 1570,
|
|
"valid_targets_mean": 3133.0,
|
|
"valid_targets_min": 840
|
|
},
|
|
{
|
|
"epoch": 1.9961977186311786,
|
|
"grad_norm": 0.3489277489222169,
|
|
"learning_rate": 3.597735851373578e-05,
|
|
"loss": 0.104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05143667012453079,
|
|
"step": 1575,
|
|
"valid_targets_mean": 3714.0,
|
|
"valid_targets_min": 2883
|
|
},
|
|
{
|
|
"epoch": 2.002534854245881,
|
|
"grad_norm": 1.0075240981574034,
|
|
"learning_rate": 3.5939256879541295e-05,
|
|
"loss": 0.2026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10621477663516998,
|
|
"step": 1580,
|
|
"valid_targets_mean": 8514.0,
|
|
"valid_targets_min": 6788
|
|
},
|
|
{
|
|
"epoch": 2.008871989860583,
|
|
"grad_norm": 0.437748978264818,
|
|
"learning_rate": 3.590099602642271e-05,
|
|
"loss": 0.2364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11550945043563843,
|
|
"step": 1585,
|
|
"valid_targets_mean": 6668.0,
|
|
"valid_targets_min": 5933
|
|
},
|
|
{
|
|
"epoch": 2.0152091254752853,
|
|
"grad_norm": 0.4876176443907508,
|
|
"learning_rate": 3.5862576336571725e-05,
|
|
"loss": 0.2242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10253625363111496,
|
|
"step": 1590,
|
|
"valid_targets_mean": 4107.0,
|
|
"valid_targets_min": 161
|
|
},
|
|
{
|
|
"epoch": 2.0215462610899873,
|
|
"grad_norm": 0.39782871301853745,
|
|
"learning_rate": 3.5823998193766704e-05,
|
|
"loss": 0.221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09979207068681717,
|
|
"step": 1595,
|
|
"valid_targets_mean": 7946.0,
|
|
"valid_targets_min": 5596
|
|
},
|
|
{
|
|
"epoch": 2.0278833967046896,
|
|
"grad_norm": 0.4199787922750572,
|
|
"learning_rate": 3.5785261983368786e-05,
|
|
"loss": 0.2106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1133679449558258,
|
|
"step": 1600,
|
|
"valid_targets_mean": 7392.8,
|
|
"valid_targets_min": 5629
|
|
},
|
|
{
|
|
"epoch": 2.0342205323193916,
|
|
"grad_norm": 2.179300953234272,
|
|
"learning_rate": 3.574636809231809e-05,
|
|
"loss": 0.2178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11244471371173859,
|
|
"step": 1605,
|
|
"valid_targets_mean": 6879.6,
|
|
"valid_targets_min": 5113
|
|
},
|
|
{
|
|
"epoch": 2.040557667934094,
|
|
"grad_norm": 0.3952407404195043,
|
|
"learning_rate": 3.570731690912979e-05,
|
|
"loss": 0.2319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11205905675888062,
|
|
"step": 1610,
|
|
"valid_targets_mean": 6919.5,
|
|
"valid_targets_min": 5080
|
|
},
|
|
{
|
|
"epoch": 2.046894803548796,
|
|
"grad_norm": 0.4166100354524257,
|
|
"learning_rate": 3.5668108823890306e-05,
|
|
"loss": 0.2248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09917525947093964,
|
|
"step": 1615,
|
|
"valid_targets_mean": 6377.0,
|
|
"valid_targets_min": 3939
|
|
},
|
|
{
|
|
"epoch": 2.053231939163498,
|
|
"grad_norm": 0.3798829525464805,
|
|
"learning_rate": 3.562874422825335e-05,
|
|
"loss": 0.2111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10617680847644806,
|
|
"step": 1620,
|
|
"valid_targets_mean": 6392.2,
|
|
"valid_targets_min": 4355
|
|
},
|
|
{
|
|
"epoch": 2.0595690747782003,
|
|
"grad_norm": 0.5258872919719788,
|
|
"learning_rate": 3.5589223515436026e-05,
|
|
"loss": 0.2041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12014295160770416,
|
|
"step": 1625,
|
|
"valid_targets_mean": 5196.5,
|
|
"valid_targets_min": 137
|
|
},
|
|
{
|
|
"epoch": 2.0659062103929022,
|
|
"grad_norm": 0.3765169059435386,
|
|
"learning_rate": 3.5549547080214926e-05,
|
|
"loss": 0.2097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09414888918399811,
|
|
"step": 1630,
|
|
"valid_targets_mean": 7344.4,
|
|
"valid_targets_min": 5244
|
|
},
|
|
{
|
|
"epoch": 2.0722433460076046,
|
|
"grad_norm": 0.3364909338482532,
|
|
"learning_rate": 3.550971531892215e-05,
|
|
"loss": 0.2036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08887539803981781,
|
|
"step": 1635,
|
|
"valid_targets_mean": 7921.0,
|
|
"valid_targets_min": 5311
|
|
},
|
|
{
|
|
"epoch": 2.0785804816223066,
|
|
"grad_norm": 0.3746799797722927,
|
|
"learning_rate": 3.54697286294414e-05,
|
|
"loss": 0.2132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10735895484685898,
|
|
"step": 1640,
|
|
"valid_targets_mean": 7931.2,
|
|
"valid_targets_min": 6015
|
|
},
|
|
{
|
|
"epoch": 2.084917617237009,
|
|
"grad_norm": 0.3642797443422459,
|
|
"learning_rate": 3.542958741120392e-05,
|
|
"loss": 0.2026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09577775746583939,
|
|
"step": 1645,
|
|
"valid_targets_mean": 7640.8,
|
|
"valid_targets_min": 5248
|
|
},
|
|
{
|
|
"epoch": 2.091254752851711,
|
|
"grad_norm": 0.37841668237507164,
|
|
"learning_rate": 3.538929206518461e-05,
|
|
"loss": 0.2009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10005602240562439,
|
|
"step": 1650,
|
|
"valid_targets_mean": 7249.6,
|
|
"valid_targets_min": 5217
|
|
},
|
|
{
|
|
"epoch": 2.0975918884664133,
|
|
"grad_norm": 0.3996702493407355,
|
|
"learning_rate": 3.5348842993897934e-05,
|
|
"loss": 0.2092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11531486362218857,
|
|
"step": 1655,
|
|
"valid_targets_mean": 7619.9,
|
|
"valid_targets_min": 5094
|
|
},
|
|
{
|
|
"epoch": 2.1039290240811153,
|
|
"grad_norm": 0.3776681966185826,
|
|
"learning_rate": 3.530824060139396e-05,
|
|
"loss": 0.2031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09734298288822174,
|
|
"step": 1660,
|
|
"valid_targets_mean": 7732.8,
|
|
"valid_targets_min": 5842
|
|
},
|
|
{
|
|
"epoch": 2.1102661596958177,
|
|
"grad_norm": 0.35985474237200693,
|
|
"learning_rate": 3.526748529325427e-05,
|
|
"loss": 0.2054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10541939735412598,
|
|
"step": 1665,
|
|
"valid_targets_mean": 7952.9,
|
|
"valid_targets_min": 5494
|
|
},
|
|
{
|
|
"epoch": 2.1166032953105196,
|
|
"grad_norm": 0.4457822151851229,
|
|
"learning_rate": 3.5226577476587956e-05,
|
|
"loss": 0.2224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1096581220626831,
|
|
"step": 1670,
|
|
"valid_targets_mean": 8001.0,
|
|
"valid_targets_min": 5454
|
|
},
|
|
{
|
|
"epoch": 2.122940430925222,
|
|
"grad_norm": 0.5164741868257,
|
|
"learning_rate": 3.518551756002753e-05,
|
|
"loss": 0.2264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07909304648637772,
|
|
"step": 1675,
|
|
"valid_targets_mean": 2985.1,
|
|
"valid_targets_min": 1896
|
|
},
|
|
{
|
|
"epoch": 2.129277566539924,
|
|
"grad_norm": 0.4051330574792907,
|
|
"learning_rate": 3.5144305953724847e-05,
|
|
"loss": 0.1841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0884946957230568,
|
|
"step": 1680,
|
|
"valid_targets_mean": 6714.6,
|
|
"valid_targets_min": 4917
|
|
},
|
|
{
|
|
"epoch": 2.1356147021546263,
|
|
"grad_norm": 0.42152955296826333,
|
|
"learning_rate": 3.5102943069347004e-05,
|
|
"loss": 0.2016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11064820736646652,
|
|
"step": 1685,
|
|
"valid_targets_mean": 7595.2,
|
|
"valid_targets_min": 5488
|
|
},
|
|
{
|
|
"epoch": 2.1419518377693283,
|
|
"grad_norm": 0.42759769123999114,
|
|
"learning_rate": 3.5061429320072225e-05,
|
|
"loss": 0.2039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10958842933177948,
|
|
"step": 1690,
|
|
"valid_targets_mean": 7046.1,
|
|
"valid_targets_min": 5579
|
|
},
|
|
{
|
|
"epoch": 2.1482889733840302,
|
|
"grad_norm": 0.42333741508380845,
|
|
"learning_rate": 3.501976512058574e-05,
|
|
"loss": 0.2113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10335630923509598,
|
|
"step": 1695,
|
|
"valid_targets_mean": 7108.0,
|
|
"valid_targets_min": 4963
|
|
},
|
|
{
|
|
"epoch": 2.1546261089987326,
|
|
"grad_norm": 0.4051481036858486,
|
|
"learning_rate": 3.497795088707567e-05,
|
|
"loss": 0.2066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1035148948431015,
|
|
"step": 1700,
|
|
"valid_targets_mean": 6810.1,
|
|
"valid_targets_min": 5666
|
|
},
|
|
{
|
|
"epoch": 2.1609632446134346,
|
|
"grad_norm": 0.5999224311710737,
|
|
"learning_rate": 3.493598703722881e-05,
|
|
"loss": 0.1981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09677618741989136,
|
|
"step": 1705,
|
|
"valid_targets_mean": 8068.5,
|
|
"valid_targets_min": 5524
|
|
},
|
|
{
|
|
"epoch": 2.167300380228137,
|
|
"grad_norm": 0.4270020852998751,
|
|
"learning_rate": 3.489387399022649e-05,
|
|
"loss": 0.1798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08485183119773865,
|
|
"step": 1710,
|
|
"valid_targets_mean": 6553.8,
|
|
"valid_targets_min": 3621
|
|
},
|
|
{
|
|
"epoch": 2.173637515842839,
|
|
"grad_norm": 0.4050413837361388,
|
|
"learning_rate": 3.48516121667404e-05,
|
|
"loss": 0.1845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09748750925064087,
|
|
"step": 1715,
|
|
"valid_targets_mean": 6762.5,
|
|
"valid_targets_min": 4968
|
|
},
|
|
{
|
|
"epoch": 2.1799746514575413,
|
|
"grad_norm": 0.4189180360515352,
|
|
"learning_rate": 3.480920198892836e-05,
|
|
"loss": 0.1973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09892107546329498,
|
|
"step": 1720,
|
|
"valid_targets_mean": 6143.9,
|
|
"valid_targets_min": 4917
|
|
},
|
|
{
|
|
"epoch": 2.1863117870722433,
|
|
"grad_norm": 0.5480109464822124,
|
|
"learning_rate": 3.476664388043013e-05,
|
|
"loss": 0.2017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13985930383205414,
|
|
"step": 1725,
|
|
"valid_targets_mean": 4583.8,
|
|
"valid_targets_min": 215
|
|
},
|
|
{
|
|
"epoch": 2.1926489226869457,
|
|
"grad_norm": 0.37553875832473627,
|
|
"learning_rate": 3.472393826636317e-05,
|
|
"loss": 0.2108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10636749863624573,
|
|
"step": 1730,
|
|
"valid_targets_mean": 7317.1,
|
|
"valid_targets_min": 4948
|
|
},
|
|
{
|
|
"epoch": 2.1989860583016476,
|
|
"grad_norm": 0.41718694682578167,
|
|
"learning_rate": 3.468108557331837e-05,
|
|
"loss": 0.2235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10629099607467651,
|
|
"step": 1735,
|
|
"valid_targets_mean": 6437.6,
|
|
"valid_targets_min": 4510
|
|
},
|
|
{
|
|
"epoch": 2.20532319391635,
|
|
"grad_norm": 0.4217155586021107,
|
|
"learning_rate": 3.4638086229355825e-05,
|
|
"loss": 0.217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11597402393817902,
|
|
"step": 1740,
|
|
"valid_targets_mean": 7707.9,
|
|
"valid_targets_min": 5796
|
|
},
|
|
{
|
|
"epoch": 2.211660329531052,
|
|
"grad_norm": 0.370037285388258,
|
|
"learning_rate": 3.459494066400052e-05,
|
|
"loss": 0.2039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09958069026470184,
|
|
"step": 1745,
|
|
"valid_targets_mean": 7900.9,
|
|
"valid_targets_min": 5643
|
|
},
|
|
{
|
|
"epoch": 2.2179974651457544,
|
|
"grad_norm": 0.398492882072698,
|
|
"learning_rate": 3.455164930823808e-05,
|
|
"loss": 0.2054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11191865801811218,
|
|
"step": 1750,
|
|
"valid_targets_mean": 7627.2,
|
|
"valid_targets_min": 5742
|
|
},
|
|
{
|
|
"epoch": 2.2243346007604563,
|
|
"grad_norm": 0.3771035766117461,
|
|
"learning_rate": 3.4508212594510446e-05,
|
|
"loss": 0.2099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11967207491397858,
|
|
"step": 1755,
|
|
"valid_targets_mean": 8725.6,
|
|
"valid_targets_min": 5141
|
|
},
|
|
{
|
|
"epoch": 2.2306717363751583,
|
|
"grad_norm": 0.4735866795576995,
|
|
"learning_rate": 3.4464630956711534e-05,
|
|
"loss": 0.2058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11438523232936859,
|
|
"step": 1760,
|
|
"valid_targets_mean": 6967.8,
|
|
"valid_targets_min": 6088
|
|
},
|
|
{
|
|
"epoch": 2.2370088719898606,
|
|
"grad_norm": 0.35993943837166126,
|
|
"learning_rate": 3.442090483018295e-05,
|
|
"loss": 0.2093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09623050689697266,
|
|
"step": 1765,
|
|
"valid_targets_mean": 7140.1,
|
|
"valid_targets_min": 4834
|
|
},
|
|
{
|
|
"epoch": 2.2433460076045626,
|
|
"grad_norm": 0.3851190721137925,
|
|
"learning_rate": 3.437703465170961e-05,
|
|
"loss": 0.2084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0969136506319046,
|
|
"step": 1770,
|
|
"valid_targets_mean": 6706.9,
|
|
"valid_targets_min": 5108
|
|
},
|
|
{
|
|
"epoch": 2.249683143219265,
|
|
"grad_norm": 0.42724247928612435,
|
|
"learning_rate": 3.4333020859515346e-05,
|
|
"loss": 0.2102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10331173241138458,
|
|
"step": 1775,
|
|
"valid_targets_mean": 5931.4,
|
|
"valid_targets_min": 4708
|
|
},
|
|
{
|
|
"epoch": 2.256020278833967,
|
|
"grad_norm": 0.38873202811563834,
|
|
"learning_rate": 3.42888638932586e-05,
|
|
"loss": 0.2014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09628495573997498,
|
|
"step": 1780,
|
|
"valid_targets_mean": 7145.6,
|
|
"valid_targets_min": 5280
|
|
},
|
|
{
|
|
"epoch": 2.2623574144486693,
|
|
"grad_norm": 0.6959104087632724,
|
|
"learning_rate": 3.424456419402798e-05,
|
|
"loss": 0.1962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06746724247932434,
|
|
"step": 1785,
|
|
"valid_targets_mean": 1554.9,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 2.2686945500633713,
|
|
"grad_norm": 0.5135563547321258,
|
|
"learning_rate": 3.420012220433787e-05,
|
|
"loss": 0.1978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11715088784694672,
|
|
"step": 1790,
|
|
"valid_targets_mean": 7069.5,
|
|
"valid_targets_min": 4765
|
|
},
|
|
{
|
|
"epoch": 2.2750316856780737,
|
|
"grad_norm": 0.4467163762509169,
|
|
"learning_rate": 3.4155538368124015e-05,
|
|
"loss": 0.2014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10094669461250305,
|
|
"step": 1795,
|
|
"valid_targets_mean": 6193.4,
|
|
"valid_targets_min": 5587
|
|
},
|
|
{
|
|
"epoch": 2.2813688212927756,
|
|
"grad_norm": 0.41010329042728166,
|
|
"learning_rate": 3.411081313073906e-05,
|
|
"loss": 0.2034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10623825341463089,
|
|
"step": 1800,
|
|
"valid_targets_mean": 6612.2,
|
|
"valid_targets_min": 4894
|
|
},
|
|
{
|
|
"epoch": 2.2877059569074776,
|
|
"grad_norm": 0.4164831377154972,
|
|
"learning_rate": 3.406594693894815e-05,
|
|
"loss": 0.1968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09428498148918152,
|
|
"step": 1805,
|
|
"valid_targets_mean": 6605.9,
|
|
"valid_targets_min": 3795
|
|
},
|
|
{
|
|
"epoch": 2.29404309252218,
|
|
"grad_norm": 0.4815798230684326,
|
|
"learning_rate": 3.402094024092442e-05,
|
|
"loss": 0.2021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10269315540790558,
|
|
"step": 1810,
|
|
"valid_targets_mean": 7186.8,
|
|
"valid_targets_min": 4890
|
|
},
|
|
{
|
|
"epoch": 2.3003802281368824,
|
|
"grad_norm": 0.4471948708087744,
|
|
"learning_rate": 3.397579348624454e-05,
|
|
"loss": 0.2052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1033300906419754,
|
|
"step": 1815,
|
|
"valid_targets_mean": 6544.4,
|
|
"valid_targets_min": 4615
|
|
},
|
|
{
|
|
"epoch": 2.3067173637515843,
|
|
"grad_norm": 0.444889145454382,
|
|
"learning_rate": 3.3930507125884216e-05,
|
|
"loss": 0.2076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10947813838720322,
|
|
"step": 1820,
|
|
"valid_targets_mean": 6339.2,
|
|
"valid_targets_min": 4798
|
|
},
|
|
{
|
|
"epoch": 2.3130544993662863,
|
|
"grad_norm": 0.5454481785127888,
|
|
"learning_rate": 3.38850816122137e-05,
|
|
"loss": 0.1722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1162998154759407,
|
|
"step": 1825,
|
|
"valid_targets_mean": 4558.8,
|
|
"valid_targets_min": 148
|
|
},
|
|
{
|
|
"epoch": 2.3193916349809887,
|
|
"grad_norm": 0.43151811308370447,
|
|
"learning_rate": 3.383951739899326e-05,
|
|
"loss": 0.2034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11234039068222046,
|
|
"step": 1830,
|
|
"valid_targets_mean": 6807.9,
|
|
"valid_targets_min": 5164
|
|
},
|
|
{
|
|
"epoch": 2.3257287705956906,
|
|
"grad_norm": 0.40331627255961705,
|
|
"learning_rate": 3.379381494136863e-05,
|
|
"loss": 0.1963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10068431496620178,
|
|
"step": 1835,
|
|
"valid_targets_mean": 7011.8,
|
|
"valid_targets_min": 4974
|
|
},
|
|
{
|
|
"epoch": 2.332065906210393,
|
|
"grad_norm": 0.41268489302372374,
|
|
"learning_rate": 3.374797469586651e-05,
|
|
"loss": 0.2036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09164327383041382,
|
|
"step": 1840,
|
|
"valid_targets_mean": 5794.2,
|
|
"valid_targets_min": 4809
|
|
},
|
|
{
|
|
"epoch": 2.338403041825095,
|
|
"grad_norm": 0.4157812451013273,
|
|
"learning_rate": 3.370199712038997e-05,
|
|
"loss": 0.1942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09110420197248459,
|
|
"step": 1845,
|
|
"valid_targets_mean": 6401.8,
|
|
"valid_targets_min": 4842
|
|
},
|
|
{
|
|
"epoch": 2.3447401774397973,
|
|
"grad_norm": 0.43710506961183865,
|
|
"learning_rate": 3.365588267421385e-05,
|
|
"loss": 0.1986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10231119394302368,
|
|
"step": 1850,
|
|
"valid_targets_mean": 6182.1,
|
|
"valid_targets_min": 5116
|
|
},
|
|
{
|
|
"epoch": 2.3510773130544993,
|
|
"grad_norm": 0.38673699619935736,
|
|
"learning_rate": 3.360963181798027e-05,
|
|
"loss": 0.1976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09074361622333527,
|
|
"step": 1855,
|
|
"valid_targets_mean": 6165.1,
|
|
"valid_targets_min": 5764
|
|
},
|
|
{
|
|
"epoch": 2.3574144486692017,
|
|
"grad_norm": 0.4395930428090754,
|
|
"learning_rate": 3.356324501369391e-05,
|
|
"loss": 0.1588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06579431891441345,
|
|
"step": 1860,
|
|
"valid_targets_mean": 3826.8,
|
|
"valid_targets_min": 2830
|
|
},
|
|
{
|
|
"epoch": 2.3637515842839036,
|
|
"grad_norm": 0.3828072326676679,
|
|
"learning_rate": 3.3516722724717463e-05,
|
|
"loss": 0.116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0516241118311882,
|
|
"step": 1865,
|
|
"valid_targets_mean": 3543.6,
|
|
"valid_targets_min": 2731
|
|
},
|
|
{
|
|
"epoch": 2.3700887198986056,
|
|
"grad_norm": 0.36096566316940415,
|
|
"learning_rate": 3.3470065415767004e-05,
|
|
"loss": 0.1021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029705673456192017,
|
|
"step": 1870,
|
|
"valid_targets_mean": 3440.6,
|
|
"valid_targets_min": 2927
|
|
},
|
|
{
|
|
"epoch": 2.376425855513308,
|
|
"grad_norm": 0.43726863648408876,
|
|
"learning_rate": 3.342327355290733e-05,
|
|
"loss": 0.1214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07802140712738037,
|
|
"step": 1875,
|
|
"valid_targets_mean": 3379.9,
|
|
"valid_targets_min": 2213
|
|
},
|
|
{
|
|
"epoch": 2.3827629911280104,
|
|
"grad_norm": 0.46134940384413475,
|
|
"learning_rate": 3.337634760354732e-05,
|
|
"loss": 0.099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.053947288542985916,
|
|
"step": 1880,
|
|
"valid_targets_mean": 2371.2,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 2.3891001267427123,
|
|
"grad_norm": 0.4203738471482053,
|
|
"learning_rate": 3.332928803643524e-05,
|
|
"loss": 0.1072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05380265414714813,
|
|
"step": 1885,
|
|
"valid_targets_mean": 2782.4,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 2.3954372623574143,
|
|
"grad_norm": 0.8041285879401208,
|
|
"learning_rate": 3.3282095321654084e-05,
|
|
"loss": 0.1714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08791178464889526,
|
|
"step": 1890,
|
|
"valid_targets_mean": 1278.2,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 2.4017743979721167,
|
|
"grad_norm": 0.3894661096892358,
|
|
"learning_rate": 3.323476993061689e-05,
|
|
"loss": 0.1038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04944770783185959,
|
|
"step": 1895,
|
|
"valid_targets_mean": 2677.9,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 2.4081115335868186,
|
|
"grad_norm": 0.4921587472540181,
|
|
"learning_rate": 3.3187312336061975e-05,
|
|
"loss": 0.1277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08801449835300446,
|
|
"step": 1900,
|
|
"valid_targets_mean": 2618.5,
|
|
"valid_targets_min": 883
|
|
},
|
|
{
|
|
"epoch": 2.414448669201521,
|
|
"grad_norm": 0.3226730005083701,
|
|
"learning_rate": 3.31397230120483e-05,
|
|
"loss": 0.1259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039051029831171036,
|
|
"step": 1905,
|
|
"valid_targets_mean": 4685.1,
|
|
"valid_targets_min": 3285
|
|
},
|
|
{
|
|
"epoch": 2.420785804816223,
|
|
"grad_norm": 0.32886300296450194,
|
|
"learning_rate": 3.309200243395064e-05,
|
|
"loss": 0.0973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04519342631101608,
|
|
"step": 1910,
|
|
"valid_targets_mean": 3595.2,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 2.4271229404309254,
|
|
"grad_norm": 0.3615978220811398,
|
|
"learning_rate": 3.304415107845491e-05,
|
|
"loss": 0.1041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0569930374622345,
|
|
"step": 1915,
|
|
"valid_targets_mean": 3785.8,
|
|
"valid_targets_min": 1290
|
|
},
|
|
{
|
|
"epoch": 2.4334600760456273,
|
|
"grad_norm": 0.5576971342340667,
|
|
"learning_rate": 3.2996169423553366e-05,
|
|
"loss": 0.1017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06427560746669769,
|
|
"step": 1920,
|
|
"valid_targets_mean": 2842.8,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 2.4397972116603297,
|
|
"grad_norm": 0.30027828815058205,
|
|
"learning_rate": 3.294805794853984e-05,
|
|
"loss": 0.1448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043211959302425385,
|
|
"step": 1925,
|
|
"valid_targets_mean": 4283.1,
|
|
"valid_targets_min": 3078
|
|
},
|
|
{
|
|
"epoch": 2.4461343472750317,
|
|
"grad_norm": 0.35148203150957624,
|
|
"learning_rate": 3.289981713400493e-05,
|
|
"loss": 0.0968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04528707265853882,
|
|
"step": 1930,
|
|
"valid_targets_mean": 3377.5,
|
|
"valid_targets_min": 1822
|
|
},
|
|
{
|
|
"epoch": 2.4524714828897336,
|
|
"grad_norm": 0.3829914683066545,
|
|
"learning_rate": 3.285144746183126e-05,
|
|
"loss": 0.1124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04828431084752083,
|
|
"step": 1935,
|
|
"valid_targets_mean": 2831.0,
|
|
"valid_targets_min": 1086
|
|
},
|
|
{
|
|
"epoch": 2.458808618504436,
|
|
"grad_norm": 0.3630380812612516,
|
|
"learning_rate": 3.28029494151886e-05,
|
|
"loss": 0.0869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045946016907691956,
|
|
"step": 1940,
|
|
"valid_targets_mean": 3271.9,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 2.4651457541191384,
|
|
"grad_norm": 0.6243671163820733,
|
|
"learning_rate": 3.275432347852906e-05,
|
|
"loss": 0.116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05711649730801582,
|
|
"step": 1945,
|
|
"valid_targets_mean": 1252.1,
|
|
"valid_targets_min": 770
|
|
},
|
|
{
|
|
"epoch": 2.4714828897338403,
|
|
"grad_norm": 0.4476062067801325,
|
|
"learning_rate": 3.2705570137582286e-05,
|
|
"loss": 0.1155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04008891433477402,
|
|
"step": 1950,
|
|
"valid_targets_mean": 2078.0,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 2.4778200253485423,
|
|
"grad_norm": 0.4182438311429374,
|
|
"learning_rate": 3.2656689879350546e-05,
|
|
"loss": 0.1201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05722714960575104,
|
|
"step": 1955,
|
|
"valid_targets_mean": 3404.6,
|
|
"valid_targets_min": 2224
|
|
},
|
|
{
|
|
"epoch": 2.4841571609632447,
|
|
"grad_norm": 0.41870105082146963,
|
|
"learning_rate": 3.2607683192103925e-05,
|
|
"loss": 0.1206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058014512062072754,
|
|
"step": 1960,
|
|
"valid_targets_mean": 2491.1,
|
|
"valid_targets_min": 511
|
|
},
|
|
{
|
|
"epoch": 2.4904942965779466,
|
|
"grad_norm": 0.3839746723259662,
|
|
"learning_rate": 3.255855056537539e-05,
|
|
"loss": 0.1546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05868365243077278,
|
|
"step": 1965,
|
|
"valid_targets_mean": 4201.8,
|
|
"valid_targets_min": 3429
|
|
},
|
|
{
|
|
"epoch": 2.496831432192649,
|
|
"grad_norm": 0.3992286639525555,
|
|
"learning_rate": 3.250929248995597e-05,
|
|
"loss": 0.0963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03774121403694153,
|
|
"step": 1970,
|
|
"valid_targets_mean": 2519.6,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 2.503168567807351,
|
|
"grad_norm": 0.6144977154392093,
|
|
"learning_rate": 3.245990945788979e-05,
|
|
"loss": 0.1192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07725794613361359,
|
|
"step": 1975,
|
|
"valid_targets_mean": 1699.6,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 2.5095057034220534,
|
|
"grad_norm": 0.8025045705012676,
|
|
"learning_rate": 3.2410401962469186e-05,
|
|
"loss": 0.1424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0759817361831665,
|
|
"step": 1980,
|
|
"valid_targets_mean": 1441.9,
|
|
"valid_targets_min": 357
|
|
},
|
|
{
|
|
"epoch": 2.5158428390367553,
|
|
"grad_norm": 0.4070452288590703,
|
|
"learning_rate": 3.236077049822976e-05,
|
|
"loss": 0.1152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05362062156200409,
|
|
"step": 1985,
|
|
"valid_targets_mean": 3970.5,
|
|
"valid_targets_min": 3697
|
|
},
|
|
{
|
|
"epoch": 2.5221799746514577,
|
|
"grad_norm": 0.5087994653909331,
|
|
"learning_rate": 3.2311015560945466e-05,
|
|
"loss": 0.18,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13034749031066895,
|
|
"step": 1990,
|
|
"valid_targets_mean": 3030.8,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 2.5285171102661597,
|
|
"grad_norm": 0.36860681356180997,
|
|
"learning_rate": 3.226113764762365e-05,
|
|
"loss": 0.253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.050305165350437164,
|
|
"step": 1995,
|
|
"valid_targets_mean": 3930.0,
|
|
"valid_targets_min": 3005
|
|
},
|
|
{
|
|
"epoch": 2.5348542458808616,
|
|
"grad_norm": 0.5261247576532232,
|
|
"learning_rate": 3.221113725650005e-05,
|
|
"loss": 0.1238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05671953037381172,
|
|
"step": 2000,
|
|
"valid_targets_mean": 1674.9,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 2.541191381495564,
|
|
"grad_norm": 0.5370439516218026,
|
|
"learning_rate": 3.216101488703387e-05,
|
|
"loss": 0.1334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1546632945537567,
|
|
"step": 2005,
|
|
"valid_targets_mean": 2687.0,
|
|
"valid_targets_min": 965
|
|
},
|
|
{
|
|
"epoch": 2.5475285171102664,
|
|
"grad_norm": 0.331146808642833,
|
|
"learning_rate": 3.211077103990278e-05,
|
|
"loss": 0.1859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.044136203825473785,
|
|
"step": 2010,
|
|
"valid_targets_mean": 4697.8,
|
|
"valid_targets_min": 2262
|
|
},
|
|
{
|
|
"epoch": 2.5538656527249683,
|
|
"grad_norm": 0.38367252246624867,
|
|
"learning_rate": 3.2060406216997866e-05,
|
|
"loss": 0.0996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07925865799188614,
|
|
"step": 2015,
|
|
"valid_targets_mean": 4354.5,
|
|
"valid_targets_min": 2733
|
|
},
|
|
{
|
|
"epoch": 2.5602027883396703,
|
|
"grad_norm": 0.39554364792276075,
|
|
"learning_rate": 3.2009920921418684e-05,
|
|
"loss": 0.1071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06858916580677032,
|
|
"step": 2020,
|
|
"valid_targets_mean": 4539.1,
|
|
"valid_targets_min": 3028
|
|
},
|
|
{
|
|
"epoch": 2.5665399239543727,
|
|
"grad_norm": 0.3105850423308268,
|
|
"learning_rate": 3.19593156574682e-05,
|
|
"loss": 0.1081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03855207562446594,
|
|
"step": 2025,
|
|
"valid_targets_mean": 3895.9,
|
|
"valid_targets_min": 1433
|
|
},
|
|
{
|
|
"epoch": 2.5728770595690746,
|
|
"grad_norm": 0.3761007536832546,
|
|
"learning_rate": 3.190859093064774e-05,
|
|
"loss": 0.0982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03847386687994003,
|
|
"step": 2030,
|
|
"valid_targets_mean": 3218.0,
|
|
"valid_targets_min": 748
|
|
},
|
|
{
|
|
"epoch": 2.579214195183777,
|
|
"grad_norm": 0.4115445891951164,
|
|
"learning_rate": 3.1857747247651976e-05,
|
|
"loss": 0.0993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06127840280532837,
|
|
"step": 2035,
|
|
"valid_targets_mean": 3767.4,
|
|
"valid_targets_min": 2904
|
|
},
|
|
{
|
|
"epoch": 2.585551330798479,
|
|
"grad_norm": 0.4701677547651818,
|
|
"learning_rate": 3.180678511636384e-05,
|
|
"loss": 0.1033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05447680875658989,
|
|
"step": 2040,
|
|
"valid_targets_mean": 2582.0,
|
|
"valid_targets_min": 1939
|
|
},
|
|
{
|
|
"epoch": 2.5918884664131814,
|
|
"grad_norm": 0.4376058426466108,
|
|
"learning_rate": 3.1755705045849465e-05,
|
|
"loss": 0.0893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05583558231592178,
|
|
"step": 2045,
|
|
"valid_targets_mean": 2949.8,
|
|
"valid_targets_min": 772
|
|
},
|
|
{
|
|
"epoch": 2.5982256020278833,
|
|
"grad_norm": 0.5395229768121144,
|
|
"learning_rate": 3.170450754635307e-05,
|
|
"loss": 0.1542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09253207594156265,
|
|
"step": 2050,
|
|
"valid_targets_mean": 3169.6,
|
|
"valid_targets_min": 1289
|
|
},
|
|
{
|
|
"epoch": 2.6045627376425857,
|
|
"grad_norm": 0.41616251307511865,
|
|
"learning_rate": 3.1653193129291886e-05,
|
|
"loss": 0.0813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0426519475877285,
|
|
"step": 2055,
|
|
"valid_targets_mean": 3529.5,
|
|
"valid_targets_min": 2730
|
|
},
|
|
{
|
|
"epoch": 2.6108998732572877,
|
|
"grad_norm": 0.4216905880529936,
|
|
"learning_rate": 3.160176230725109e-05,
|
|
"loss": 0.0928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04937718063592911,
|
|
"step": 2060,
|
|
"valid_targets_mean": 3104.9,
|
|
"valid_targets_min": 1205
|
|
},
|
|
{
|
|
"epoch": 2.6172370088719896,
|
|
"grad_norm": 0.3544201933805506,
|
|
"learning_rate": 3.155021559397857e-05,
|
|
"loss": 0.0891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05064213275909424,
|
|
"step": 2065,
|
|
"valid_targets_mean": 3848.9,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 2.623574144486692,
|
|
"grad_norm": 0.43416251557273966,
|
|
"learning_rate": 3.149855350437992e-05,
|
|
"loss": 0.1069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046014584600925446,
|
|
"step": 2070,
|
|
"valid_targets_mean": 2418.1,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 2.6299112801013944,
|
|
"grad_norm": 0.3409020660594356,
|
|
"learning_rate": 3.144677655451322e-05,
|
|
"loss": 0.1378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046113520860672,
|
|
"step": 2075,
|
|
"valid_targets_mean": 3681.8,
|
|
"valid_targets_min": 2318
|
|
},
|
|
{
|
|
"epoch": 2.6362484157160964,
|
|
"grad_norm": 0.5430764998867781,
|
|
"learning_rate": 3.13948852615839e-05,
|
|
"loss": 0.1282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05750581994652748,
|
|
"step": 2080,
|
|
"valid_targets_mean": 1876.9,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 2.6425855513307983,
|
|
"grad_norm": 0.6523061189150164,
|
|
"learning_rate": 3.134288014393958e-05,
|
|
"loss": 0.1164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10271206498146057,
|
|
"step": 2085,
|
|
"valid_targets_mean": 2323.8,
|
|
"valid_targets_min": 760
|
|
},
|
|
{
|
|
"epoch": 2.6489226869455007,
|
|
"grad_norm": 0.5306332667958443,
|
|
"learning_rate": 3.1290761721064886e-05,
|
|
"loss": 0.1247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03507861867547035,
|
|
"step": 2090,
|
|
"valid_targets_mean": 1990.2,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 2.6552598225602027,
|
|
"grad_norm": 0.3476444951754252,
|
|
"learning_rate": 3.123853051357627e-05,
|
|
"loss": 0.1427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04587898403406143,
|
|
"step": 2095,
|
|
"valid_targets_mean": 3935.1,
|
|
"valid_targets_min": 2440
|
|
},
|
|
{
|
|
"epoch": 2.661596958174905,
|
|
"grad_norm": 0.4891671757505336,
|
|
"learning_rate": 3.118618704321676e-05,
|
|
"loss": 0.1074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04419543221592903,
|
|
"step": 2100,
|
|
"valid_targets_mean": 2397.4,
|
|
"valid_targets_min": 840
|
|
},
|
|
{
|
|
"epoch": 2.667934093789607,
|
|
"grad_norm": 0.5058459487200533,
|
|
"learning_rate": 3.113373183285085e-05,
|
|
"loss": 0.1296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05863305553793907,
|
|
"step": 2105,
|
|
"valid_targets_mean": 3932.4,
|
|
"valid_targets_min": 3701
|
|
},
|
|
{
|
|
"epoch": 2.6742712294043094,
|
|
"grad_norm": 0.4305890176463039,
|
|
"learning_rate": 3.108116540645918e-05,
|
|
"loss": 0.1124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04517430067062378,
|
|
"step": 2110,
|
|
"valid_targets_mean": 1077.1,
|
|
"valid_targets_min": 485
|
|
},
|
|
{
|
|
"epoch": 2.6806083650190113,
|
|
"grad_norm": 0.5270140564443253,
|
|
"learning_rate": 3.102848828913335e-05,
|
|
"loss": 0.1015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06514804065227509,
|
|
"step": 2115,
|
|
"valid_targets_mean": 2774.0,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 2.6869455006337137,
|
|
"grad_norm": 0.3018320541452763,
|
|
"learning_rate": 3.097570100707067e-05,
|
|
"loss": 0.0925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03203708678483963,
|
|
"step": 2120,
|
|
"valid_targets_mean": 3742.4,
|
|
"valid_targets_min": 848
|
|
},
|
|
{
|
|
"epoch": 2.6932826362484157,
|
|
"grad_norm": 0.3513243600005716,
|
|
"learning_rate": 3.092280408756887e-05,
|
|
"loss": 0.0948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051208965480327606,
|
|
"step": 2125,
|
|
"valid_targets_mean": 4370.5,
|
|
"valid_targets_min": 884
|
|
},
|
|
{
|
|
"epoch": 2.6996197718631176,
|
|
"grad_norm": 0.3659578146061258,
|
|
"learning_rate": 3.086979805902088e-05,
|
|
"loss": 0.0957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0525459349155426,
|
|
"step": 2130,
|
|
"valid_targets_mean": 3920.0,
|
|
"valid_targets_min": 1992
|
|
},
|
|
{
|
|
"epoch": 2.70595690747782,
|
|
"grad_norm": 0.5658169982791896,
|
|
"learning_rate": 3.081668345090954e-05,
|
|
"loss": 0.1089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04443363845348358,
|
|
"step": 2135,
|
|
"valid_targets_mean": 3167.8,
|
|
"valid_targets_min": 808
|
|
},
|
|
{
|
|
"epoch": 2.7122940430925224,
|
|
"grad_norm": 0.42679875466546335,
|
|
"learning_rate": 3.076346079380228e-05,
|
|
"loss": 0.1093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.049416206777095795,
|
|
"step": 2140,
|
|
"valid_targets_mean": 3224.8,
|
|
"valid_targets_min": 1814
|
|
},
|
|
{
|
|
"epoch": 2.7186311787072244,
|
|
"grad_norm": 0.4561245853337184,
|
|
"learning_rate": 3.0710130619345874e-05,
|
|
"loss": 0.1147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052236564457416534,
|
|
"step": 2145,
|
|
"valid_targets_mean": 2267.4,
|
|
"valid_targets_min": 931
|
|
},
|
|
{
|
|
"epoch": 2.7249683143219263,
|
|
"grad_norm": 0.34994434130583374,
|
|
"learning_rate": 3.065669346026106e-05,
|
|
"loss": 0.0858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045880235731601715,
|
|
"step": 2150,
|
|
"valid_targets_mean": 3900.1,
|
|
"valid_targets_min": 2719
|
|
},
|
|
{
|
|
"epoch": 2.7313054499366287,
|
|
"grad_norm": 0.48861666047837343,
|
|
"learning_rate": 3.060314985033729e-05,
|
|
"loss": 0.1033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.050077274441719055,
|
|
"step": 2155,
|
|
"valid_targets_mean": 1694.9,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 2.7376425855513307,
|
|
"grad_norm": 0.36718888174323455,
|
|
"learning_rate": 3.0549500324427344e-05,
|
|
"loss": 0.106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04534398019313812,
|
|
"step": 2160,
|
|
"valid_targets_mean": 2999.2,
|
|
"valid_targets_min": 1714
|
|
},
|
|
{
|
|
"epoch": 2.743979721166033,
|
|
"grad_norm": 0.3777169007385018,
|
|
"learning_rate": 3.0495745418442014e-05,
|
|
"loss": 0.0841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03934663534164429,
|
|
"step": 2165,
|
|
"valid_targets_mean": 2813.4,
|
|
"valid_targets_min": 1003
|
|
},
|
|
{
|
|
"epoch": 2.750316856780735,
|
|
"grad_norm": 0.3500086277616889,
|
|
"learning_rate": 3.0441885669344754e-05,
|
|
"loss": 0.106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04174109175801277,
|
|
"step": 2170,
|
|
"valid_targets_mean": 2759.2,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 2.7566539923954374,
|
|
"grad_norm": 0.4477870335162003,
|
|
"learning_rate": 3.0387921615146285e-05,
|
|
"loss": 0.1037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051833175122737885,
|
|
"step": 2175,
|
|
"valid_targets_mean": 2685.1,
|
|
"valid_targets_min": 1003
|
|
},
|
|
{
|
|
"epoch": 2.7629911280101394,
|
|
"grad_norm": 0.44663780756277,
|
|
"learning_rate": 3.0333853794899268e-05,
|
|
"loss": 0.1229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04608001187443733,
|
|
"step": 2180,
|
|
"valid_targets_mean": 2241.2,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 2.7693282636248417,
|
|
"grad_norm": 0.5248558028750987,
|
|
"learning_rate": 3.027968274869288e-05,
|
|
"loss": 0.1026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04014691710472107,
|
|
"step": 2185,
|
|
"valid_targets_mean": 2132.1,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 2.7756653992395437,
|
|
"grad_norm": 0.53160487886994,
|
|
"learning_rate": 3.0225409017647428e-05,
|
|
"loss": 0.1184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051516029983758926,
|
|
"step": 2190,
|
|
"valid_targets_mean": 2120.2,
|
|
"valid_targets_min": 829
|
|
},
|
|
{
|
|
"epoch": 2.7820025348542456,
|
|
"grad_norm": 0.4989231051445991,
|
|
"learning_rate": 3.0171033143908966e-05,
|
|
"loss": 0.1103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07810604572296143,
|
|
"step": 2195,
|
|
"valid_targets_mean": 2409.0,
|
|
"valid_targets_min": 997
|
|
},
|
|
{
|
|
"epoch": 2.788339670468948,
|
|
"grad_norm": 0.48121571213815933,
|
|
"learning_rate": 3.011655567064385e-05,
|
|
"loss": 0.1151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06338296085596085,
|
|
"step": 2200,
|
|
"valid_targets_mean": 2898.9,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 2.7946768060836504,
|
|
"grad_norm": 0.29957262395739015,
|
|
"learning_rate": 3.006197714203333e-05,
|
|
"loss": 0.0941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03221147879958153,
|
|
"step": 2205,
|
|
"valid_targets_mean": 3732.6,
|
|
"valid_targets_min": 2887
|
|
},
|
|
{
|
|
"epoch": 2.8010139416983524,
|
|
"grad_norm": 0.3505185451757404,
|
|
"learning_rate": 3.0007298103268096e-05,
|
|
"loss": 0.0976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03702673316001892,
|
|
"step": 2210,
|
|
"valid_targets_mean": 3567.0,
|
|
"valid_targets_min": 2543
|
|
},
|
|
{
|
|
"epoch": 2.8073510773130543,
|
|
"grad_norm": 0.4250008230492533,
|
|
"learning_rate": 2.9952519100542862e-05,
|
|
"loss": 0.1289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04045306518673897,
|
|
"step": 2215,
|
|
"valid_targets_mean": 2104.5,
|
|
"valid_targets_min": 446
|
|
},
|
|
{
|
|
"epoch": 2.8136882129277567,
|
|
"grad_norm": 0.3947689762959185,
|
|
"learning_rate": 2.9897640681050877e-05,
|
|
"loss": 0.0903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04177038371562958,
|
|
"step": 2220,
|
|
"valid_targets_mean": 2518.4,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 2.8200253485424587,
|
|
"grad_norm": 0.37518644673810053,
|
|
"learning_rate": 2.9842663392978483e-05,
|
|
"loss": 0.0982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07632844150066376,
|
|
"step": 2225,
|
|
"valid_targets_mean": 5443.5,
|
|
"valid_targets_min": 3476
|
|
},
|
|
{
|
|
"epoch": 2.826362484157161,
|
|
"grad_norm": 0.3642045186912722,
|
|
"learning_rate": 2.9787587785499633e-05,
|
|
"loss": 0.0971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046191081404685974,
|
|
"step": 2230,
|
|
"valid_targets_mean": 3400.0,
|
|
"valid_targets_min": 484
|
|
},
|
|
{
|
|
"epoch": 2.832699619771863,
|
|
"grad_norm": 0.30313074912123195,
|
|
"learning_rate": 2.9732414408770408e-05,
|
|
"loss": 0.084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03773191198706627,
|
|
"step": 2235,
|
|
"valid_targets_mean": 3804.2,
|
|
"valid_targets_min": 861
|
|
},
|
|
{
|
|
"epoch": 2.8390367553865654,
|
|
"grad_norm": 0.4663549293623288,
|
|
"learning_rate": 2.9677143813923495e-05,
|
|
"loss": 0.0958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07438945770263672,
|
|
"step": 2240,
|
|
"valid_targets_mean": 4289.2,
|
|
"valid_targets_min": 3268
|
|
},
|
|
{
|
|
"epoch": 2.8453738910012674,
|
|
"grad_norm": 0.3431395232788247,
|
|
"learning_rate": 2.9621776553062722e-05,
|
|
"loss": 0.0994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05152632296085358,
|
|
"step": 2245,
|
|
"valid_targets_mean": 4064.9,
|
|
"valid_targets_min": 3866
|
|
},
|
|
{
|
|
"epoch": 2.8517110266159698,
|
|
"grad_norm": 0.38136723300365033,
|
|
"learning_rate": 2.9566313179257516e-05,
|
|
"loss": 0.12,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04993756115436554,
|
|
"step": 2250,
|
|
"valid_targets_mean": 3151.6,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 2.8580481622306717,
|
|
"grad_norm": 0.5048560495179798,
|
|
"learning_rate": 2.9510754246537385e-05,
|
|
"loss": 0.1245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10741822421550751,
|
|
"step": 2255,
|
|
"valid_targets_mean": 2322.4,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 2.8643852978453737,
|
|
"grad_norm": 0.42729610330351475,
|
|
"learning_rate": 2.94551003098864e-05,
|
|
"loss": 0.0796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04850064218044281,
|
|
"step": 2260,
|
|
"valid_targets_mean": 1779.4,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 2.870722433460076,
|
|
"grad_norm": 0.3760727502598093,
|
|
"learning_rate": 2.9399351925237617e-05,
|
|
"loss": 0.1201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05377878621220589,
|
|
"step": 2265,
|
|
"valid_targets_mean": 3963.5,
|
|
"valid_targets_min": 3551
|
|
},
|
|
{
|
|
"epoch": 2.8770595690747784,
|
|
"grad_norm": 0.4047193867261175,
|
|
"learning_rate": 2.9343509649467553e-05,
|
|
"loss": 0.0967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04616712033748627,
|
|
"step": 2270,
|
|
"valid_targets_mean": 2771.1,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 2.8833967046894804,
|
|
"grad_norm": 0.35896470412759646,
|
|
"learning_rate": 2.9287574040390613e-05,
|
|
"loss": 0.1026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03425322845578194,
|
|
"step": 2275,
|
|
"valid_targets_mean": 3297.9,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 2.8897338403041823,
|
|
"grad_norm": 0.5361889254880974,
|
|
"learning_rate": 2.923154565675351e-05,
|
|
"loss": 0.1147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07269703596830368,
|
|
"step": 2280,
|
|
"valid_targets_mean": 2639.9,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 2.8960709759188847,
|
|
"grad_norm": 0.4722072660072263,
|
|
"learning_rate": 2.9175425058229704e-05,
|
|
"loss": 0.1459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08170512318611145,
|
|
"step": 2285,
|
|
"valid_targets_mean": 2430.0,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 2.9024081115335867,
|
|
"grad_norm": 0.5029544462993528,
|
|
"learning_rate": 2.91192128054138e-05,
|
|
"loss": 0.1103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04394414275884628,
|
|
"step": 2290,
|
|
"valid_targets_mean": 2397.4,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 2.908745247148289,
|
|
"grad_norm": 0.4756541983966229,
|
|
"learning_rate": 2.9062909459815932e-05,
|
|
"loss": 0.1013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04646773636341095,
|
|
"step": 2295,
|
|
"valid_targets_mean": 1563.5,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 2.915082382762991,
|
|
"grad_norm": 0.3996844885193375,
|
|
"learning_rate": 2.9006515583856177e-05,
|
|
"loss": 0.0962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05151201784610748,
|
|
"step": 2300,
|
|
"valid_targets_mean": 3076.2,
|
|
"valid_targets_min": 760
|
|
},
|
|
{
|
|
"epoch": 2.9214195183776934,
|
|
"grad_norm": 0.4370980760110792,
|
|
"learning_rate": 2.895003174085894e-05,
|
|
"loss": 0.0977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05359993875026703,
|
|
"step": 2305,
|
|
"valid_targets_mean": 2928.8,
|
|
"valid_targets_min": 920
|
|
},
|
|
{
|
|
"epoch": 2.9277566539923954,
|
|
"grad_norm": 0.4515664958220793,
|
|
"learning_rate": 2.88934584950473e-05,
|
|
"loss": 0.0864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.030054526403546333,
|
|
"step": 2310,
|
|
"valid_targets_mean": 815.6,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 2.9340937896070978,
|
|
"grad_norm": 0.38041169631360905,
|
|
"learning_rate": 2.88367964115374e-05,
|
|
"loss": 0.1192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04833105951547623,
|
|
"step": 2315,
|
|
"valid_targets_mean": 3706.0,
|
|
"valid_targets_min": 2791
|
|
},
|
|
{
|
|
"epoch": 2.9404309252217997,
|
|
"grad_norm": 0.4453815080350293,
|
|
"learning_rate": 2.8780046056332795e-05,
|
|
"loss": 0.0882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046589195728302,
|
|
"step": 2320,
|
|
"valid_targets_mean": 1599.5,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 2.9467680608365017,
|
|
"grad_norm": 0.36921411127732445,
|
|
"learning_rate": 2.8723207996318776e-05,
|
|
"loss": 0.0971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036397866904735565,
|
|
"step": 2325,
|
|
"valid_targets_mean": 1875.4,
|
|
"valid_targets_min": 529
|
|
},
|
|
{
|
|
"epoch": 2.953105196451204,
|
|
"grad_norm": 0.44717365700971456,
|
|
"learning_rate": 2.8666282799256763e-05,
|
|
"loss": 0.1043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05828263610601425,
|
|
"step": 2330,
|
|
"valid_targets_mean": 3617.6,
|
|
"valid_targets_min": 2017
|
|
},
|
|
{
|
|
"epoch": 2.9594423320659065,
|
|
"grad_norm": 0.29248078533139854,
|
|
"learning_rate": 2.860927103377855e-05,
|
|
"loss": 0.0928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040453314781188965,
|
|
"step": 2335,
|
|
"valid_targets_mean": 4729.5,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 2.9657794676806084,
|
|
"grad_norm": 0.33947525242730164,
|
|
"learning_rate": 2.8552173269380716e-05,
|
|
"loss": 0.0888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03988099470734596,
|
|
"step": 2340,
|
|
"valid_targets_mean": 1725.4,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 2.9721166032953104,
|
|
"grad_norm": 1.373785414285072,
|
|
"learning_rate": 2.8494990076418862e-05,
|
|
"loss": 0.0939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04010583460330963,
|
|
"step": 2345,
|
|
"valid_targets_mean": 3447.1,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 2.9784537389100127,
|
|
"grad_norm": 0.48489105927518406,
|
|
"learning_rate": 2.8437722026101965e-05,
|
|
"loss": 0.1749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1437511146068573,
|
|
"step": 2350,
|
|
"valid_targets_mean": 3122.9,
|
|
"valid_targets_min": 2852
|
|
},
|
|
{
|
|
"epoch": 2.9847908745247147,
|
|
"grad_norm": 0.3573400001270575,
|
|
"learning_rate": 2.838036969048663e-05,
|
|
"loss": 0.1113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.044119637459516525,
|
|
"step": 2355,
|
|
"valid_targets_mean": 3351.6,
|
|
"valid_targets_min": 1235
|
|
},
|
|
{
|
|
"epoch": 2.991128010139417,
|
|
"grad_norm": 0.39958960331993454,
|
|
"learning_rate": 2.832293364247141e-05,
|
|
"loss": 0.1045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04126648232340813,
|
|
"step": 2360,
|
|
"valid_targets_mean": 2663.6,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 2.997465145754119,
|
|
"grad_norm": 0.47358351369829915,
|
|
"learning_rate": 2.826541445579108e-05,
|
|
"loss": 0.0987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09820396453142166,
|
|
"step": 2365,
|
|
"valid_targets_mean": 2112.8,
|
|
"valid_targets_min": 927
|
|
},
|
|
{
|
|
"epoch": 3.0038022813688214,
|
|
"grad_norm": 0.501387302788557,
|
|
"learning_rate": 2.820781270501087e-05,
|
|
"loss": 0.1977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10288380831480026,
|
|
"step": 2370,
|
|
"valid_targets_mean": 5435.1,
|
|
"valid_targets_min": 3516
|
|
},
|
|
{
|
|
"epoch": 3.0101394169835234,
|
|
"grad_norm": 0.38012147267541624,
|
|
"learning_rate": 2.8150128965520774e-05,
|
|
"loss": 0.2108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1007281243801117,
|
|
"step": 2375,
|
|
"valid_targets_mean": 7654.2,
|
|
"valid_targets_min": 6354
|
|
},
|
|
{
|
|
"epoch": 3.016476552598226,
|
|
"grad_norm": 0.3900209472532429,
|
|
"learning_rate": 2.8092363813529773e-05,
|
|
"loss": 0.2014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11584925651550293,
|
|
"step": 2380,
|
|
"valid_targets_mean": 7341.6,
|
|
"valid_targets_min": 5754
|
|
},
|
|
{
|
|
"epoch": 3.0228136882129277,
|
|
"grad_norm": 0.366049663583431,
|
|
"learning_rate": 2.8034517826060073e-05,
|
|
"loss": 0.1953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08700406551361084,
|
|
"step": 2385,
|
|
"valid_targets_mean": 7666.2,
|
|
"valid_targets_min": 5666
|
|
},
|
|
{
|
|
"epoch": 3.02915082382763,
|
|
"grad_norm": 0.4099941496139423,
|
|
"learning_rate": 2.7976591580941373e-05,
|
|
"loss": 0.1964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10207788646221161,
|
|
"step": 2390,
|
|
"valid_targets_mean": 7469.0,
|
|
"valid_targets_min": 5646
|
|
},
|
|
{
|
|
"epoch": 3.035487959442332,
|
|
"grad_norm": 0.41291823964787466,
|
|
"learning_rate": 2.7918585656805068e-05,
|
|
"loss": 0.2019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11592403799295425,
|
|
"step": 2395,
|
|
"valid_targets_mean": 7046.9,
|
|
"valid_targets_min": 5571
|
|
},
|
|
{
|
|
"epoch": 3.041825095057034,
|
|
"grad_norm": 0.3919051854790693,
|
|
"learning_rate": 2.7860500633078475e-05,
|
|
"loss": 0.2123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1123548299074173,
|
|
"step": 2400,
|
|
"valid_targets_mean": 7827.6,
|
|
"valid_targets_min": 5405
|
|
},
|
|
{
|
|
"epoch": 3.0481622306717364,
|
|
"grad_norm": 0.40854237847685365,
|
|
"learning_rate": 2.780233708997904e-05,
|
|
"loss": 0.1993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10491998493671417,
|
|
"step": 2405,
|
|
"valid_targets_mean": 7023.5,
|
|
"valid_targets_min": 5664
|
|
},
|
|
{
|
|
"epoch": 3.0544993662864384,
|
|
"grad_norm": 0.3687463686163387,
|
|
"learning_rate": 2.774409560850855e-05,
|
|
"loss": 0.191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09355193376541138,
|
|
"step": 2410,
|
|
"valid_targets_mean": 7069.8,
|
|
"valid_targets_min": 5103
|
|
},
|
|
{
|
|
"epoch": 3.0608365019011408,
|
|
"grad_norm": 0.3918444952137399,
|
|
"learning_rate": 2.7685776770447345e-05,
|
|
"loss": 0.1795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09233696758747101,
|
|
"step": 2415,
|
|
"valid_targets_mean": 6712.4,
|
|
"valid_targets_min": 5469
|
|
},
|
|
{
|
|
"epoch": 3.0671736375158427,
|
|
"grad_norm": 0.37220440051806053,
|
|
"learning_rate": 2.7627381158348446e-05,
|
|
"loss": 0.1937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09257631003856659,
|
|
"step": 2420,
|
|
"valid_targets_mean": 7289.2,
|
|
"valid_targets_min": 5513
|
|
},
|
|
{
|
|
"epoch": 3.073510773130545,
|
|
"grad_norm": 0.3847810442201023,
|
|
"learning_rate": 2.7568909355531818e-05,
|
|
"loss": 0.1846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09126466512680054,
|
|
"step": 2425,
|
|
"valid_targets_mean": 7022.1,
|
|
"valid_targets_min": 5691
|
|
},
|
|
{
|
|
"epoch": 3.079847908745247,
|
|
"grad_norm": 0.3623390929016227,
|
|
"learning_rate": 2.7510361946078482e-05,
|
|
"loss": 0.1949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09999135136604309,
|
|
"step": 2430,
|
|
"valid_targets_mean": 6736.0,
|
|
"valid_targets_min": 5550
|
|
},
|
|
{
|
|
"epoch": 3.0861850443599494,
|
|
"grad_norm": 0.39908365946729335,
|
|
"learning_rate": 2.745173951482472e-05,
|
|
"loss": 0.1806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08824657648801804,
|
|
"step": 2435,
|
|
"valid_targets_mean": 6346.9,
|
|
"valid_targets_min": 5067
|
|
},
|
|
{
|
|
"epoch": 3.0925221799746514,
|
|
"grad_norm": 0.3797604467603204,
|
|
"learning_rate": 2.7393042647356187e-05,
|
|
"loss": 0.1848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0951378345489502,
|
|
"step": 2440,
|
|
"valid_targets_mean": 6784.0,
|
|
"valid_targets_min": 5618
|
|
},
|
|
{
|
|
"epoch": 3.098859315589354,
|
|
"grad_norm": 0.4536740102425303,
|
|
"learning_rate": 2.7334271930002116e-05,
|
|
"loss": 0.1926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10825905203819275,
|
|
"step": 2445,
|
|
"valid_targets_mean": 7568.4,
|
|
"valid_targets_min": 5856
|
|
},
|
|
{
|
|
"epoch": 3.1051964512040557,
|
|
"grad_norm": 0.44384024662487787,
|
|
"learning_rate": 2.7275427949829422e-05,
|
|
"loss": 0.1857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08908016979694366,
|
|
"step": 2450,
|
|
"valid_targets_mean": 7198.5,
|
|
"valid_targets_min": 4793
|
|
},
|
|
{
|
|
"epoch": 3.111533586818758,
|
|
"grad_norm": 0.3915573467923644,
|
|
"learning_rate": 2.721651129463685e-05,
|
|
"loss": 0.1906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09706535935401917,
|
|
"step": 2455,
|
|
"valid_targets_mean": 6680.5,
|
|
"valid_targets_min": 5350
|
|
},
|
|
{
|
|
"epoch": 3.11787072243346,
|
|
"grad_norm": 0.4355868540961365,
|
|
"learning_rate": 2.71575225529491e-05,
|
|
"loss": 0.2043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10071912407875061,
|
|
"step": 2460,
|
|
"valid_targets_mean": 6770.6,
|
|
"valid_targets_min": 5753
|
|
},
|
|
{
|
|
"epoch": 3.124207858048162,
|
|
"grad_norm": 0.7684744932054731,
|
|
"learning_rate": 2.7098462314010964e-05,
|
|
"loss": 0.2005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07251593470573425,
|
|
"step": 2465,
|
|
"valid_targets_mean": 1419.8,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 3.1305449936628644,
|
|
"grad_norm": 0.43645794126258486,
|
|
"learning_rate": 2.7039331167781416e-05,
|
|
"loss": 0.169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09949930757284164,
|
|
"step": 2470,
|
|
"valid_targets_mean": 6784.2,
|
|
"valid_targets_min": 5042
|
|
},
|
|
{
|
|
"epoch": 3.1368821292775664,
|
|
"grad_norm": 0.3837234415021236,
|
|
"learning_rate": 2.6980129704927733e-05,
|
|
"loss": 0.1835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09118452668190002,
|
|
"step": 2475,
|
|
"valid_targets_mean": 6774.9,
|
|
"valid_targets_min": 3875
|
|
},
|
|
{
|
|
"epoch": 3.1432192648922688,
|
|
"grad_norm": 0.44954465339389144,
|
|
"learning_rate": 2.6920858516819603e-05,
|
|
"loss": 0.1884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09777403622865677,
|
|
"step": 2480,
|
|
"valid_targets_mean": 7047.6,
|
|
"valid_targets_min": 6134
|
|
},
|
|
{
|
|
"epoch": 3.1495564005069707,
|
|
"grad_norm": 0.39881941688912054,
|
|
"learning_rate": 2.686151819552319e-05,
|
|
"loss": 0.1931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10269537568092346,
|
|
"step": 2485,
|
|
"valid_targets_mean": 6886.6,
|
|
"valid_targets_min": 5869
|
|
},
|
|
{
|
|
"epoch": 3.155893536121673,
|
|
"grad_norm": 0.3684401960943093,
|
|
"learning_rate": 2.6802109333795237e-05,
|
|
"loss": 0.1864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08688201010227203,
|
|
"step": 2490,
|
|
"valid_targets_mean": 7559.8,
|
|
"valid_targets_min": 6145
|
|
},
|
|
{
|
|
"epoch": 3.162230671736375,
|
|
"grad_norm": 0.33541598428998126,
|
|
"learning_rate": 2.674263252507717e-05,
|
|
"loss": 0.1769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08504645526409149,
|
|
"step": 2495,
|
|
"valid_targets_mean": 8920.5,
|
|
"valid_targets_min": 4780
|
|
},
|
|
{
|
|
"epoch": 3.1685678073510775,
|
|
"grad_norm": 0.3351012331359497,
|
|
"learning_rate": 2.6683088363489118e-05,
|
|
"loss": 0.1636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07533422112464905,
|
|
"step": 2500,
|
|
"valid_targets_mean": 7393.4,
|
|
"valid_targets_min": 5116
|
|
},
|
|
{
|
|
"epoch": 3.1749049429657794,
|
|
"grad_norm": 0.33217032996100804,
|
|
"learning_rate": 2.6623477443824008e-05,
|
|
"loss": 0.1673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08264975249767303,
|
|
"step": 2505,
|
|
"valid_targets_mean": 8477.5,
|
|
"valid_targets_min": 4033
|
|
},
|
|
{
|
|
"epoch": 3.181242078580482,
|
|
"grad_norm": 0.3858203688398608,
|
|
"learning_rate": 2.6563800361541636e-05,
|
|
"loss": 0.1868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09333235025405884,
|
|
"step": 2510,
|
|
"valid_targets_mean": 6736.6,
|
|
"valid_targets_min": 4646
|
|
},
|
|
{
|
|
"epoch": 3.1875792141951838,
|
|
"grad_norm": 0.41589878264647406,
|
|
"learning_rate": 2.6504057712762685e-05,
|
|
"loss": 0.1767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09664206206798553,
|
|
"step": 2515,
|
|
"valid_targets_mean": 7551.5,
|
|
"valid_targets_min": 5461
|
|
},
|
|
{
|
|
"epoch": 3.1939163498098857,
|
|
"grad_norm": 0.4390269502580065,
|
|
"learning_rate": 2.6444250094262804e-05,
|
|
"loss": 0.1973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11244700849056244,
|
|
"step": 2520,
|
|
"valid_targets_mean": 7091.0,
|
|
"valid_targets_min": 4879
|
|
},
|
|
{
|
|
"epoch": 3.200253485424588,
|
|
"grad_norm": 0.4155504125811794,
|
|
"learning_rate": 2.6384378103466617e-05,
|
|
"loss": 0.2008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09250454604625702,
|
|
"step": 2525,
|
|
"valid_targets_mean": 6269.8,
|
|
"valid_targets_min": 4606
|
|
},
|
|
{
|
|
"epoch": 3.20659062103929,
|
|
"grad_norm": 0.3898596469460076,
|
|
"learning_rate": 2.632444233844179e-05,
|
|
"loss": 0.1981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08880184590816498,
|
|
"step": 2530,
|
|
"valid_targets_mean": 6818.4,
|
|
"valid_targets_min": 5525
|
|
},
|
|
{
|
|
"epoch": 3.2129277566539924,
|
|
"grad_norm": 0.39309129602937204,
|
|
"learning_rate": 2.6264443397893015e-05,
|
|
"loss": 0.1838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0918392688035965,
|
|
"step": 2535,
|
|
"valid_targets_mean": 7365.4,
|
|
"valid_targets_min": 5000
|
|
},
|
|
{
|
|
"epoch": 3.2192648922686944,
|
|
"grad_norm": 0.4173595143732043,
|
|
"learning_rate": 2.620438188115606e-05,
|
|
"loss": 0.1902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08497893810272217,
|
|
"step": 2540,
|
|
"valid_targets_mean": 6212.1,
|
|
"valid_targets_min": 3988
|
|
},
|
|
{
|
|
"epoch": 3.225602027883397,
|
|
"grad_norm": 0.3944727762231605,
|
|
"learning_rate": 2.6144258388191776e-05,
|
|
"loss": 0.1919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09405836462974548,
|
|
"step": 2545,
|
|
"valid_targets_mean": 8396.9,
|
|
"valid_targets_min": 6131
|
|
},
|
|
{
|
|
"epoch": 3.2319391634980987,
|
|
"grad_norm": 0.40175148994565074,
|
|
"learning_rate": 2.6084073519580092e-05,
|
|
"loss": 0.1901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09787602722644806,
|
|
"step": 2550,
|
|
"valid_targets_mean": 7651.0,
|
|
"valid_targets_min": 4061
|
|
},
|
|
{
|
|
"epoch": 3.238276299112801,
|
|
"grad_norm": 0.388024865837785,
|
|
"learning_rate": 2.6023827876514038e-05,
|
|
"loss": 0.1919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09385988861322403,
|
|
"step": 2555,
|
|
"valid_targets_mean": 7090.1,
|
|
"valid_targets_min": 5203
|
|
},
|
|
{
|
|
"epoch": 3.244613434727503,
|
|
"grad_norm": 0.4100488276422231,
|
|
"learning_rate": 2.5963522060793716e-05,
|
|
"loss": 0.1926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09056046605110168,
|
|
"step": 2560,
|
|
"valid_targets_mean": 7001.0,
|
|
"valid_targets_min": 5362
|
|
},
|
|
{
|
|
"epoch": 3.2509505703422055,
|
|
"grad_norm": 0.4001501024270812,
|
|
"learning_rate": 2.5903156674820306e-05,
|
|
"loss": 0.1923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0931127518415451,
|
|
"step": 2565,
|
|
"valid_targets_mean": 6359.2,
|
|
"valid_targets_min": 5236
|
|
},
|
|
{
|
|
"epoch": 3.2572877059569074,
|
|
"grad_norm": 0.45554108816730765,
|
|
"learning_rate": 2.5842732321590034e-05,
|
|
"loss": 0.1888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1019999086856842,
|
|
"step": 2570,
|
|
"valid_targets_mean": 5939.8,
|
|
"valid_targets_min": 4693
|
|
},
|
|
{
|
|
"epoch": 3.26362484157161,
|
|
"grad_norm": 1.1039438408564701,
|
|
"learning_rate": 2.578224960468816e-05,
|
|
"loss": 0.161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03476908057928085,
|
|
"step": 2575,
|
|
"valid_targets_mean": 208.6,
|
|
"valid_targets_min": 134
|
|
},
|
|
{
|
|
"epoch": 3.2699619771863118,
|
|
"grad_norm": 0.44008945456012766,
|
|
"learning_rate": 2.5721709128282953e-05,
|
|
"loss": 0.1908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0969371497631073,
|
|
"step": 2580,
|
|
"valid_targets_mean": 6008.5,
|
|
"valid_targets_min": 4956
|
|
},
|
|
{
|
|
"epoch": 3.2762991128010137,
|
|
"grad_norm": 0.4207666701434133,
|
|
"learning_rate": 2.566111149711963e-05,
|
|
"loss": 0.1817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08141922205686569,
|
|
"step": 2585,
|
|
"valid_targets_mean": 6042.0,
|
|
"valid_targets_min": 4817
|
|
},
|
|
{
|
|
"epoch": 3.282636248415716,
|
|
"grad_norm": 0.4184514573032483,
|
|
"learning_rate": 2.560045731651434e-05,
|
|
"loss": 0.1901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09297875314950943,
|
|
"step": 2590,
|
|
"valid_targets_mean": 6608.6,
|
|
"valid_targets_min": 5102
|
|
},
|
|
{
|
|
"epoch": 3.288973384030418,
|
|
"grad_norm": 0.4213260261776277,
|
|
"learning_rate": 2.5539747192348125e-05,
|
|
"loss": 0.1808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09297514706850052,
|
|
"step": 2595,
|
|
"valid_targets_mean": 6670.1,
|
|
"valid_targets_min": 4869
|
|
},
|
|
{
|
|
"epoch": 3.2953105196451205,
|
|
"grad_norm": 0.4470247664402962,
|
|
"learning_rate": 2.5478981731060838e-05,
|
|
"loss": 0.1821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08220653235912323,
|
|
"step": 2600,
|
|
"valid_targets_mean": 6088.4,
|
|
"valid_targets_min": 4946
|
|
},
|
|
{
|
|
"epoch": 3.3016476552598224,
|
|
"grad_norm": 0.4149795860894828,
|
|
"learning_rate": 2.5418161539645097e-05,
|
|
"loss": 0.1917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08881151676177979,
|
|
"step": 2605,
|
|
"valid_targets_mean": 6228.4,
|
|
"valid_targets_min": 4856
|
|
},
|
|
{
|
|
"epoch": 3.307984790874525,
|
|
"grad_norm": 0.4214395663821203,
|
|
"learning_rate": 2.5357287225640236e-05,
|
|
"loss": 0.1904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0900336354970932,
|
|
"step": 2610,
|
|
"valid_targets_mean": 5376.6,
|
|
"valid_targets_min": 4145
|
|
},
|
|
{
|
|
"epoch": 3.3143219264892267,
|
|
"grad_norm": 0.5058641407291419,
|
|
"learning_rate": 2.529635939712623e-05,
|
|
"loss": 0.1565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09441885352134705,
|
|
"step": 2615,
|
|
"valid_targets_mean": 6309.2,
|
|
"valid_targets_min": 5305
|
|
},
|
|
{
|
|
"epoch": 3.320659062103929,
|
|
"grad_norm": 0.4741011089056058,
|
|
"learning_rate": 2.523537866271759e-05,
|
|
"loss": 0.1835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08905699849128723,
|
|
"step": 2620,
|
|
"valid_targets_mean": 6221.5,
|
|
"valid_targets_min": 4636
|
|
},
|
|
{
|
|
"epoch": 3.326996197718631,
|
|
"grad_norm": 0.4155892645289333,
|
|
"learning_rate": 2.517434563155734e-05,
|
|
"loss": 0.1841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0972861647605896,
|
|
"step": 2625,
|
|
"valid_targets_mean": 6831.5,
|
|
"valid_targets_min": 5668
|
|
},
|
|
{
|
|
"epoch": 3.3333333333333335,
|
|
"grad_norm": 0.5339139598487168,
|
|
"learning_rate": 2.5113260913310883e-05,
|
|
"loss": 0.1858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09874635189771652,
|
|
"step": 2630,
|
|
"valid_targets_mean": 6437.1,
|
|
"valid_targets_min": 5004
|
|
},
|
|
{
|
|
"epoch": 3.3396704689480354,
|
|
"grad_norm": 0.39333327264861645,
|
|
"learning_rate": 2.5052125118159936e-05,
|
|
"loss": 0.1789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08605389297008514,
|
|
"step": 2635,
|
|
"valid_targets_mean": 6205.8,
|
|
"valid_targets_min": 5341
|
|
},
|
|
{
|
|
"epoch": 3.346007604562738,
|
|
"grad_norm": 0.4168892993812132,
|
|
"learning_rate": 2.499093885679642e-05,
|
|
"loss": 0.1835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09775331616401672,
|
|
"step": 2640,
|
|
"valid_targets_mean": 6748.0,
|
|
"valid_targets_min": 5363
|
|
},
|
|
{
|
|
"epoch": 3.3523447401774398,
|
|
"grad_norm": 0.4487511375454597,
|
|
"learning_rate": 2.492970274041639e-05,
|
|
"loss": 0.1806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08395624905824661,
|
|
"step": 2645,
|
|
"valid_targets_mean": 5020.1,
|
|
"valid_targets_min": 393
|
|
},
|
|
{
|
|
"epoch": 3.3586818757921417,
|
|
"grad_norm": 0.42965134935285154,
|
|
"learning_rate": 2.486841738071389e-05,
|
|
"loss": 0.1316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052882347255945206,
|
|
"step": 2650,
|
|
"valid_targets_mean": 2902.5,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 3.365019011406844,
|
|
"grad_norm": 0.44357207418416156,
|
|
"learning_rate": 2.4807083389874858e-05,
|
|
"loss": 0.0961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05051802098751068,
|
|
"step": 2655,
|
|
"valid_targets_mean": 3214.1,
|
|
"valid_targets_min": 2461
|
|
},
|
|
{
|
|
"epoch": 3.371356147021546,
|
|
"grad_norm": 0.8932317062049498,
|
|
"learning_rate": 2.4745701380571028e-05,
|
|
"loss": 0.0897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.055399179458618164,
|
|
"step": 2660,
|
|
"valid_targets_mean": 3694.6,
|
|
"valid_targets_min": 2571
|
|
},
|
|
{
|
|
"epoch": 3.3776932826362485,
|
|
"grad_norm": 0.40365353989801567,
|
|
"learning_rate": 2.468427196595379e-05,
|
|
"loss": 0.1086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046118564903736115,
|
|
"step": 2665,
|
|
"valid_targets_mean": 3440.6,
|
|
"valid_targets_min": 2425
|
|
},
|
|
{
|
|
"epoch": 3.3840304182509504,
|
|
"grad_norm": 0.4461033101993593,
|
|
"learning_rate": 2.462279575964806e-05,
|
|
"loss": 0.0885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04583527892827988,
|
|
"step": 2670,
|
|
"valid_targets_mean": 2955.5,
|
|
"valid_targets_min": 1198
|
|
},
|
|
{
|
|
"epoch": 3.390367553865653,
|
|
"grad_norm": 0.7863846356750369,
|
|
"learning_rate": 2.4561273375746178e-05,
|
|
"loss": 0.1066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0689534842967987,
|
|
"step": 2675,
|
|
"valid_targets_mean": 1115.9,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 3.3967046894803548,
|
|
"grad_norm": 0.4190116531930308,
|
|
"learning_rate": 2.4499705428801738e-05,
|
|
"loss": 0.1423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046672455966472626,
|
|
"step": 2680,
|
|
"valid_targets_mean": 3223.4,
|
|
"valid_targets_min": 954
|
|
},
|
|
{
|
|
"epoch": 3.403041825095057,
|
|
"grad_norm": 0.4529298074923214,
|
|
"learning_rate": 2.4438092533823495e-05,
|
|
"loss": 0.0979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07600854337215424,
|
|
"step": 2685,
|
|
"valid_targets_mean": 3784.6,
|
|
"valid_targets_min": 3125
|
|
},
|
|
{
|
|
"epoch": 3.409378960709759,
|
|
"grad_norm": 0.31024894309693946,
|
|
"learning_rate": 2.437643530626916e-05,
|
|
"loss": 0.1058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04591240733861923,
|
|
"step": 2690,
|
|
"valid_targets_mean": 3476.4,
|
|
"valid_targets_min": 799
|
|
},
|
|
{
|
|
"epoch": 3.4157160963244615,
|
|
"grad_norm": 0.2907597143013337,
|
|
"learning_rate": 2.4314734362039323e-05,
|
|
"loss": 0.1141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04238685965538025,
|
|
"step": 2695,
|
|
"valid_targets_mean": 5059.1,
|
|
"valid_targets_min": 3574
|
|
},
|
|
{
|
|
"epoch": 3.4220532319391634,
|
|
"grad_norm": 0.36104196695891777,
|
|
"learning_rate": 2.425299031747123e-05,
|
|
"loss": 0.0867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03932809457182884,
|
|
"step": 2700,
|
|
"valid_targets_mean": 3265.2,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 3.428390367553866,
|
|
"grad_norm": 0.35602257313715546,
|
|
"learning_rate": 2.4191203789332695e-05,
|
|
"loss": 0.0944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.047472622245550156,
|
|
"step": 2705,
|
|
"valid_targets_mean": 3795.9,
|
|
"valid_targets_min": 2642
|
|
},
|
|
{
|
|
"epoch": 3.434727503168568,
|
|
"grad_norm": 0.546094870873191,
|
|
"learning_rate": 2.4129375394815878e-05,
|
|
"loss": 0.1007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09077220410108566,
|
|
"step": 2710,
|
|
"valid_targets_mean": 2260.8,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 3.4410646387832697,
|
|
"grad_norm": 0.3967940462734825,
|
|
"learning_rate": 2.4067505751531158e-05,
|
|
"loss": 0.1212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03870636597275734,
|
|
"step": 2715,
|
|
"valid_targets_mean": 3740.5,
|
|
"valid_targets_min": 3040
|
|
},
|
|
{
|
|
"epoch": 3.447401774397972,
|
|
"grad_norm": 0.5178506079667385,
|
|
"learning_rate": 2.4005595477500942e-05,
|
|
"loss": 0.0917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.053537048399448395,
|
|
"step": 2720,
|
|
"valid_targets_mean": 1943.8,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 3.453738910012674,
|
|
"grad_norm": 0.32299428729272,
|
|
"learning_rate": 2.3943645191153522e-05,
|
|
"loss": 0.0874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.031246639788150787,
|
|
"step": 2725,
|
|
"valid_targets_mean": 2742.8,
|
|
"valid_targets_min": 934
|
|
},
|
|
{
|
|
"epoch": 3.4600760456273765,
|
|
"grad_norm": 0.3524679110567197,
|
|
"learning_rate": 2.3881655511316865e-05,
|
|
"loss": 0.0808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03923824429512024,
|
|
"step": 2730,
|
|
"valid_targets_mean": 3805.0,
|
|
"valid_targets_min": 2798
|
|
},
|
|
{
|
|
"epoch": 3.4664131812420784,
|
|
"grad_norm": 0.4363859296437833,
|
|
"learning_rate": 2.381962705721244e-05,
|
|
"loss": 0.1065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03347650170326233,
|
|
"step": 2735,
|
|
"valid_targets_mean": 2277.2,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 3.472750316856781,
|
|
"grad_norm": 0.41586742762707074,
|
|
"learning_rate": 2.3757560448449036e-05,
|
|
"loss": 0.1056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04267163947224617,
|
|
"step": 2740,
|
|
"valid_targets_mean": 2702.8,
|
|
"valid_targets_min": 903
|
|
},
|
|
{
|
|
"epoch": 3.4790874524714828,
|
|
"grad_norm": 0.4380783209824341,
|
|
"learning_rate": 2.3695456305016588e-05,
|
|
"loss": 0.1059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04519493132829666,
|
|
"step": 2745,
|
|
"valid_targets_mean": 2920.8,
|
|
"valid_targets_min": 474
|
|
},
|
|
{
|
|
"epoch": 3.485424588086185,
|
|
"grad_norm": 0.39754738009989404,
|
|
"learning_rate": 2.363331524727996e-05,
|
|
"loss": 0.1025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.050032101571559906,
|
|
"step": 2750,
|
|
"valid_targets_mean": 3389.1,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 3.491761723700887,
|
|
"grad_norm": 0.36335664462043626,
|
|
"learning_rate": 2.3571137895972735e-05,
|
|
"loss": 0.1379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03647737205028534,
|
|
"step": 2755,
|
|
"valid_targets_mean": 3256.8,
|
|
"valid_targets_min": 902
|
|
},
|
|
{
|
|
"epoch": 3.4980988593155895,
|
|
"grad_norm": 0.4295141631458529,
|
|
"learning_rate": 2.350892487219108e-05,
|
|
"loss": 0.0869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043407879769802094,
|
|
"step": 2760,
|
|
"valid_targets_mean": 3214.1,
|
|
"valid_targets_min": 1568
|
|
},
|
|
{
|
|
"epoch": 3.5044359949302915,
|
|
"grad_norm": 0.8244744096715992,
|
|
"learning_rate": 2.3446676797387463e-05,
|
|
"loss": 0.1211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0726076140999794,
|
|
"step": 2765,
|
|
"valid_targets_mean": 953.2,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 3.510773130544994,
|
|
"grad_norm": 0.5091105455242201,
|
|
"learning_rate": 2.3384394293364484e-05,
|
|
"loss": 0.1175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06729063391685486,
|
|
"step": 2770,
|
|
"valid_targets_mean": 2750.6,
|
|
"valid_targets_min": 1072
|
|
},
|
|
{
|
|
"epoch": 3.517110266159696,
|
|
"grad_norm": 0.4378170560354809,
|
|
"learning_rate": 2.332207798226869e-05,
|
|
"loss": 0.0948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0296617541462183,
|
|
"step": 2775,
|
|
"valid_targets_mean": 2265.6,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 3.5234474017743977,
|
|
"grad_norm": 0.6269230900484041,
|
|
"learning_rate": 2.3259728486584297e-05,
|
|
"loss": 0.2103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16477887332439423,
|
|
"step": 2780,
|
|
"valid_targets_mean": 2829.9,
|
|
"valid_targets_min": 1180
|
|
},
|
|
{
|
|
"epoch": 3.5297845373891,
|
|
"grad_norm": 0.4746508693380042,
|
|
"learning_rate": 2.319734642912701e-05,
|
|
"loss": 0.1957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04429415240883827,
|
|
"step": 2785,
|
|
"valid_targets_mean": 1931.6,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 3.5361216730038025,
|
|
"grad_norm": 0.35322632984918984,
|
|
"learning_rate": 2.313493243303781e-05,
|
|
"loss": 0.1042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040937140583992004,
|
|
"step": 2790,
|
|
"valid_targets_mean": 3481.8,
|
|
"valid_targets_min": 2963
|
|
},
|
|
{
|
|
"epoch": 3.5424588086185045,
|
|
"grad_norm": 0.7289575818388888,
|
|
"learning_rate": 2.307248712177672e-05,
|
|
"loss": 0.181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21927553415298462,
|
|
"step": 2795,
|
|
"valid_targets_mean": 2351.1,
|
|
"valid_targets_min": 1153
|
|
},
|
|
{
|
|
"epoch": 3.5487959442332064,
|
|
"grad_norm": 0.3413395321816812,
|
|
"learning_rate": 2.301001111911655e-05,
|
|
"loss": 0.1093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043088022619485855,
|
|
"step": 2800,
|
|
"valid_targets_mean": 4478.1,
|
|
"valid_targets_min": 1700
|
|
},
|
|
{
|
|
"epoch": 3.555133079847909,
|
|
"grad_norm": 0.3379959024601892,
|
|
"learning_rate": 2.2947505049136707e-05,
|
|
"loss": 0.0908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03823158144950867,
|
|
"step": 2805,
|
|
"valid_targets_mean": 4229.2,
|
|
"valid_targets_min": 789
|
|
},
|
|
{
|
|
"epoch": 3.5614702154626108,
|
|
"grad_norm": 0.40243057040020447,
|
|
"learning_rate": 2.2884969536216942e-05,
|
|
"loss": 0.1014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04565218463540077,
|
|
"step": 2810,
|
|
"valid_targets_mean": 3347.1,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 3.567807351077313,
|
|
"grad_norm": 0.4383015020021015,
|
|
"learning_rate": 2.2822405205031112e-05,
|
|
"loss": 0.0893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03200094401836395,
|
|
"step": 2815,
|
|
"valid_targets_mean": 1886.6,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 3.574144486692015,
|
|
"grad_norm": 0.49858203427758035,
|
|
"learning_rate": 2.2759812680540942e-05,
|
|
"loss": 0.0916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043076999485492706,
|
|
"step": 2820,
|
|
"valid_targets_mean": 2613.8,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 3.5804816223067175,
|
|
"grad_norm": 0.47998654559400955,
|
|
"learning_rate": 2.2697192587989786e-05,
|
|
"loss": 0.0856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03995348513126373,
|
|
"step": 2825,
|
|
"valid_targets_mean": 2048.2,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 3.5868187579214195,
|
|
"grad_norm": 0.39870713019468623,
|
|
"learning_rate": 2.2634545552896384e-05,
|
|
"loss": 0.0903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0435982383787632,
|
|
"step": 2830,
|
|
"valid_targets_mean": 3611.4,
|
|
"valid_targets_min": 2422
|
|
},
|
|
{
|
|
"epoch": 3.593155893536122,
|
|
"grad_norm": 0.3512760492577894,
|
|
"learning_rate": 2.2571872201048586e-05,
|
|
"loss": 0.0797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036693722009658813,
|
|
"step": 2835,
|
|
"valid_targets_mean": 3673.2,
|
|
"valid_targets_min": 1761
|
|
},
|
|
{
|
|
"epoch": 3.599493029150824,
|
|
"grad_norm": 0.36008804524946064,
|
|
"learning_rate": 2.2509173158497148e-05,
|
|
"loss": 0.1343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.024128355085849762,
|
|
"step": 2840,
|
|
"valid_targets_mean": 3335.6,
|
|
"valid_targets_min": 772
|
|
},
|
|
{
|
|
"epoch": 3.6058301647655258,
|
|
"grad_norm": 0.402514084312606,
|
|
"learning_rate": 2.2446449051549442e-05,
|
|
"loss": 0.0754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.035821788012981415,
|
|
"step": 2845,
|
|
"valid_targets_mean": 2846.9,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 3.612167300380228,
|
|
"grad_norm": 0.3368718617318631,
|
|
"learning_rate": 2.2383700506763204e-05,
|
|
"loss": 0.0787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03935318812727928,
|
|
"step": 2850,
|
|
"valid_targets_mean": 3684.2,
|
|
"valid_targets_min": 2788
|
|
},
|
|
{
|
|
"epoch": 3.6185044359949305,
|
|
"grad_norm": 0.3681526459628367,
|
|
"learning_rate": 2.2320928150940294e-05,
|
|
"loss": 0.0822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041912369430065155,
|
|
"step": 2855,
|
|
"valid_targets_mean": 2912.9,
|
|
"valid_targets_min": 948
|
|
},
|
|
{
|
|
"epoch": 3.6248415716096325,
|
|
"grad_norm": 0.6187649321286365,
|
|
"learning_rate": 2.225813261112042e-05,
|
|
"loss": 0.1112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1051560714840889,
|
|
"step": 2860,
|
|
"valid_targets_mean": 1534.0,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 3.6311787072243344,
|
|
"grad_norm": 0.38913463311509877,
|
|
"learning_rate": 2.219531451457488e-05,
|
|
"loss": 0.1075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04748475179076195,
|
|
"step": 2865,
|
|
"valid_targets_mean": 3695.5,
|
|
"valid_targets_min": 2800
|
|
},
|
|
{
|
|
"epoch": 3.637515842839037,
|
|
"grad_norm": 0.5096319902410447,
|
|
"learning_rate": 2.2132474488800275e-05,
|
|
"loss": 0.1196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05704424902796745,
|
|
"step": 2870,
|
|
"valid_targets_mean": 2400.9,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 3.643852978453739,
|
|
"grad_norm": 0.46148832321717065,
|
|
"learning_rate": 2.20696131615123e-05,
|
|
"loss": 0.0957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.049703147262334824,
|
|
"step": 2875,
|
|
"valid_targets_mean": 3223.4,
|
|
"valid_targets_min": 1333
|
|
},
|
|
{
|
|
"epoch": 3.650190114068441,
|
|
"grad_norm": 0.46290335022250206,
|
|
"learning_rate": 2.2006731160639377e-05,
|
|
"loss": 0.112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04652751609683037,
|
|
"step": 2880,
|
|
"valid_targets_mean": 2616.0,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 3.656527249683143,
|
|
"grad_norm": 0.41360384478293954,
|
|
"learning_rate": 2.1943829114316486e-05,
|
|
"loss": 0.126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04023801162838936,
|
|
"step": 2885,
|
|
"valid_targets_mean": 2001.8,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 3.6628643852978455,
|
|
"grad_norm": 0.39180148572518964,
|
|
"learning_rate": 2.1880907650878824e-05,
|
|
"loss": 0.1,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07201077044010162,
|
|
"step": 2890,
|
|
"valid_targets_mean": 3172.4,
|
|
"valid_targets_min": 2525
|
|
},
|
|
{
|
|
"epoch": 3.6692015209125475,
|
|
"grad_norm": 0.5005808093309119,
|
|
"learning_rate": 2.1817967398855546e-05,
|
|
"loss": 0.1122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042856357991695404,
|
|
"step": 2895,
|
|
"valid_targets_mean": 3772.6,
|
|
"valid_targets_min": 2393
|
|
},
|
|
{
|
|
"epoch": 3.67553865652725,
|
|
"grad_norm": 0.39895357175207496,
|
|
"learning_rate": 2.1755008986963494e-05,
|
|
"loss": 0.0991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036911848932504654,
|
|
"step": 2900,
|
|
"valid_targets_mean": 2978.6,
|
|
"valid_targets_min": 804
|
|
},
|
|
{
|
|
"epoch": 3.681875792141952,
|
|
"grad_norm": 0.36454439793516735,
|
|
"learning_rate": 2.1692033044100908e-05,
|
|
"loss": 0.0922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.044618088752031326,
|
|
"step": 2905,
|
|
"valid_targets_mean": 5015.2,
|
|
"valid_targets_min": 3695
|
|
},
|
|
{
|
|
"epoch": 3.6882129277566538,
|
|
"grad_norm": 0.40468977182069304,
|
|
"learning_rate": 2.1629040199341142e-05,
|
|
"loss": 0.0803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05393015220761299,
|
|
"step": 2910,
|
|
"valid_targets_mean": 4192.9,
|
|
"valid_targets_min": 871
|
|
},
|
|
{
|
|
"epoch": 3.694550063371356,
|
|
"grad_norm": 0.2944013331649188,
|
|
"learning_rate": 2.15660310819264e-05,
|
|
"loss": 0.0799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02888629585504532,
|
|
"step": 2915,
|
|
"valid_targets_mean": 4604.4,
|
|
"valid_targets_min": 3804
|
|
},
|
|
{
|
|
"epoch": 3.7008871989860586,
|
|
"grad_norm": 0.3521396997923972,
|
|
"learning_rate": 2.150300632126142e-05,
|
|
"loss": 0.0889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04115761071443558,
|
|
"step": 2920,
|
|
"valid_targets_mean": 3744.0,
|
|
"valid_targets_min": 2862
|
|
},
|
|
{
|
|
"epoch": 3.7072243346007605,
|
|
"grad_norm": 0.6154376291736575,
|
|
"learning_rate": 2.1439966546907204e-05,
|
|
"loss": 0.1088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0584292970597744,
|
|
"step": 2925,
|
|
"valid_targets_mean": 1311.2,
|
|
"valid_targets_min": 497
|
|
},
|
|
{
|
|
"epoch": 3.7135614702154625,
|
|
"grad_norm": 0.4125956792975641,
|
|
"learning_rate": 2.1376912388574736e-05,
|
|
"loss": 0.0915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05442231893539429,
|
|
"step": 2930,
|
|
"valid_targets_mean": 4210.6,
|
|
"valid_targets_min": 4017
|
|
},
|
|
{
|
|
"epoch": 3.719898605830165,
|
|
"grad_norm": 0.5249550532347247,
|
|
"learning_rate": 2.131384447611867e-05,
|
|
"loss": 0.1025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04606545716524124,
|
|
"step": 2935,
|
|
"valid_targets_mean": 1778.8,
|
|
"valid_targets_min": 839
|
|
},
|
|
{
|
|
"epoch": 3.726235741444867,
|
|
"grad_norm": 0.36539847735603603,
|
|
"learning_rate": 2.125076343953107e-05,
|
|
"loss": 0.0744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03808154538273811,
|
|
"step": 2940,
|
|
"valid_targets_mean": 3319.6,
|
|
"valid_targets_min": 905
|
|
},
|
|
{
|
|
"epoch": 3.732572877059569,
|
|
"grad_norm": 0.44767584791808873,
|
|
"learning_rate": 2.1187669908935083e-05,
|
|
"loss": 0.097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07170660048723221,
|
|
"step": 2945,
|
|
"valid_targets_mean": 2890.1,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 3.738910012674271,
|
|
"grad_norm": 0.4053845192184301,
|
|
"learning_rate": 2.1124564514578664e-05,
|
|
"loss": 0.0881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040470004081726074,
|
|
"step": 2950,
|
|
"valid_targets_mean": 3689.5,
|
|
"valid_targets_min": 3184
|
|
},
|
|
{
|
|
"epoch": 3.7452471482889735,
|
|
"grad_norm": 0.5681043164674722,
|
|
"learning_rate": 2.106144788682827e-05,
|
|
"loss": 0.0836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06720849126577377,
|
|
"step": 2955,
|
|
"valid_targets_mean": 1803.5,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 3.7515842839036755,
|
|
"grad_norm": 0.47190735614335044,
|
|
"learning_rate": 2.099832065616259e-05,
|
|
"loss": 0.0873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04032148793339729,
|
|
"step": 2960,
|
|
"valid_targets_mean": 1761.5,
|
|
"valid_targets_min": 827
|
|
},
|
|
{
|
|
"epoch": 3.757921419518378,
|
|
"grad_norm": 0.4815022592159606,
|
|
"learning_rate": 2.0935183453166204e-05,
|
|
"loss": 0.0938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0444331094622612,
|
|
"step": 2965,
|
|
"valid_targets_mean": 1609.0,
|
|
"valid_targets_min": 940
|
|
},
|
|
{
|
|
"epoch": 3.76425855513308,
|
|
"grad_norm": 0.40685029678665297,
|
|
"learning_rate": 2.087203690852331e-05,
|
|
"loss": 0.1064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03976685181260109,
|
|
"step": 2970,
|
|
"valid_targets_mean": 2906.5,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 3.770595690747782,
|
|
"grad_norm": 0.4828961074355787,
|
|
"learning_rate": 2.080888165301144e-05,
|
|
"loss": 0.0917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03586552292108536,
|
|
"step": 2975,
|
|
"valid_targets_mean": 1267.4,
|
|
"valid_targets_min": 495
|
|
},
|
|
{
|
|
"epoch": 3.776932826362484,
|
|
"grad_norm": 0.46926902052038416,
|
|
"learning_rate": 2.0745718317495118e-05,
|
|
"loss": 0.1044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.031066587194800377,
|
|
"step": 2980,
|
|
"valid_targets_mean": 1789.6,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 3.7832699619771866,
|
|
"grad_norm": 0.6974480062996813,
|
|
"learning_rate": 2.0682547532919587e-05,
|
|
"loss": 0.1142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11521219462156296,
|
|
"step": 2985,
|
|
"valid_targets_mean": 1905.9,
|
|
"valid_targets_min": 521
|
|
},
|
|
{
|
|
"epoch": 3.7896070975918885,
|
|
"grad_norm": 0.4247087843354057,
|
|
"learning_rate": 2.061936993030451e-05,
|
|
"loss": 0.0885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05654385685920715,
|
|
"step": 2990,
|
|
"valid_targets_mean": 3468.1,
|
|
"valid_targets_min": 1909
|
|
},
|
|
{
|
|
"epoch": 3.7959442332065905,
|
|
"grad_norm": 0.3278592171396103,
|
|
"learning_rate": 2.0556186140737634e-05,
|
|
"loss": 0.077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03483317792415619,
|
|
"step": 2995,
|
|
"valid_targets_mean": 3472.8,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 3.802281368821293,
|
|
"grad_norm": 0.3840933609522823,
|
|
"learning_rate": 2.0492996795368533e-05,
|
|
"loss": 0.0897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04799468070268631,
|
|
"step": 3000,
|
|
"valid_targets_mean": 2790.6,
|
|
"valid_targets_min": 1692
|
|
},
|
|
{
|
|
"epoch": 3.808618504435995,
|
|
"grad_norm": 0.4480661727096712,
|
|
"learning_rate": 2.042980252540226e-05,
|
|
"loss": 0.1199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.054562047123909,
|
|
"step": 3005,
|
|
"valid_targets_mean": 3518.2,
|
|
"valid_targets_min": 2215
|
|
},
|
|
{
|
|
"epoch": 3.814955640050697,
|
|
"grad_norm": 0.3580957014506992,
|
|
"learning_rate": 2.0366603962093063e-05,
|
|
"loss": 0.0733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0360712856054306,
|
|
"step": 3010,
|
|
"valid_targets_mean": 3566.5,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 3.821292775665399,
|
|
"grad_norm": 0.33925050347157687,
|
|
"learning_rate": 2.0303401736738092e-05,
|
|
"loss": 0.0896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04121437668800354,
|
|
"step": 3015,
|
|
"valid_targets_mean": 4745.2,
|
|
"valid_targets_min": 3282
|
|
},
|
|
{
|
|
"epoch": 3.8276299112801015,
|
|
"grad_norm": 0.32684675420699344,
|
|
"learning_rate": 2.0240196480671047e-05,
|
|
"loss": 0.0859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.034610141068696976,
|
|
"step": 3020,
|
|
"valid_targets_mean": 3628.2,
|
|
"valid_targets_min": 995
|
|
},
|
|
{
|
|
"epoch": 3.8339670468948035,
|
|
"grad_norm": 0.277312185535824,
|
|
"learning_rate": 2.0176988825255937e-05,
|
|
"loss": 0.0727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03920508176088333,
|
|
"step": 3025,
|
|
"valid_targets_mean": 4350.0,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 3.840304182509506,
|
|
"grad_norm": 0.4458778501842802,
|
|
"learning_rate": 2.011377940188072e-05,
|
|
"loss": 0.0949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05094040930271149,
|
|
"step": 3030,
|
|
"valid_targets_mean": 4333.6,
|
|
"valid_targets_min": 2724
|
|
},
|
|
{
|
|
"epoch": 3.846641318124208,
|
|
"grad_norm": 0.36187305145100945,
|
|
"learning_rate": 2.0050568841951014e-05,
|
|
"loss": 0.0808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.032240405678749084,
|
|
"step": 3035,
|
|
"valid_targets_mean": 2541.4,
|
|
"valid_targets_min": 424
|
|
},
|
|
{
|
|
"epoch": 3.85297845373891,
|
|
"grad_norm": 0.4042123312039504,
|
|
"learning_rate": 1.9987357776883793e-05,
|
|
"loss": 0.115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0497078113257885,
|
|
"step": 3040,
|
|
"valid_targets_mean": 2578.9,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 3.859315589353612,
|
|
"grad_norm": 0.3212238010848917,
|
|
"learning_rate": 1.9924146838101073e-05,
|
|
"loss": 0.1072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.032965101301670074,
|
|
"step": 3045,
|
|
"valid_targets_mean": 3501.0,
|
|
"valid_targets_min": 1938
|
|
},
|
|
{
|
|
"epoch": 3.8656527249683146,
|
|
"grad_norm": 0.4047891403488538,
|
|
"learning_rate": 1.9860936657023623e-05,
|
|
"loss": 0.077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06896867603063583,
|
|
"step": 3050,
|
|
"valid_targets_mean": 2050.1,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 3.8719898605830165,
|
|
"grad_norm": 0.3780258516399311,
|
|
"learning_rate": 1.9797727865064614e-05,
|
|
"loss": 0.1018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04145687073469162,
|
|
"step": 3055,
|
|
"valid_targets_mean": 3624.2,
|
|
"valid_targets_min": 2919
|
|
},
|
|
{
|
|
"epoch": 3.8783269961977185,
|
|
"grad_norm": 0.4215935350501799,
|
|
"learning_rate": 1.9734521093623388e-05,
|
|
"loss": 0.0859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040532201528549194,
|
|
"step": 3060,
|
|
"valid_targets_mean": 2250.9,
|
|
"valid_targets_min": 932
|
|
},
|
|
{
|
|
"epoch": 3.884664131812421,
|
|
"grad_norm": 0.5071082793218558,
|
|
"learning_rate": 1.9671316974079046e-05,
|
|
"loss": 0.0929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05136043578386307,
|
|
"step": 3065,
|
|
"valid_targets_mean": 2113.2,
|
|
"valid_targets_min": 799
|
|
},
|
|
{
|
|
"epoch": 3.891001267427123,
|
|
"grad_norm": 0.6652922703694447,
|
|
"learning_rate": 1.960811613778424e-05,
|
|
"loss": 0.1129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0877087265253067,
|
|
"step": 3070,
|
|
"valid_targets_mean": 1423.0,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 3.897338403041825,
|
|
"grad_norm": 0.3875578773650679,
|
|
"learning_rate": 1.9544919216058825e-05,
|
|
"loss": 0.1174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029354147613048553,
|
|
"step": 3075,
|
|
"valid_targets_mean": 2962.2,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 3.903675538656527,
|
|
"grad_norm": 0.3464037796807788,
|
|
"learning_rate": 1.9481726840183518e-05,
|
|
"loss": 0.0947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03748301416635513,
|
|
"step": 3080,
|
|
"valid_targets_mean": 3670.2,
|
|
"valid_targets_min": 1041
|
|
},
|
|
{
|
|
"epoch": 3.9100126742712296,
|
|
"grad_norm": 0.4203386434062504,
|
|
"learning_rate": 1.9418539641393667e-05,
|
|
"loss": 0.0949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05017174780368805,
|
|
"step": 3085,
|
|
"valid_targets_mean": 3383.0,
|
|
"valid_targets_min": 859
|
|
},
|
|
{
|
|
"epoch": 3.9163498098859315,
|
|
"grad_norm": 0.48414156456459523,
|
|
"learning_rate": 1.9355358250872876e-05,
|
|
"loss": 0.0847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05139702558517456,
|
|
"step": 3090,
|
|
"valid_targets_mean": 3254.8,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 3.922686945500634,
|
|
"grad_norm": 0.39506866263884416,
|
|
"learning_rate": 1.9292183299746767e-05,
|
|
"loss": 0.084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040174342691898346,
|
|
"step": 3095,
|
|
"valid_targets_mean": 3082.1,
|
|
"valid_targets_min": 541
|
|
},
|
|
{
|
|
"epoch": 3.929024081115336,
|
|
"grad_norm": 0.6574619633383776,
|
|
"learning_rate": 1.9229015419076584e-05,
|
|
"loss": 0.0813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051615796983242035,
|
|
"step": 3100,
|
|
"valid_targets_mean": 1239.6,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 3.935361216730038,
|
|
"grad_norm": 0.45272363078474953,
|
|
"learning_rate": 1.9165855239852994e-05,
|
|
"loss": 0.1003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04953429847955704,
|
|
"step": 3105,
|
|
"valid_targets_mean": 3606.0,
|
|
"valid_targets_min": 2798
|
|
},
|
|
{
|
|
"epoch": 3.94169835234474,
|
|
"grad_norm": 0.5164422471482016,
|
|
"learning_rate": 1.910270339298971e-05,
|
|
"loss": 0.08,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036357611417770386,
|
|
"step": 3110,
|
|
"valid_targets_mean": 1990.8,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 3.9480354879594426,
|
|
"grad_norm": 0.3775696958819015,
|
|
"learning_rate": 1.9039560509317198e-05,
|
|
"loss": 0.0821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04038350284099579,
|
|
"step": 3115,
|
|
"valid_targets_mean": 2576.2,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 3.9543726235741445,
|
|
"grad_norm": 0.37261128487856326,
|
|
"learning_rate": 1.8976427219576426e-05,
|
|
"loss": 0.096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04737064987421036,
|
|
"step": 3120,
|
|
"valid_targets_mean": 4306.9,
|
|
"valid_targets_min": 1474
|
|
},
|
|
{
|
|
"epoch": 3.9607097591888465,
|
|
"grad_norm": 0.2680754663961099,
|
|
"learning_rate": 1.8913304154412495e-05,
|
|
"loss": 0.0762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03227478265762329,
|
|
"step": 3125,
|
|
"valid_targets_mean": 5796.5,
|
|
"valid_targets_min": 4657
|
|
},
|
|
{
|
|
"epoch": 3.967046894803549,
|
|
"grad_norm": 0.33546952793997975,
|
|
"learning_rate": 1.88501919443684e-05,
|
|
"loss": 0.081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04714677482843399,
|
|
"step": 3130,
|
|
"valid_targets_mean": 4617.4,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 3.973384030418251,
|
|
"grad_norm": 0.36184320689399674,
|
|
"learning_rate": 1.8787091219878668e-05,
|
|
"loss": 0.0866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040016114711761475,
|
|
"step": 3135,
|
|
"valid_targets_mean": 4013.6,
|
|
"valid_targets_min": 2964
|
|
},
|
|
{
|
|
"epoch": 3.9797211660329532,
|
|
"grad_norm": 0.4751259365388232,
|
|
"learning_rate": 1.8724002611263127e-05,
|
|
"loss": 0.1795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05019637569785118,
|
|
"step": 3140,
|
|
"valid_targets_mean": 2611.4,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 3.986058301647655,
|
|
"grad_norm": 0.33985134386507093,
|
|
"learning_rate": 1.866092674872056e-05,
|
|
"loss": 0.0795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.034049734473228455,
|
|
"step": 3145,
|
|
"valid_targets_mean": 3217.5,
|
|
"valid_targets_min": 985
|
|
},
|
|
{
|
|
"epoch": 3.9923954372623576,
|
|
"grad_norm": 0.3643088235559669,
|
|
"learning_rate": 1.859786426232245e-05,
|
|
"loss": 0.0929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045606065541505814,
|
|
"step": 3150,
|
|
"valid_targets_mean": 3370.2,
|
|
"valid_targets_min": 878
|
|
},
|
|
{
|
|
"epoch": 3.9987325728770595,
|
|
"grad_norm": 0.5722133085384661,
|
|
"learning_rate": 1.8534815782006626e-05,
|
|
"loss": 0.1163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14138518273830414,
|
|
"step": 3155,
|
|
"valid_targets_mean": 2522.5,
|
|
"valid_targets_min": 1214
|
|
},
|
|
{
|
|
"epoch": 4.005069708491762,
|
|
"grad_norm": 0.6507956377796683,
|
|
"learning_rate": 1.847178193757104e-05,
|
|
"loss": 0.1794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15412212908267975,
|
|
"step": 3160,
|
|
"valid_targets_mean": 8029.9,
|
|
"valid_targets_min": 6142
|
|
},
|
|
{
|
|
"epoch": 4.011406844106464,
|
|
"grad_norm": 0.4478968870067142,
|
|
"learning_rate": 1.8408763358667446e-05,
|
|
"loss": 0.1949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09384319931268692,
|
|
"step": 3165,
|
|
"valid_targets_mean": 6664.0,
|
|
"valid_targets_min": 5470
|
|
},
|
|
{
|
|
"epoch": 4.017743979721166,
|
|
"grad_norm": 0.40935318625692013,
|
|
"learning_rate": 1.8345760674795083e-05,
|
|
"loss": 0.1841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09696362912654877,
|
|
"step": 3170,
|
|
"valid_targets_mean": 7126.9,
|
|
"valid_targets_min": 5368
|
|
},
|
|
{
|
|
"epoch": 4.024081115335868,
|
|
"grad_norm": 0.3707747600336817,
|
|
"learning_rate": 1.8282774515294444e-05,
|
|
"loss": 0.1777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08650105446577072,
|
|
"step": 3175,
|
|
"valid_targets_mean": 7634.9,
|
|
"valid_targets_min": 5614
|
|
},
|
|
{
|
|
"epoch": 4.030418250950571,
|
|
"grad_norm": 0.3895718571685875,
|
|
"learning_rate": 1.8219805509340933e-05,
|
|
"loss": 0.1826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08519905060529709,
|
|
"step": 3180,
|
|
"valid_targets_mean": 8630.8,
|
|
"valid_targets_min": 6362
|
|
},
|
|
{
|
|
"epoch": 4.0367553865652726,
|
|
"grad_norm": 0.4112385344473628,
|
|
"learning_rate": 1.815685428593862e-05,
|
|
"loss": 0.192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09387166053056717,
|
|
"step": 3185,
|
|
"valid_targets_mean": 7383.6,
|
|
"valid_targets_min": 5844
|
|
},
|
|
{
|
|
"epoch": 4.0430925221799745,
|
|
"grad_norm": 0.3760677833067658,
|
|
"learning_rate": 1.8093921473913935e-05,
|
|
"loss": 0.1943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10515899956226349,
|
|
"step": 3190,
|
|
"valid_targets_mean": 8017.0,
|
|
"valid_targets_min": 5283
|
|
},
|
|
{
|
|
"epoch": 4.0494296577946765,
|
|
"grad_norm": 0.4040567559370878,
|
|
"learning_rate": 1.8031007701909406e-05,
|
|
"loss": 0.1824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07717405259609222,
|
|
"step": 3195,
|
|
"valid_targets_mean": 7051.1,
|
|
"valid_targets_min": 4135
|
|
},
|
|
{
|
|
"epoch": 4.055766793409379,
|
|
"grad_norm": 0.4095938056283674,
|
|
"learning_rate": 1.7968113598377356e-05,
|
|
"loss": 0.1761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07556945085525513,
|
|
"step": 3200,
|
|
"valid_targets_mean": 5370.4,
|
|
"valid_targets_min": 3919
|
|
},
|
|
{
|
|
"epoch": 4.062103929024081,
|
|
"grad_norm": 0.40833134716136543,
|
|
"learning_rate": 1.7905239791573634e-05,
|
|
"loss": 0.1628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0765988826751709,
|
|
"step": 3205,
|
|
"valid_targets_mean": 6063.6,
|
|
"valid_targets_min": 4863
|
|
},
|
|
{
|
|
"epoch": 4.068441064638783,
|
|
"grad_norm": 0.43628785524935376,
|
|
"learning_rate": 1.784238690955137e-05,
|
|
"loss": 0.1819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08670999854803085,
|
|
"step": 3210,
|
|
"valid_targets_mean": 7001.2,
|
|
"valid_targets_min": 6020
|
|
},
|
|
{
|
|
"epoch": 4.074778200253485,
|
|
"grad_norm": 0.40385758372234326,
|
|
"learning_rate": 1.7779555580154636e-05,
|
|
"loss": 0.1705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08487959206104279,
|
|
"step": 3215,
|
|
"valid_targets_mean": 6181.9,
|
|
"valid_targets_min": 4669
|
|
},
|
|
{
|
|
"epoch": 4.081115335868188,
|
|
"grad_norm": 0.3777064113031733,
|
|
"learning_rate": 1.771674643101225e-05,
|
|
"loss": 0.1772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07879698276519775,
|
|
"step": 3220,
|
|
"valid_targets_mean": 6250.2,
|
|
"valid_targets_min": 4900
|
|
},
|
|
{
|
|
"epoch": 4.08745247148289,
|
|
"grad_norm": 0.3695080695590852,
|
|
"learning_rate": 1.765396008953143e-05,
|
|
"loss": 0.1696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07776199281215668,
|
|
"step": 3225,
|
|
"valid_targets_mean": 7156.4,
|
|
"valid_targets_min": 5323
|
|
},
|
|
{
|
|
"epoch": 4.093789607097592,
|
|
"grad_norm": 0.39525555462013906,
|
|
"learning_rate": 1.7591197182891612e-05,
|
|
"loss": 0.1717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08852382004261017,
|
|
"step": 3230,
|
|
"valid_targets_mean": 7215.8,
|
|
"valid_targets_min": 4720
|
|
},
|
|
{
|
|
"epoch": 4.100126742712294,
|
|
"grad_norm": 0.3927214229118524,
|
|
"learning_rate": 1.7528458338038095e-05,
|
|
"loss": 0.1802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09028687328100204,
|
|
"step": 3235,
|
|
"valid_targets_mean": 6550.8,
|
|
"valid_targets_min": 5739
|
|
},
|
|
{
|
|
"epoch": 4.106463878326996,
|
|
"grad_norm": 0.39167065514260707,
|
|
"learning_rate": 1.746574418167584e-05,
|
|
"loss": 0.171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0881272554397583,
|
|
"step": 3240,
|
|
"valid_targets_mean": 6726.8,
|
|
"valid_targets_min": 4540
|
|
},
|
|
{
|
|
"epoch": 4.112801013941699,
|
|
"grad_norm": 0.49495108812372973,
|
|
"learning_rate": 1.740305534026321e-05,
|
|
"loss": 0.1783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09628526866436005,
|
|
"step": 3245,
|
|
"valid_targets_mean": 4573.4,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 4.119138149556401,
|
|
"grad_norm": 0.42012423441567087,
|
|
"learning_rate": 1.7340392440005656e-05,
|
|
"loss": 0.1904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08931392431259155,
|
|
"step": 3250,
|
|
"valid_targets_mean": 6430.0,
|
|
"valid_targets_min": 5412
|
|
},
|
|
{
|
|
"epoch": 4.1254752851711025,
|
|
"grad_norm": 0.6579537143049158,
|
|
"learning_rate": 1.7277756106849542e-05,
|
|
"loss": 0.1706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.026274899020791054,
|
|
"step": 3255,
|
|
"valid_targets_mean": 583.1,
|
|
"valid_targets_min": 158
|
|
},
|
|
{
|
|
"epoch": 4.1318124207858045,
|
|
"grad_norm": 0.38365712675926134,
|
|
"learning_rate": 1.721514696647581e-05,
|
|
"loss": 0.1628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09118622541427612,
|
|
"step": 3260,
|
|
"valid_targets_mean": 8230.9,
|
|
"valid_targets_min": 5061
|
|
},
|
|
{
|
|
"epoch": 4.138149556400507,
|
|
"grad_norm": 0.43316890423801474,
|
|
"learning_rate": 1.7152565644293812e-05,
|
|
"loss": 0.1742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09656210988759995,
|
|
"step": 3265,
|
|
"valid_targets_mean": 7238.9,
|
|
"valid_targets_min": 5112
|
|
},
|
|
{
|
|
"epoch": 4.144486692015209,
|
|
"grad_norm": 0.40318780295632867,
|
|
"learning_rate": 1.7090012765434974e-05,
|
|
"loss": 0.1721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07718829810619354,
|
|
"step": 3270,
|
|
"valid_targets_mean": 6322.8,
|
|
"valid_targets_min": 4867
|
|
},
|
|
{
|
|
"epoch": 4.150823827629911,
|
|
"grad_norm": 0.3808568019796685,
|
|
"learning_rate": 1.702748895474665e-05,
|
|
"loss": 0.1799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08667613565921783,
|
|
"step": 3275,
|
|
"valid_targets_mean": 7887.0,
|
|
"valid_targets_min": 6004
|
|
},
|
|
{
|
|
"epoch": 4.157160963244613,
|
|
"grad_norm": 0.3649199509739971,
|
|
"learning_rate": 1.6964994836785788e-05,
|
|
"loss": 0.1691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0775088295340538,
|
|
"step": 3280,
|
|
"valid_targets_mean": 7090.2,
|
|
"valid_targets_min": 4917
|
|
},
|
|
{
|
|
"epoch": 4.163498098859316,
|
|
"grad_norm": 0.3841065051439073,
|
|
"learning_rate": 1.6902531035812744e-05,
|
|
"loss": 0.1647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08092792332172394,
|
|
"step": 3285,
|
|
"valid_targets_mean": 7441.9,
|
|
"valid_targets_min": 4202
|
|
},
|
|
{
|
|
"epoch": 4.169835234474018,
|
|
"grad_norm": 0.3685589953284071,
|
|
"learning_rate": 1.6840098175785052e-05,
|
|
"loss": 0.1501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0794074535369873,
|
|
"step": 3290,
|
|
"valid_targets_mean": 7901.8,
|
|
"valid_targets_min": 5060
|
|
},
|
|
{
|
|
"epoch": 4.17617237008872,
|
|
"grad_norm": 0.4461549053932688,
|
|
"learning_rate": 1.6777696880351143e-05,
|
|
"loss": 0.1526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07229609787464142,
|
|
"step": 3295,
|
|
"valid_targets_mean": 8557.6,
|
|
"valid_targets_min": 6331
|
|
},
|
|
{
|
|
"epoch": 4.182509505703422,
|
|
"grad_norm": 0.43070290253728966,
|
|
"learning_rate": 1.6715327772844174e-05,
|
|
"loss": 0.1785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08373712003231049,
|
|
"step": 3300,
|
|
"valid_targets_mean": 4976.4,
|
|
"valid_targets_min": 2847
|
|
},
|
|
{
|
|
"epoch": 4.188846641318124,
|
|
"grad_norm": 0.4353425517800193,
|
|
"learning_rate": 1.665299147627575e-05,
|
|
"loss": 0.1588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09483518451452255,
|
|
"step": 3305,
|
|
"valid_targets_mean": 7129.1,
|
|
"valid_targets_min": 5331
|
|
},
|
|
{
|
|
"epoch": 4.195183776932827,
|
|
"grad_norm": 0.44132980936063754,
|
|
"learning_rate": 1.6590688613329754e-05,
|
|
"loss": 0.1814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09252781420946121,
|
|
"step": 3310,
|
|
"valid_targets_mean": 6864.0,
|
|
"valid_targets_min": 4697
|
|
},
|
|
{
|
|
"epoch": 4.201520912547529,
|
|
"grad_norm": 0.4303990791989475,
|
|
"learning_rate": 1.6528419806356058e-05,
|
|
"loss": 0.1871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08542542159557343,
|
|
"step": 3315,
|
|
"valid_targets_mean": 6631.9,
|
|
"valid_targets_min": 4623
|
|
},
|
|
{
|
|
"epoch": 4.2078580481622305,
|
|
"grad_norm": 0.3868651304621367,
|
|
"learning_rate": 1.6466185677364378e-05,
|
|
"loss": 0.1794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08131472766399384,
|
|
"step": 3320,
|
|
"valid_targets_mean": 6330.4,
|
|
"valid_targets_min": 5424
|
|
},
|
|
{
|
|
"epoch": 4.2141951837769325,
|
|
"grad_norm": 0.40233873874634757,
|
|
"learning_rate": 1.6403986848018022e-05,
|
|
"loss": 0.1731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08282483369112015,
|
|
"step": 3325,
|
|
"valid_targets_mean": 6814.8,
|
|
"valid_targets_min": 4964
|
|
},
|
|
{
|
|
"epoch": 4.220532319391635,
|
|
"grad_norm": 0.4489950981810007,
|
|
"learning_rate": 1.6341823939627658e-05,
|
|
"loss": 0.1817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09733180701732635,
|
|
"step": 3330,
|
|
"valid_targets_mean": 6915.8,
|
|
"valid_targets_min": 5321
|
|
},
|
|
{
|
|
"epoch": 4.226869455006337,
|
|
"grad_norm": 0.39547203519428475,
|
|
"learning_rate": 1.6279697573145177e-05,
|
|
"loss": 0.1737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08847206830978394,
|
|
"step": 3335,
|
|
"valid_targets_mean": 6464.5,
|
|
"valid_targets_min": 5185
|
|
},
|
|
{
|
|
"epoch": 4.233206590621039,
|
|
"grad_norm": 0.40225921596760217,
|
|
"learning_rate": 1.6217608369157417e-05,
|
|
"loss": 0.1775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08991692960262299,
|
|
"step": 3340,
|
|
"valid_targets_mean": 7048.6,
|
|
"valid_targets_min": 4913
|
|
},
|
|
{
|
|
"epoch": 4.239543726235741,
|
|
"grad_norm": 0.43920064261207314,
|
|
"learning_rate": 1.6155556947880027e-05,
|
|
"loss": 0.1797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09948025643825531,
|
|
"step": 3345,
|
|
"valid_targets_mean": 7469.2,
|
|
"valid_targets_min": 5426
|
|
},
|
|
{
|
|
"epoch": 4.245880861850444,
|
|
"grad_norm": 0.5004068434611045,
|
|
"learning_rate": 1.6093543929151198e-05,
|
|
"loss": 0.1792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09273447096347809,
|
|
"step": 3350,
|
|
"valid_targets_mean": 6819.4,
|
|
"valid_targets_min": 5286
|
|
},
|
|
{
|
|
"epoch": 4.252217997465146,
|
|
"grad_norm": 0.39343673005098523,
|
|
"learning_rate": 1.6031569932425557e-05,
|
|
"loss": 0.1776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09243512153625488,
|
|
"step": 3355,
|
|
"valid_targets_mean": 7107.5,
|
|
"valid_targets_min": 5201
|
|
},
|
|
{
|
|
"epoch": 4.258555133079848,
|
|
"grad_norm": 0.4081265913462701,
|
|
"learning_rate": 1.5969635576767915e-05,
|
|
"loss": 0.1748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08472248911857605,
|
|
"step": 3360,
|
|
"valid_targets_mean": 5642.6,
|
|
"valid_targets_min": 4838
|
|
},
|
|
{
|
|
"epoch": 4.26489226869455,
|
|
"grad_norm": 0.847595075851216,
|
|
"learning_rate": 1.590774148084709e-05,
|
|
"loss": 0.1415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13928478956222534,
|
|
"step": 3365,
|
|
"valid_targets_mean": 2054.1,
|
|
"valid_targets_min": 134
|
|
},
|
|
{
|
|
"epoch": 4.271229404309253,
|
|
"grad_norm": 0.4767433112556764,
|
|
"learning_rate": 1.5845888262929778e-05,
|
|
"loss": 0.1778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08068570494651794,
|
|
"step": 3370,
|
|
"valid_targets_mean": 6686.4,
|
|
"valid_targets_min": 5256
|
|
},
|
|
{
|
|
"epoch": 4.277566539923955,
|
|
"grad_norm": 0.41897875921568106,
|
|
"learning_rate": 1.5784076540874306e-05,
|
|
"loss": 0.1701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09213928878307343,
|
|
"step": 3375,
|
|
"valid_targets_mean": 6216.6,
|
|
"valid_targets_min": 5542
|
|
},
|
|
{
|
|
"epoch": 4.283903675538657,
|
|
"grad_norm": 0.4004650237809308,
|
|
"learning_rate": 1.572230693212452e-05,
|
|
"loss": 0.1739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08594687283039093,
|
|
"step": 3380,
|
|
"valid_targets_mean": 6486.1,
|
|
"valid_targets_min": 5531
|
|
},
|
|
{
|
|
"epoch": 4.2902408111533585,
|
|
"grad_norm": 0.4138971283954118,
|
|
"learning_rate": 1.5660580053703558e-05,
|
|
"loss": 0.1692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0850023552775383,
|
|
"step": 3385,
|
|
"valid_targets_mean": 5797.4,
|
|
"valid_targets_min": 3911
|
|
},
|
|
{
|
|
"epoch": 4.2965779467680605,
|
|
"grad_norm": 0.4320522316838559,
|
|
"learning_rate": 1.5598896522207772e-05,
|
|
"loss": 0.1706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08721490204334259,
|
|
"step": 3390,
|
|
"valid_targets_mean": 6357.5,
|
|
"valid_targets_min": 5195
|
|
},
|
|
{
|
|
"epoch": 4.302915082382763,
|
|
"grad_norm": 0.44667244731980843,
|
|
"learning_rate": 1.5537256953800465e-05,
|
|
"loss": 0.1783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0873722955584526,
|
|
"step": 3395,
|
|
"valid_targets_mean": 5808.9,
|
|
"valid_targets_min": 4959
|
|
},
|
|
{
|
|
"epoch": 4.309252217997465,
|
|
"grad_norm": 0.5365607508140612,
|
|
"learning_rate": 1.547566196420582e-05,
|
|
"loss": 0.1721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05607900023460388,
|
|
"step": 3400,
|
|
"valid_targets_mean": 2591.5,
|
|
"valid_targets_min": 842
|
|
},
|
|
{
|
|
"epoch": 4.315589353612167,
|
|
"grad_norm": 0.45706912756488494,
|
|
"learning_rate": 1.541411216870271e-05,
|
|
"loss": 0.1446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08343985676765442,
|
|
"step": 3405,
|
|
"valid_targets_mean": 6006.9,
|
|
"valid_targets_min": 4769
|
|
},
|
|
{
|
|
"epoch": 4.321926489226869,
|
|
"grad_norm": 0.4313133429490614,
|
|
"learning_rate": 1.5352608182118546e-05,
|
|
"loss": 0.1689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08340340852737427,
|
|
"step": 3410,
|
|
"valid_targets_mean": 6764.2,
|
|
"valid_targets_min": 5046
|
|
},
|
|
{
|
|
"epoch": 4.328263624841572,
|
|
"grad_norm": 0.48433144390368466,
|
|
"learning_rate": 1.5291150618823172e-05,
|
|
"loss": 0.1741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09057652950286865,
|
|
"step": 3415,
|
|
"valid_targets_mean": 6055.8,
|
|
"valid_targets_min": 5182
|
|
},
|
|
{
|
|
"epoch": 4.334600760456274,
|
|
"grad_norm": 0.39679448659828187,
|
|
"learning_rate": 1.522974009272268e-05,
|
|
"loss": 0.1707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08669161796569824,
|
|
"step": 3420,
|
|
"valid_targets_mean": 7101.6,
|
|
"valid_targets_min": 5366
|
|
},
|
|
{
|
|
"epoch": 4.340937896070976,
|
|
"grad_norm": 0.42168850019460746,
|
|
"learning_rate": 1.5168377217253333e-05,
|
|
"loss": 0.1665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.087561696767807,
|
|
"step": 3425,
|
|
"valid_targets_mean": 7111.9,
|
|
"valid_targets_min": 5192
|
|
},
|
|
{
|
|
"epoch": 4.347275031685678,
|
|
"grad_norm": 0.4485831061922792,
|
|
"learning_rate": 1.5107062605375371e-05,
|
|
"loss": 0.1707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08478708565235138,
|
|
"step": 3430,
|
|
"valid_targets_mean": 5509.9,
|
|
"valid_targets_min": 4790
|
|
},
|
|
{
|
|
"epoch": 4.35361216730038,
|
|
"grad_norm": 0.5865065020145389,
|
|
"learning_rate": 1.5045796869566953e-05,
|
|
"loss": 0.1589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05632995814085007,
|
|
"step": 3435,
|
|
"valid_targets_mean": 1942.4,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 4.359949302915083,
|
|
"grad_norm": 0.3812960844517703,
|
|
"learning_rate": 1.4984580621817995e-05,
|
|
"loss": 0.1088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04109615087509155,
|
|
"step": 3440,
|
|
"valid_targets_mean": 3192.9,
|
|
"valid_targets_min": 1517
|
|
},
|
|
{
|
|
"epoch": 4.366286438529785,
|
|
"grad_norm": 0.37231634462495694,
|
|
"learning_rate": 1.4923414473624083e-05,
|
|
"loss": 0.087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04177839308977127,
|
|
"step": 3445,
|
|
"valid_targets_mean": 3343.8,
|
|
"valid_targets_min": 2790
|
|
},
|
|
{
|
|
"epoch": 4.3726235741444865,
|
|
"grad_norm": 0.3986483848886159,
|
|
"learning_rate": 1.4862299035980334e-05,
|
|
"loss": 0.0815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03591617941856384,
|
|
"step": 3450,
|
|
"valid_targets_mean": 3391.5,
|
|
"valid_targets_min": 2133
|
|
},
|
|
{
|
|
"epoch": 4.3789607097591885,
|
|
"grad_norm": 0.4350821359129515,
|
|
"learning_rate": 1.4801234919375325e-05,
|
|
"loss": 0.0982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04020581394433975,
|
|
"step": 3455,
|
|
"valid_targets_mean": 3268.0,
|
|
"valid_targets_min": 2338
|
|
},
|
|
{
|
|
"epoch": 4.385297845373891,
|
|
"grad_norm": 0.4470281137968758,
|
|
"learning_rate": 1.4740222733784993e-05,
|
|
"loss": 0.0804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0329197496175766,
|
|
"step": 3460,
|
|
"valid_targets_mean": 2079.8,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 4.391634980988593,
|
|
"grad_norm": 1.073619847640112,
|
|
"learning_rate": 1.46792630886665e-05,
|
|
"loss": 0.1099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.087935671210289,
|
|
"step": 3465,
|
|
"valid_targets_mean": 1305.5,
|
|
"valid_targets_min": 724
|
|
},
|
|
{
|
|
"epoch": 4.397972116603295,
|
|
"grad_norm": 0.438535473894142,
|
|
"learning_rate": 1.4618356592952205e-05,
|
|
"loss": 0.1166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0384996235370636,
|
|
"step": 3470,
|
|
"valid_targets_mean": 4012.4,
|
|
"valid_targets_min": 3152
|
|
},
|
|
{
|
|
"epoch": 4.404309252217997,
|
|
"grad_norm": 0.4288712058539197,
|
|
"learning_rate": 1.4557503855043535e-05,
|
|
"loss": 0.0895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0413883700966835,
|
|
"step": 3475,
|
|
"valid_targets_mean": 3015.2,
|
|
"valid_targets_min": 947
|
|
},
|
|
{
|
|
"epoch": 4.4106463878327,
|
|
"grad_norm": 0.5757023048301575,
|
|
"learning_rate": 1.4496705482804943e-05,
|
|
"loss": 0.119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10589364171028137,
|
|
"step": 3480,
|
|
"valid_targets_mean": 2490.1,
|
|
"valid_targets_min": 859
|
|
},
|
|
{
|
|
"epoch": 4.416983523447402,
|
|
"grad_norm": 0.2648841662356395,
|
|
"learning_rate": 1.4435962083557779e-05,
|
|
"loss": 0.0759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04002828150987625,
|
|
"step": 3485,
|
|
"valid_targets_mean": 6586.0,
|
|
"valid_targets_min": 5490
|
|
},
|
|
{
|
|
"epoch": 4.423320659062104,
|
|
"grad_norm": 0.37135836259899613,
|
|
"learning_rate": 1.4375274264074319e-05,
|
|
"loss": 0.0817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.033026646822690964,
|
|
"step": 3490,
|
|
"valid_targets_mean": 2234.0,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 4.429657794676806,
|
|
"grad_norm": 0.31800488283244577,
|
|
"learning_rate": 1.4314642630571597e-05,
|
|
"loss": 0.0818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03745786100625992,
|
|
"step": 3495,
|
|
"valid_targets_mean": 3456.5,
|
|
"valid_targets_min": 1096
|
|
},
|
|
{
|
|
"epoch": 4.435994930291509,
|
|
"grad_norm": 0.42522791907936675,
|
|
"learning_rate": 1.4254067788705407e-05,
|
|
"loss": 0.0971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03857950121164322,
|
|
"step": 3500,
|
|
"valid_targets_mean": 2241.9,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 4.442332065906211,
|
|
"grad_norm": 0.3897917145216043,
|
|
"learning_rate": 1.4193550343564284e-05,
|
|
"loss": 0.1037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03128671646118164,
|
|
"step": 3505,
|
|
"valid_targets_mean": 2544.4,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 4.448669201520913,
|
|
"grad_norm": 0.41637578889114835,
|
|
"learning_rate": 1.4133090899663371e-05,
|
|
"loss": 0.0824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04232652485370636,
|
|
"step": 3510,
|
|
"valid_targets_mean": 3194.9,
|
|
"valid_targets_min": 2725
|
|
},
|
|
{
|
|
"epoch": 4.455006337135615,
|
|
"grad_norm": 0.36530670525775477,
|
|
"learning_rate": 1.4072690060938463e-05,
|
|
"loss": 0.0773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03570668026804924,
|
|
"step": 3515,
|
|
"valid_targets_mean": 2980.9,
|
|
"valid_targets_min": 1111
|
|
},
|
|
{
|
|
"epoch": 4.4613434727503165,
|
|
"grad_norm": 0.529228053224405,
|
|
"learning_rate": 1.4012348430739926e-05,
|
|
"loss": 0.0806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06767016649246216,
|
|
"step": 3520,
|
|
"valid_targets_mean": 1279.6,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 4.467680608365019,
|
|
"grad_norm": 0.5452459641620294,
|
|
"learning_rate": 1.39520666118267e-05,
|
|
"loss": 0.0928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03841352462768555,
|
|
"step": 3525,
|
|
"valid_targets_mean": 1756.1,
|
|
"valid_targets_min": 493
|
|
},
|
|
{
|
|
"epoch": 4.474017743979721,
|
|
"grad_norm": 0.4168089492244079,
|
|
"learning_rate": 1.3891845206360244e-05,
|
|
"loss": 0.0889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03970293700695038,
|
|
"step": 3530,
|
|
"valid_targets_mean": 3653.8,
|
|
"valid_targets_min": 1937
|
|
},
|
|
{
|
|
"epoch": 4.480354879594423,
|
|
"grad_norm": 0.43866827002185593,
|
|
"learning_rate": 1.3831684815898553e-05,
|
|
"loss": 0.0933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.034057825803756714,
|
|
"step": 3535,
|
|
"valid_targets_mean": 1861.4,
|
|
"valid_targets_min": 802
|
|
},
|
|
{
|
|
"epoch": 4.486692015209125,
|
|
"grad_norm": 0.539070977077303,
|
|
"learning_rate": 1.3771586041390149e-05,
|
|
"loss": 0.1004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0688154324889183,
|
|
"step": 3540,
|
|
"valid_targets_mean": 1704.8,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 4.493029150823828,
|
|
"grad_norm": 0.4038053899963813,
|
|
"learning_rate": 1.371154948316803e-05,
|
|
"loss": 0.1174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04917750880122185,
|
|
"step": 3545,
|
|
"valid_targets_mean": 2861.1,
|
|
"valid_targets_min": 641
|
|
},
|
|
{
|
|
"epoch": 4.49936628643853,
|
|
"grad_norm": 0.44401897477226854,
|
|
"learning_rate": 1.3651575740943746e-05,
|
|
"loss": 0.077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04368500038981438,
|
|
"step": 3550,
|
|
"valid_targets_mean": 1931.4,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 4.505703422053232,
|
|
"grad_norm": 0.4953876980543921,
|
|
"learning_rate": 1.3591665413801348e-05,
|
|
"loss": 0.1159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07628709077835083,
|
|
"step": 3555,
|
|
"valid_targets_mean": 2699.9,
|
|
"valid_targets_min": 873
|
|
},
|
|
{
|
|
"epoch": 4.512040557667934,
|
|
"grad_norm": 0.5050178341732591,
|
|
"learning_rate": 1.3531819100191448e-05,
|
|
"loss": 0.1023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08403439819812775,
|
|
"step": 3560,
|
|
"valid_targets_mean": 3225.1,
|
|
"valid_targets_min": 2731
|
|
},
|
|
{
|
|
"epoch": 4.518377693282636,
|
|
"grad_norm": 0.519492109183501,
|
|
"learning_rate": 1.3472037397925191e-05,
|
|
"loss": 0.0851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0731634572148323,
|
|
"step": 3565,
|
|
"valid_targets_mean": 2218.1,
|
|
"valid_targets_min": 817
|
|
},
|
|
{
|
|
"epoch": 4.524714828897339,
|
|
"grad_norm": 0.6896751635410471,
|
|
"learning_rate": 1.3412320904168338e-05,
|
|
"loss": 0.2521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21972641348838806,
|
|
"step": 3570,
|
|
"valid_targets_mean": 2309.4,
|
|
"valid_targets_min": 807
|
|
},
|
|
{
|
|
"epoch": 4.531051964512041,
|
|
"grad_norm": 0.44447810124898035,
|
|
"learning_rate": 1.3352670215435271e-05,
|
|
"loss": 0.1188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029691850766539574,
|
|
"step": 3575,
|
|
"valid_targets_mean": 1056.4,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 4.537389100126743,
|
|
"grad_norm": 0.4110319754682401,
|
|
"learning_rate": 1.3293085927583018e-05,
|
|
"loss": 0.0981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0503869391977787,
|
|
"step": 3580,
|
|
"valid_targets_mean": 3701.1,
|
|
"valid_targets_min": 2349
|
|
},
|
|
{
|
|
"epoch": 4.5437262357414445,
|
|
"grad_norm": 0.4424435130488359,
|
|
"learning_rate": 1.3233568635805347e-05,
|
|
"loss": 0.1803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.059051912277936935,
|
|
"step": 3585,
|
|
"valid_targets_mean": 3504.8,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 4.550063371356147,
|
|
"grad_norm": 0.42048507427391457,
|
|
"learning_rate": 1.3174118934626785e-05,
|
|
"loss": 0.0847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05271763354539871,
|
|
"step": 3590,
|
|
"valid_targets_mean": 4043.9,
|
|
"valid_targets_min": 2131
|
|
},
|
|
{
|
|
"epoch": 4.556400506970849,
|
|
"grad_norm": 0.4180453923440619,
|
|
"learning_rate": 1.3114737417896694e-05,
|
|
"loss": 0.0835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05150268226861954,
|
|
"step": 3595,
|
|
"valid_targets_mean": 3744.5,
|
|
"valid_targets_min": 2665
|
|
},
|
|
{
|
|
"epoch": 4.562737642585551,
|
|
"grad_norm": 0.3672807900376062,
|
|
"learning_rate": 1.3055424678783313e-05,
|
|
"loss": 0.0873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03365042805671692,
|
|
"step": 3600,
|
|
"valid_targets_mean": 2820.9,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 4.569074778200253,
|
|
"grad_norm": 0.42426580703962236,
|
|
"learning_rate": 1.2996181309767896e-05,
|
|
"loss": 0.0797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04497058689594269,
|
|
"step": 3605,
|
|
"valid_targets_mean": 2604.8,
|
|
"valid_targets_min": 825
|
|
},
|
|
{
|
|
"epoch": 4.575411913814955,
|
|
"grad_norm": 0.4011722404649056,
|
|
"learning_rate": 1.2937007902638708e-05,
|
|
"loss": 0.0797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036316610872745514,
|
|
"step": 3610,
|
|
"valid_targets_mean": 4292.5,
|
|
"valid_targets_min": 3013
|
|
},
|
|
{
|
|
"epoch": 4.581749049429658,
|
|
"grad_norm": 0.3658968765436006,
|
|
"learning_rate": 1.2877905048485184e-05,
|
|
"loss": 0.075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.037785254418849945,
|
|
"step": 3615,
|
|
"valid_targets_mean": 3588.4,
|
|
"valid_targets_min": 1956
|
|
},
|
|
{
|
|
"epoch": 4.58808618504436,
|
|
"grad_norm": 0.40910077447869847,
|
|
"learning_rate": 1.2818873337691993e-05,
|
|
"loss": 0.0825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03945530205965042,
|
|
"step": 3620,
|
|
"valid_targets_mean": 3900.8,
|
|
"valid_targets_min": 3297
|
|
},
|
|
{
|
|
"epoch": 4.594423320659062,
|
|
"grad_norm": 0.4323287029078876,
|
|
"learning_rate": 1.275991335993313e-05,
|
|
"loss": 0.0741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05373235046863556,
|
|
"step": 3625,
|
|
"valid_targets_mean": 3566.6,
|
|
"valid_targets_min": 1240
|
|
},
|
|
{
|
|
"epoch": 4.600760456273765,
|
|
"grad_norm": 0.3132259012108508,
|
|
"learning_rate": 1.2701025704166066e-05,
|
|
"loss": 0.1113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.022701701149344444,
|
|
"step": 3630,
|
|
"valid_targets_mean": 2558.6,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 4.607097591888467,
|
|
"grad_norm": 0.3880221359064098,
|
|
"learning_rate": 1.2642210958625818e-05,
|
|
"loss": 0.0686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.026679987087845802,
|
|
"step": 3635,
|
|
"valid_targets_mean": 2930.1,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 4.613434727503169,
|
|
"grad_norm": 0.3838309451700513,
|
|
"learning_rate": 1.2583469710819118e-05,
|
|
"loss": 0.0723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.037138618528842926,
|
|
"step": 3640,
|
|
"valid_targets_mean": 3742.1,
|
|
"valid_targets_min": 3230
|
|
},
|
|
{
|
|
"epoch": 4.619771863117871,
|
|
"grad_norm": 0.4072436464963615,
|
|
"learning_rate": 1.2524802547518492e-05,
|
|
"loss": 0.0701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0322340726852417,
|
|
"step": 3645,
|
|
"valid_targets_mean": 2836.9,
|
|
"valid_targets_min": 1802
|
|
},
|
|
{
|
|
"epoch": 4.6261089987325725,
|
|
"grad_norm": 0.7655826872146105,
|
|
"learning_rate": 1.2466210054756453e-05,
|
|
"loss": 0.1224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09948647767305374,
|
|
"step": 3650,
|
|
"valid_targets_mean": 1251.2,
|
|
"valid_targets_min": 634
|
|
},
|
|
{
|
|
"epoch": 4.632446134347275,
|
|
"grad_norm": 0.38297997397163464,
|
|
"learning_rate": 1.2407692817819619e-05,
|
|
"loss": 0.0811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06282910704612732,
|
|
"step": 3655,
|
|
"valid_targets_mean": 3305.5,
|
|
"valid_targets_min": 1082
|
|
},
|
|
{
|
|
"epoch": 4.638783269961977,
|
|
"grad_norm": 0.5134073669168782,
|
|
"learning_rate": 1.2349251421242846e-05,
|
|
"loss": 0.1068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05967462435364723,
|
|
"step": 3660,
|
|
"valid_targets_mean": 2356.9,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 4.645120405576679,
|
|
"grad_norm": 0.39864792951518085,
|
|
"learning_rate": 1.2290886448803447e-05,
|
|
"loss": 0.0811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03576682507991791,
|
|
"step": 3665,
|
|
"valid_targets_mean": 3074.0,
|
|
"valid_targets_min": 847
|
|
},
|
|
{
|
|
"epoch": 4.651457541191381,
|
|
"grad_norm": 0.696371491933424,
|
|
"learning_rate": 1.2232598483515297e-05,
|
|
"loss": 0.1067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051472313702106476,
|
|
"step": 3670,
|
|
"valid_targets_mean": 1958.4,
|
|
"valid_targets_min": 916
|
|
},
|
|
{
|
|
"epoch": 4.657794676806084,
|
|
"grad_norm": 0.4703846625521131,
|
|
"learning_rate": 1.2174388107623065e-05,
|
|
"loss": 0.1074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04495227709412575,
|
|
"step": 3675,
|
|
"valid_targets_mean": 3170.1,
|
|
"valid_targets_min": 1225
|
|
},
|
|
{
|
|
"epoch": 4.664131812420786,
|
|
"grad_norm": 0.7210430787222473,
|
|
"learning_rate": 1.2116255902596343e-05,
|
|
"loss": 0.1085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05139915645122528,
|
|
"step": 3680,
|
|
"valid_targets_mean": 1274.2,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 4.670468948035488,
|
|
"grad_norm": 0.44998715380742205,
|
|
"learning_rate": 1.2058202449123889e-05,
|
|
"loss": 0.0811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0435343012213707,
|
|
"step": 3685,
|
|
"valid_targets_mean": 3223.1,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 4.67680608365019,
|
|
"grad_norm": 0.42322852211762996,
|
|
"learning_rate": 1.2000228327107787e-05,
|
|
"loss": 0.0878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04648396372795105,
|
|
"step": 3690,
|
|
"valid_targets_mean": 3822.5,
|
|
"valid_targets_min": 2989
|
|
},
|
|
{
|
|
"epoch": 4.683143219264892,
|
|
"grad_norm": 0.4557170088299088,
|
|
"learning_rate": 1.1942334115657694e-05,
|
|
"loss": 0.0838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03743886575102806,
|
|
"step": 3695,
|
|
"valid_targets_mean": 2606.0,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 4.689480354879595,
|
|
"grad_norm": 0.4051289008306255,
|
|
"learning_rate": 1.1884520393085e-05,
|
|
"loss": 0.0711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03932993859052658,
|
|
"step": 3700,
|
|
"valid_targets_mean": 4201.2,
|
|
"valid_targets_min": 1052
|
|
},
|
|
{
|
|
"epoch": 4.695817490494297,
|
|
"grad_norm": 0.3414053973747026,
|
|
"learning_rate": 1.1826787736897101e-05,
|
|
"loss": 0.0689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04191991686820984,
|
|
"step": 3705,
|
|
"valid_targets_mean": 4603.2,
|
|
"valid_targets_min": 4175
|
|
},
|
|
{
|
|
"epoch": 4.702154626108999,
|
|
"grad_norm": 0.3582340305239411,
|
|
"learning_rate": 1.1769136723791621e-05,
|
|
"loss": 0.0802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03489068150520325,
|
|
"step": 3710,
|
|
"valid_targets_mean": 4007.9,
|
|
"valid_targets_min": 2838
|
|
},
|
|
{
|
|
"epoch": 4.7084917617237005,
|
|
"grad_norm": 0.3800568641114985,
|
|
"learning_rate": 1.1711567929650616e-05,
|
|
"loss": 0.0956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.031015362590551376,
|
|
"step": 3715,
|
|
"valid_targets_mean": 2875.9,
|
|
"valid_targets_min": 1191
|
|
},
|
|
{
|
|
"epoch": 4.714828897338403,
|
|
"grad_norm": 0.504111657409252,
|
|
"learning_rate": 1.1654081929534882e-05,
|
|
"loss": 0.0899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042262159287929535,
|
|
"step": 3720,
|
|
"valid_targets_mean": 1940.6,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 4.721166032953105,
|
|
"grad_norm": 0.3046807708008643,
|
|
"learning_rate": 1.1596679297678146e-05,
|
|
"loss": 0.0836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02562888339161873,
|
|
"step": 3725,
|
|
"valid_targets_mean": 4212.2,
|
|
"valid_targets_min": 3934
|
|
},
|
|
{
|
|
"epoch": 4.727503168567807,
|
|
"grad_norm": 0.43700471434079735,
|
|
"learning_rate": 1.1539360607481383e-05,
|
|
"loss": 0.0741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03133932501077652,
|
|
"step": 3730,
|
|
"valid_targets_mean": 2616.0,
|
|
"valid_targets_min": 945
|
|
},
|
|
{
|
|
"epoch": 4.733840304182509,
|
|
"grad_norm": 0.49679152359176304,
|
|
"learning_rate": 1.1482126431507043e-05,
|
|
"loss": 0.0883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056475985795259476,
|
|
"step": 3735,
|
|
"valid_targets_mean": 3798.2,
|
|
"valid_targets_min": 2782
|
|
},
|
|
{
|
|
"epoch": 4.740177439797211,
|
|
"grad_norm": 0.33316107323105537,
|
|
"learning_rate": 1.1424977341473378e-05,
|
|
"loss": 0.0704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02669833041727543,
|
|
"step": 3740,
|
|
"valid_targets_mean": 3480.8,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 4.746514575411914,
|
|
"grad_norm": 0.6029623659326176,
|
|
"learning_rate": 1.1367913908248705e-05,
|
|
"loss": 0.0827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05315272510051727,
|
|
"step": 3745,
|
|
"valid_targets_mean": 2048.1,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 4.752851711026616,
|
|
"grad_norm": 0.6172902227589799,
|
|
"learning_rate": 1.1310936701845678e-05,
|
|
"loss": 0.079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05266937240958214,
|
|
"step": 3750,
|
|
"valid_targets_mean": 2918.8,
|
|
"valid_targets_min": 1020
|
|
},
|
|
{
|
|
"epoch": 4.759188846641318,
|
|
"grad_norm": 0.5286801415468692,
|
|
"learning_rate": 1.1254046291415665e-05,
|
|
"loss": 0.081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05015048757195473,
|
|
"step": 3755,
|
|
"valid_targets_mean": 1942.0,
|
|
"valid_targets_min": 914
|
|
},
|
|
{
|
|
"epoch": 4.765525982256021,
|
|
"grad_norm": 0.5009414321899349,
|
|
"learning_rate": 1.1197243245242978e-05,
|
|
"loss": 0.0974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045358121395111084,
|
|
"step": 3760,
|
|
"valid_targets_mean": 2201.6,
|
|
"valid_targets_min": 1015
|
|
},
|
|
{
|
|
"epoch": 4.771863117870723,
|
|
"grad_norm": 0.5507155568989981,
|
|
"learning_rate": 1.1140528130739274e-05,
|
|
"loss": 0.0759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03730563074350357,
|
|
"step": 3765,
|
|
"valid_targets_mean": 1288.0,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 4.778200253485425,
|
|
"grad_norm": 0.3843998785000943,
|
|
"learning_rate": 1.1083901514437815e-05,
|
|
"loss": 0.0873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.032951176166534424,
|
|
"step": 3770,
|
|
"valid_targets_mean": 2968.9,
|
|
"valid_targets_min": 1024
|
|
},
|
|
{
|
|
"epoch": 4.784537389100127,
|
|
"grad_norm": 0.5311270996035165,
|
|
"learning_rate": 1.1027363961987882e-05,
|
|
"loss": 0.1046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03526683151721954,
|
|
"step": 3775,
|
|
"valid_targets_mean": 3416.6,
|
|
"valid_targets_min": 1322
|
|
},
|
|
{
|
|
"epoch": 4.7908745247148286,
|
|
"grad_norm": 0.45773623738831687,
|
|
"learning_rate": 1.0970916038149064e-05,
|
|
"loss": 0.0801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04306003451347351,
|
|
"step": 3780,
|
|
"valid_targets_mean": 3096.2,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 4.797211660329531,
|
|
"grad_norm": 0.4773549005880393,
|
|
"learning_rate": 1.091455830678566e-05,
|
|
"loss": 0.0716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.037601038813591,
|
|
"step": 3785,
|
|
"valid_targets_mean": 2400.8,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 4.803548795944233,
|
|
"grad_norm": 0.48505585355209163,
|
|
"learning_rate": 1.0858291330861e-05,
|
|
"loss": 0.0867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056301429867744446,
|
|
"step": 3790,
|
|
"valid_targets_mean": 3119.5,
|
|
"valid_targets_min": 1420
|
|
},
|
|
{
|
|
"epoch": 4.809885931558935,
|
|
"grad_norm": 0.41847133453182483,
|
|
"learning_rate": 1.0802115672431856e-05,
|
|
"loss": 0.0984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.037923239171504974,
|
|
"step": 3795,
|
|
"valid_targets_mean": 3015.2,
|
|
"valid_targets_min": 1605
|
|
},
|
|
{
|
|
"epoch": 4.816223067173637,
|
|
"grad_norm": 0.45256848429641117,
|
|
"learning_rate": 1.0746031892642846e-05,
|
|
"loss": 0.064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.035946667194366455,
|
|
"step": 3800,
|
|
"valid_targets_mean": 1436.2,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 4.82256020278834,
|
|
"grad_norm": 0.4841404678034511,
|
|
"learning_rate": 1.0690040551720764e-05,
|
|
"loss": 0.0831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03834570199251175,
|
|
"step": 3805,
|
|
"valid_targets_mean": 2573.2,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 4.828897338403042,
|
|
"grad_norm": 0.3985052781497097,
|
|
"learning_rate": 1.0634142208969047e-05,
|
|
"loss": 0.0751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.031149961054325104,
|
|
"step": 3810,
|
|
"valid_targets_mean": 3895.6,
|
|
"valid_targets_min": 1057
|
|
},
|
|
{
|
|
"epoch": 4.835234474017744,
|
|
"grad_norm": 0.35789031738201016,
|
|
"learning_rate": 1.0578337422762162e-05,
|
|
"loss": 0.0644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.032099559903144836,
|
|
"step": 3815,
|
|
"valid_targets_mean": 3843.5,
|
|
"valid_targets_min": 2685
|
|
},
|
|
{
|
|
"epoch": 4.841571609632446,
|
|
"grad_norm": 0.3923808444081495,
|
|
"learning_rate": 1.0522626750540029e-05,
|
|
"loss": 0.089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03874257951974869,
|
|
"step": 3820,
|
|
"valid_targets_mean": 3931.1,
|
|
"valid_targets_min": 3322
|
|
},
|
|
{
|
|
"epoch": 4.847908745247148,
|
|
"grad_norm": 0.4500842841261637,
|
|
"learning_rate": 1.0467010748802444e-05,
|
|
"loss": 0.071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.037358999252319336,
|
|
"step": 3825,
|
|
"valid_targets_mean": 2579.2,
|
|
"valid_targets_min": 769
|
|
},
|
|
{
|
|
"epoch": 4.854245880861851,
|
|
"grad_norm": 0.39079177132955994,
|
|
"learning_rate": 1.0411489973103525e-05,
|
|
"loss": 0.1036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040931105613708496,
|
|
"step": 3830,
|
|
"valid_targets_mean": 3443.2,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 4.860583016476553,
|
|
"grad_norm": 0.29302968084833775,
|
|
"learning_rate": 1.0356064978046197e-05,
|
|
"loss": 0.0927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.022984012961387634,
|
|
"step": 3835,
|
|
"valid_targets_mean": 3372.4,
|
|
"valid_targets_min": 953
|
|
},
|
|
{
|
|
"epoch": 4.866920152091255,
|
|
"grad_norm": 0.5374851149423621,
|
|
"learning_rate": 1.0300736317276576e-05,
|
|
"loss": 0.0937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04663417488336563,
|
|
"step": 3840,
|
|
"valid_targets_mean": 3080.8,
|
|
"valid_targets_min": 2380
|
|
},
|
|
{
|
|
"epoch": 4.873257287705957,
|
|
"grad_norm": 0.36857066959299434,
|
|
"learning_rate": 1.0245504543478541e-05,
|
|
"loss": 0.0712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03406180441379547,
|
|
"step": 3845,
|
|
"valid_targets_mean": 3857.6,
|
|
"valid_targets_min": 2619
|
|
},
|
|
{
|
|
"epoch": 4.879594423320659,
|
|
"grad_norm": 0.6024532464031291,
|
|
"learning_rate": 1.0190370208368105e-05,
|
|
"loss": 0.082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05001235008239746,
|
|
"step": 3850,
|
|
"valid_targets_mean": 2207.1,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 4.885931558935361,
|
|
"grad_norm": 0.3968573758332169,
|
|
"learning_rate": 1.0135333862687993e-05,
|
|
"loss": 0.0782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042467691004276276,
|
|
"step": 3855,
|
|
"valid_targets_mean": 3912.5,
|
|
"valid_targets_min": 3129
|
|
},
|
|
{
|
|
"epoch": 4.892268694550063,
|
|
"grad_norm": 0.712866147978305,
|
|
"learning_rate": 1.0080396056202071e-05,
|
|
"loss": 0.1051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043578531593084335,
|
|
"step": 3860,
|
|
"valid_targets_mean": 1114.8,
|
|
"valid_targets_min": 458
|
|
},
|
|
{
|
|
"epoch": 4.898605830164765,
|
|
"grad_norm": 0.5007655723931989,
|
|
"learning_rate": 1.002555733768992e-05,
|
|
"loss": 0.1068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041624411940574646,
|
|
"step": 3865,
|
|
"valid_targets_mean": 1913.4,
|
|
"valid_targets_min": 459
|
|
},
|
|
{
|
|
"epoch": 4.904942965779467,
|
|
"grad_norm": 0.4972732864026348,
|
|
"learning_rate": 9.970818254941286e-06,
|
|
"loss": 0.0844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056848425418138504,
|
|
"step": 3870,
|
|
"valid_targets_mean": 2510.9,
|
|
"valid_targets_min": 858
|
|
},
|
|
{
|
|
"epoch": 4.91128010139417,
|
|
"grad_norm": 0.3997297785475291,
|
|
"learning_rate": 9.91617935475067e-06,
|
|
"loss": 0.0785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03252048417925835,
|
|
"step": 3875,
|
|
"valid_targets_mean": 3938.4,
|
|
"valid_targets_min": 2822
|
|
},
|
|
{
|
|
"epoch": 4.917617237008872,
|
|
"grad_norm": 0.43648117339212733,
|
|
"learning_rate": 9.861641182911836e-06,
|
|
"loss": 0.0791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03892311081290245,
|
|
"step": 3880,
|
|
"valid_targets_mean": 3575.4,
|
|
"valid_targets_min": 2197
|
|
},
|
|
{
|
|
"epoch": 4.923954372623574,
|
|
"grad_norm": 0.40352826534474334,
|
|
"learning_rate": 9.807204284212342e-06,
|
|
"loss": 0.0698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.025977574288845062,
|
|
"step": 3885,
|
|
"valid_targets_mean": 2924.6,
|
|
"valid_targets_min": 856
|
|
},
|
|
{
|
|
"epoch": 4.930291508238277,
|
|
"grad_norm": 1.050983865311668,
|
|
"learning_rate": 9.752869202428143e-06,
|
|
"loss": 0.0842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05868568271398544,
|
|
"step": 3890,
|
|
"valid_targets_mean": 873.5,
|
|
"valid_targets_min": 563
|
|
},
|
|
{
|
|
"epoch": 4.936628643852979,
|
|
"grad_norm": 0.42295586624015263,
|
|
"learning_rate": 9.69863648031811e-06,
|
|
"loss": 0.0784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03382163494825363,
|
|
"step": 3895,
|
|
"valid_targets_mean": 3029.5,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 4.942965779467681,
|
|
"grad_norm": 0.5149817023683718,
|
|
"learning_rate": 9.64450665961866e-06,
|
|
"loss": 0.0726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045273907482624054,
|
|
"step": 3900,
|
|
"valid_targets_mean": 3753.5,
|
|
"valid_targets_min": 3372
|
|
},
|
|
{
|
|
"epoch": 4.949302915082383,
|
|
"grad_norm": 0.4601774436706208,
|
|
"learning_rate": 9.590480281038284e-06,
|
|
"loss": 0.0734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03757575899362564,
|
|
"step": 3905,
|
|
"valid_targets_mean": 3556.9,
|
|
"valid_targets_min": 2538
|
|
},
|
|
{
|
|
"epoch": 4.955640050697085,
|
|
"grad_norm": 0.3344510236047546,
|
|
"learning_rate": 9.536557884252204e-06,
|
|
"loss": 0.0818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03164272382855415,
|
|
"step": 3910,
|
|
"valid_targets_mean": 4082.0,
|
|
"valid_targets_min": 485
|
|
},
|
|
{
|
|
"epoch": 4.961977186311787,
|
|
"grad_norm": 0.31543285800352566,
|
|
"learning_rate": 9.48274000789696e-06,
|
|
"loss": 0.0699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03564603626728058,
|
|
"step": 3915,
|
|
"valid_targets_mean": 4387.9,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 4.968314321926489,
|
|
"grad_norm": 0.35621296523768875,
|
|
"learning_rate": 9.429027189564997e-06,
|
|
"loss": 0.0734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03764301538467407,
|
|
"step": 3920,
|
|
"valid_targets_mean": 4775.9,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 4.974651457541191,
|
|
"grad_norm": 0.47191530058098474,
|
|
"learning_rate": 9.375419965799357e-06,
|
|
"loss": 0.0851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05518128722906113,
|
|
"step": 3925,
|
|
"valid_targets_mean": 2982.0,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 4.980988593155893,
|
|
"grad_norm": 0.4170673267534957,
|
|
"learning_rate": 9.321918872088259e-06,
|
|
"loss": 0.1623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04554079473018646,
|
|
"step": 3930,
|
|
"valid_targets_mean": 3277.0,
|
|
"valid_targets_min": 1137
|
|
},
|
|
{
|
|
"epoch": 4.987325728770596,
|
|
"grad_norm": 0.4919615101628656,
|
|
"learning_rate": 9.268524442859806e-06,
|
|
"loss": 0.0777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07051996141672134,
|
|
"step": 3935,
|
|
"valid_targets_mean": 1846.9,
|
|
"valid_targets_min": 967
|
|
},
|
|
{
|
|
"epoch": 4.993662864385298,
|
|
"grad_norm": 0.36679459861950864,
|
|
"learning_rate": 9.215237211476582e-06,
|
|
"loss": 0.0736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.034877531230449677,
|
|
"step": 3940,
|
|
"valid_targets_mean": 3313.0,
|
|
"valid_targets_min": 886
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.4580160241396483,
|
|
"learning_rate": 9.162057710230408e-06,
|
|
"loss": 0.1121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07706436514854431,
|
|
"step": 3945,
|
|
"valid_targets_mean": 7404.6,
|
|
"valid_targets_min": 3513
|
|
},
|
|
{
|
|
"epoch": 5.006337135614702,
|
|
"grad_norm": 0.6777923746659404,
|
|
"learning_rate": 9.10898647033694e-06,
|
|
"loss": 0.1874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09000250697135925,
|
|
"step": 3950,
|
|
"valid_targets_mean": 7389.0,
|
|
"valid_targets_min": 5627
|
|
},
|
|
{
|
|
"epoch": 5.012674271229404,
|
|
"grad_norm": 0.6776968534632705,
|
|
"learning_rate": 9.056024021930401e-06,
|
|
"loss": 0.1828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04829111322760582,
|
|
"step": 3955,
|
|
"valid_targets_mean": 1294.8,
|
|
"valid_targets_min": 422
|
|
},
|
|
{
|
|
"epoch": 5.019011406844107,
|
|
"grad_norm": 0.4347239651617738,
|
|
"learning_rate": 9.003170894058309e-06,
|
|
"loss": 0.1732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09010399878025055,
|
|
"step": 3960,
|
|
"valid_targets_mean": 8539.9,
|
|
"valid_targets_min": 6761
|
|
},
|
|
{
|
|
"epoch": 5.025348542458809,
|
|
"grad_norm": 0.3897555649737464,
|
|
"learning_rate": 8.950427614676138e-06,
|
|
"loss": 0.1694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07889628410339355,
|
|
"step": 3965,
|
|
"valid_targets_mean": 7579.0,
|
|
"valid_targets_min": 6174
|
|
},
|
|
{
|
|
"epoch": 5.031685678073511,
|
|
"grad_norm": 0.39291067994340767,
|
|
"learning_rate": 8.897794710642098e-06,
|
|
"loss": 0.1724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07926777750253677,
|
|
"step": 3970,
|
|
"valid_targets_mean": 7328.2,
|
|
"valid_targets_min": 4839
|
|
},
|
|
{
|
|
"epoch": 5.038022813688213,
|
|
"grad_norm": 0.4172481142439373,
|
|
"learning_rate": 8.845272707711836e-06,
|
|
"loss": 0.1837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09160207211971283,
|
|
"step": 3975,
|
|
"valid_targets_mean": 6838.2,
|
|
"valid_targets_min": 5034
|
|
},
|
|
{
|
|
"epoch": 5.044359949302915,
|
|
"grad_norm": 0.4203503639843128,
|
|
"learning_rate": 8.792862130533206e-06,
|
|
"loss": 0.1838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09447371959686279,
|
|
"step": 3980,
|
|
"valid_targets_mean": 7248.2,
|
|
"valid_targets_min": 5567
|
|
},
|
|
{
|
|
"epoch": 5.050697084917617,
|
|
"grad_norm": 0.4126272089994056,
|
|
"learning_rate": 8.740563502640997e-06,
|
|
"loss": 0.1691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07689834386110306,
|
|
"step": 3985,
|
|
"valid_targets_mean": 6234.0,
|
|
"valid_targets_min": 5603
|
|
},
|
|
{
|
|
"epoch": 5.057034220532319,
|
|
"grad_norm": 0.6978756054568616,
|
|
"learning_rate": 8.688377346451754e-06,
|
|
"loss": 0.1618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.024097807705402374,
|
|
"step": 3990,
|
|
"valid_targets_mean": 602.0,
|
|
"valid_targets_min": 152
|
|
},
|
|
{
|
|
"epoch": 5.063371356147021,
|
|
"grad_norm": 0.39872379642740224,
|
|
"learning_rate": 8.636304183258505e-06,
|
|
"loss": 0.1564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08006036281585693,
|
|
"step": 3995,
|
|
"valid_targets_mean": 7299.9,
|
|
"valid_targets_min": 6158
|
|
},
|
|
{
|
|
"epoch": 5.069708491761724,
|
|
"grad_norm": 0.403553485986575,
|
|
"learning_rate": 8.584344533225588e-06,
|
|
"loss": 0.1713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0797131359577179,
|
|
"step": 4000,
|
|
"valid_targets_mean": 6140.0,
|
|
"valid_targets_min": 4648
|
|
},
|
|
{
|
|
"epoch": 5.076045627376426,
|
|
"grad_norm": 0.4431570400784521,
|
|
"learning_rate": 8.532498915383447e-06,
|
|
"loss": 0.1643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08615205436944962,
|
|
"step": 4005,
|
|
"valid_targets_mean": 6603.0,
|
|
"valid_targets_min": 4882
|
|
},
|
|
{
|
|
"epoch": 5.082382762991128,
|
|
"grad_norm": 0.41485976447897555,
|
|
"learning_rate": 8.480767847623448e-06,
|
|
"loss": 0.1656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08480145037174225,
|
|
"step": 4010,
|
|
"valid_targets_mean": 8015.0,
|
|
"valid_targets_min": 6344
|
|
},
|
|
{
|
|
"epoch": 5.08871989860583,
|
|
"grad_norm": 0.38124456876129,
|
|
"learning_rate": 8.429151846692714e-06,
|
|
"loss": 0.157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0730145201086998,
|
|
"step": 4015,
|
|
"valid_targets_mean": 6305.2,
|
|
"valid_targets_min": 4622
|
|
},
|
|
{
|
|
"epoch": 5.095057034220532,
|
|
"grad_norm": 0.4054783096465757,
|
|
"learning_rate": 8.377651428188923e-06,
|
|
"loss": 0.1659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08846744149923325,
|
|
"step": 4020,
|
|
"valid_targets_mean": 7890.8,
|
|
"valid_targets_min": 5485
|
|
},
|
|
{
|
|
"epoch": 5.101394169835235,
|
|
"grad_norm": 0.3781641165861625,
|
|
"learning_rate": 8.326267106555222e-06,
|
|
"loss": 0.1681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07646860182285309,
|
|
"step": 4025,
|
|
"valid_targets_mean": 7016.6,
|
|
"valid_targets_min": 4960
|
|
},
|
|
{
|
|
"epoch": 5.107731305449937,
|
|
"grad_norm": 0.40647708647014563,
|
|
"learning_rate": 8.274999395075023e-06,
|
|
"loss": 0.1625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0790894404053688,
|
|
"step": 4030,
|
|
"valid_targets_mean": 6527.1,
|
|
"valid_targets_min": 5399
|
|
},
|
|
{
|
|
"epoch": 5.114068441064639,
|
|
"grad_norm": 0.5641741645316035,
|
|
"learning_rate": 8.223848805866941e-06,
|
|
"loss": 0.1685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08160850405693054,
|
|
"step": 4035,
|
|
"valid_targets_mean": 4995.0,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 5.120405576679341,
|
|
"grad_norm": 0.42315292233047624,
|
|
"learning_rate": 8.172815849879607e-06,
|
|
"loss": 0.1821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09121018648147583,
|
|
"step": 4040,
|
|
"valid_targets_mean": 7219.4,
|
|
"valid_targets_min": 5764
|
|
},
|
|
{
|
|
"epoch": 5.126742712294043,
|
|
"grad_norm": 0.4053404420693955,
|
|
"learning_rate": 8.12190103688663e-06,
|
|
"loss": 0.1483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08342596888542175,
|
|
"step": 4045,
|
|
"valid_targets_mean": 7998.5,
|
|
"valid_targets_min": 5902
|
|
},
|
|
{
|
|
"epoch": 5.133079847908745,
|
|
"grad_norm": 0.4134624883029772,
|
|
"learning_rate": 8.071104875481474e-06,
|
|
"loss": 0.1559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06699319183826447,
|
|
"step": 4050,
|
|
"valid_targets_mean": 7377.4,
|
|
"valid_targets_min": 5015
|
|
},
|
|
{
|
|
"epoch": 5.139416983523447,
|
|
"grad_norm": 0.3979314067373139,
|
|
"learning_rate": 8.020427873072355e-06,
|
|
"loss": 0.163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06869306415319443,
|
|
"step": 4055,
|
|
"valid_targets_mean": 6794.0,
|
|
"valid_targets_min": 5283
|
|
},
|
|
{
|
|
"epoch": 5.145754119138149,
|
|
"grad_norm": 0.4462407567563465,
|
|
"learning_rate": 7.969870535877231e-06,
|
|
"loss": 0.1682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09361501783132553,
|
|
"step": 4060,
|
|
"valid_targets_mean": 6656.4,
|
|
"valid_targets_min": 4364
|
|
},
|
|
{
|
|
"epoch": 5.152091254752852,
|
|
"grad_norm": 0.401092973388301,
|
|
"learning_rate": 7.919433368918676e-06,
|
|
"loss": 0.1679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07718173414468765,
|
|
"step": 4065,
|
|
"valid_targets_mean": 7178.4,
|
|
"valid_targets_min": 4448
|
|
},
|
|
{
|
|
"epoch": 5.158428390367554,
|
|
"grad_norm": 0.44249303134972623,
|
|
"learning_rate": 7.869116876018904e-06,
|
|
"loss": 0.1645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09851060807704926,
|
|
"step": 4070,
|
|
"valid_targets_mean": 7619.8,
|
|
"valid_targets_min": 5737
|
|
},
|
|
{
|
|
"epoch": 5.164765525982256,
|
|
"grad_norm": 0.36106074401654153,
|
|
"learning_rate": 7.81892155979468e-06,
|
|
"loss": 0.1467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06959091126918793,
|
|
"step": 4075,
|
|
"valid_targets_mean": 8754.4,
|
|
"valid_targets_min": 5752
|
|
},
|
|
{
|
|
"epoch": 5.171102661596958,
|
|
"grad_norm": 0.4369022792603916,
|
|
"learning_rate": 7.768847921652343e-06,
|
|
"loss": 0.1466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0773264467716217,
|
|
"step": 4080,
|
|
"valid_targets_mean": 6341.0,
|
|
"valid_targets_min": 4543
|
|
},
|
|
{
|
|
"epoch": 5.17743979721166,
|
|
"grad_norm": 0.5774544847564673,
|
|
"learning_rate": 7.718896461782756e-06,
|
|
"loss": 0.1424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08174264430999756,
|
|
"step": 4085,
|
|
"valid_targets_mean": 7181.9,
|
|
"valid_targets_min": 4563
|
|
},
|
|
{
|
|
"epoch": 5.183776932826363,
|
|
"grad_norm": 0.7428691252770011,
|
|
"learning_rate": 7.669067679156335e-06,
|
|
"loss": 0.1664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051164716482162476,
|
|
"step": 4090,
|
|
"valid_targets_mean": 1104.4,
|
|
"valid_targets_min": 161
|
|
},
|
|
{
|
|
"epoch": 5.190114068441065,
|
|
"grad_norm": 1.6924819005272178,
|
|
"learning_rate": 7.61936207151807e-06,
|
|
"loss": 0.1502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07505419105291367,
|
|
"step": 4095,
|
|
"valid_targets_mean": 7018.2,
|
|
"valid_targets_min": 5424
|
|
},
|
|
{
|
|
"epoch": 5.196451204055767,
|
|
"grad_norm": 0.46731517581274457,
|
|
"learning_rate": 7.5697801353825315e-06,
|
|
"loss": 0.1711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08700448274612427,
|
|
"step": 4100,
|
|
"valid_targets_mean": 7738.4,
|
|
"valid_targets_min": 5329
|
|
},
|
|
{
|
|
"epoch": 5.202788339670469,
|
|
"grad_norm": 0.45034452101704714,
|
|
"learning_rate": 7.520322366028931e-06,
|
|
"loss": 0.1763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08378411829471588,
|
|
"step": 4105,
|
|
"valid_targets_mean": 6426.6,
|
|
"valid_targets_min": 5560
|
|
},
|
|
{
|
|
"epoch": 5.2091254752851714,
|
|
"grad_norm": 0.4399063028066905,
|
|
"learning_rate": 7.470989257496164e-06,
|
|
"loss": 0.1703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0849621444940567,
|
|
"step": 4110,
|
|
"valid_targets_mean": 6169.9,
|
|
"valid_targets_min": 3446
|
|
},
|
|
{
|
|
"epoch": 5.215462610899873,
|
|
"grad_norm": 0.419937563598825,
|
|
"learning_rate": 7.4217813025778755e-06,
|
|
"loss": 0.1648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08585532754659653,
|
|
"step": 4115,
|
|
"valid_targets_mean": 7204.6,
|
|
"valid_targets_min": 5381
|
|
},
|
|
{
|
|
"epoch": 5.221799746514575,
|
|
"grad_norm": 0.41280289221641703,
|
|
"learning_rate": 7.372698992817531e-06,
|
|
"loss": 0.1696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08676856756210327,
|
|
"step": 4120,
|
|
"valid_targets_mean": 7600.4,
|
|
"valid_targets_min": 5241
|
|
},
|
|
{
|
|
"epoch": 5.228136882129277,
|
|
"grad_norm": 0.43431938532624453,
|
|
"learning_rate": 7.3237428185035145e-06,
|
|
"loss": 0.1653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08987128734588623,
|
|
"step": 4125,
|
|
"valid_targets_mean": 7098.5,
|
|
"valid_targets_min": 5607
|
|
},
|
|
{
|
|
"epoch": 5.23447401774398,
|
|
"grad_norm": 0.4569640824595294,
|
|
"learning_rate": 7.2749132686642406e-06,
|
|
"loss": 0.1694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08936619758605957,
|
|
"step": 4130,
|
|
"valid_targets_mean": 6955.1,
|
|
"valid_targets_min": 5530
|
|
},
|
|
{
|
|
"epoch": 5.240811153358682,
|
|
"grad_norm": 0.4135744245501206,
|
|
"learning_rate": 7.226210831063243e-06,
|
|
"loss": 0.1698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08667145669460297,
|
|
"step": 4135,
|
|
"valid_targets_mean": 6748.1,
|
|
"valid_targets_min": 5364
|
|
},
|
|
{
|
|
"epoch": 5.247148288973384,
|
|
"grad_norm": 0.4510625715340834,
|
|
"learning_rate": 7.1776359921943295e-06,
|
|
"loss": 0.1694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08137469738721848,
|
|
"step": 4140,
|
|
"valid_targets_mean": 6273.9,
|
|
"valid_targets_min": 5438
|
|
},
|
|
{
|
|
"epoch": 5.253485424588086,
|
|
"grad_norm": 0.4029945927012228,
|
|
"learning_rate": 7.1291892372767105e-06,
|
|
"loss": 0.1653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07805578410625458,
|
|
"step": 4145,
|
|
"valid_targets_mean": 6419.5,
|
|
"valid_targets_min": 4879
|
|
},
|
|
{
|
|
"epoch": 5.259822560202788,
|
|
"grad_norm": 0.41716769952382066,
|
|
"learning_rate": 7.080871050250151e-06,
|
|
"loss": 0.1638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07842215150594711,
|
|
"step": 4150,
|
|
"valid_targets_mean": 6483.4,
|
|
"valid_targets_min": 4309
|
|
},
|
|
{
|
|
"epoch": 5.266159695817491,
|
|
"grad_norm": 0.48777274645277524,
|
|
"learning_rate": 7.032681913770123e-06,
|
|
"loss": 0.1347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09810637682676315,
|
|
"step": 4155,
|
|
"valid_targets_mean": 6825.0,
|
|
"valid_targets_min": 5205
|
|
},
|
|
{
|
|
"epoch": 5.272496831432193,
|
|
"grad_norm": 0.4136277172521258,
|
|
"learning_rate": 6.984622309203028e-06,
|
|
"loss": 0.1632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07587535679340363,
|
|
"step": 4160,
|
|
"valid_targets_mean": 6729.4,
|
|
"valid_targets_min": 5200
|
|
},
|
|
{
|
|
"epoch": 5.278833967046895,
|
|
"grad_norm": 0.4334383384176948,
|
|
"learning_rate": 6.936692716621336e-06,
|
|
"loss": 0.1606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.078494593501091,
|
|
"step": 4165,
|
|
"valid_targets_mean": 6094.0,
|
|
"valid_targets_min": 5302
|
|
},
|
|
{
|
|
"epoch": 5.285171102661597,
|
|
"grad_norm": 0.42218078103959833,
|
|
"learning_rate": 6.888893614798804e-06,
|
|
"loss": 0.1656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08591262251138687,
|
|
"step": 4170,
|
|
"valid_targets_mean": 7140.0,
|
|
"valid_targets_min": 5270
|
|
},
|
|
{
|
|
"epoch": 5.2915082382762995,
|
|
"grad_norm": 0.4552472672714348,
|
|
"learning_rate": 6.841225481205749e-06,
|
|
"loss": 0.1584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07776696979999542,
|
|
"step": 4175,
|
|
"valid_targets_mean": 7425.0,
|
|
"valid_targets_min": 5281
|
|
},
|
|
{
|
|
"epoch": 5.297845373891001,
|
|
"grad_norm": 0.4306079172442133,
|
|
"learning_rate": 6.7936887920041825e-06,
|
|
"loss": 0.1674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10531240701675415,
|
|
"step": 4180,
|
|
"valid_targets_mean": 7063.4,
|
|
"valid_targets_min": 5386
|
|
},
|
|
{
|
|
"epoch": 5.304182509505703,
|
|
"grad_norm": 0.42105290762285663,
|
|
"learning_rate": 6.746284022043137e-06,
|
|
"loss": 0.1656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08438935875892639,
|
|
"step": 4185,
|
|
"valid_targets_mean": 6827.0,
|
|
"valid_targets_min": 4547
|
|
},
|
|
{
|
|
"epoch": 5.310519645120405,
|
|
"grad_norm": 0.6552694795083986,
|
|
"learning_rate": 6.6990116448538635e-06,
|
|
"loss": 0.1436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04010789096355438,
|
|
"step": 4190,
|
|
"valid_targets_mean": 2047.0,
|
|
"valid_targets_min": 165
|
|
},
|
|
{
|
|
"epoch": 5.316856780735108,
|
|
"grad_norm": 0.4654245734769888,
|
|
"learning_rate": 6.651872132645156e-06,
|
|
"loss": 0.1479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07815557718276978,
|
|
"step": 4195,
|
|
"valid_targets_mean": 6754.1,
|
|
"valid_targets_min": 5196
|
|
},
|
|
{
|
|
"epoch": 5.32319391634981,
|
|
"grad_norm": 0.4535492018207918,
|
|
"learning_rate": 6.604865956298581e-06,
|
|
"loss": 0.1602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08099634945392609,
|
|
"step": 4200,
|
|
"valid_targets_mean": 5985.6,
|
|
"valid_targets_min": 5028
|
|
},
|
|
{
|
|
"epoch": 5.329531051964512,
|
|
"grad_norm": 0.44627308207014005,
|
|
"learning_rate": 6.557993585363809e-06,
|
|
"loss": 0.1655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07561885565519333,
|
|
"step": 4205,
|
|
"valid_targets_mean": 5963.9,
|
|
"valid_targets_min": 5237
|
|
},
|
|
{
|
|
"epoch": 5.335868187579214,
|
|
"grad_norm": 0.44998746744931617,
|
|
"learning_rate": 6.511255488053927e-06,
|
|
"loss": 0.1628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08541455119848251,
|
|
"step": 4210,
|
|
"valid_targets_mean": 6319.0,
|
|
"valid_targets_min": 5412
|
|
},
|
|
{
|
|
"epoch": 5.342205323193916,
|
|
"grad_norm": 0.7219428831787981,
|
|
"learning_rate": 6.464652131240723e-06,
|
|
"loss": 0.1534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0667627677321434,
|
|
"step": 4215,
|
|
"valid_targets_mean": 7125.8,
|
|
"valid_targets_min": 5080
|
|
},
|
|
{
|
|
"epoch": 5.348542458808619,
|
|
"grad_norm": 0.43946617028841406,
|
|
"learning_rate": 6.41818398045007e-06,
|
|
"loss": 0.1636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08088171482086182,
|
|
"step": 4220,
|
|
"valid_targets_mean": 6123.5,
|
|
"valid_targets_min": 4521
|
|
},
|
|
{
|
|
"epoch": 5.354879594423321,
|
|
"grad_norm": 0.47733100622138097,
|
|
"learning_rate": 6.371851499857236e-06,
|
|
"loss": 0.1357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041512034833431244,
|
|
"step": 4225,
|
|
"valid_targets_mean": 2616.6,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 5.361216730038023,
|
|
"grad_norm": 0.5866983708163925,
|
|
"learning_rate": 6.325655152282284e-06,
|
|
"loss": 0.1011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03837406635284424,
|
|
"step": 4230,
|
|
"valid_targets_mean": 1793.2,
|
|
"valid_targets_min": 324
|
|
},
|
|
{
|
|
"epoch": 5.367553865652725,
|
|
"grad_norm": 0.3845759614541665,
|
|
"learning_rate": 6.279595399185408e-06,
|
|
"loss": 0.0768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0446813702583313,
|
|
"step": 4235,
|
|
"valid_targets_mean": 2705.1,
|
|
"valid_targets_min": 1064
|
|
},
|
|
{
|
|
"epoch": 5.3738910012674275,
|
|
"grad_norm": 0.4258600372652864,
|
|
"learning_rate": 6.2336727006623635e-06,
|
|
"loss": 0.0753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04484102129936218,
|
|
"step": 4240,
|
|
"valid_targets_mean": 3501.9,
|
|
"valid_targets_min": 2479
|
|
},
|
|
{
|
|
"epoch": 5.380228136882129,
|
|
"grad_norm": 0.38519999787332326,
|
|
"learning_rate": 6.187887515439852e-06,
|
|
"loss": 0.0855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.028960973024368286,
|
|
"step": 4245,
|
|
"valid_targets_mean": 3338.6,
|
|
"valid_targets_min": 1223
|
|
},
|
|
{
|
|
"epoch": 5.386565272496831,
|
|
"grad_norm": 0.3834076951369149,
|
|
"learning_rate": 6.1422403008709255e-06,
|
|
"loss": 0.0766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03624745085835457,
|
|
"step": 4250,
|
|
"valid_targets_mean": 2920.2,
|
|
"valid_targets_min": 1431
|
|
},
|
|
{
|
|
"epoch": 5.392902408111533,
|
|
"grad_norm": 0.69957738040738,
|
|
"learning_rate": 6.096731512930452e-06,
|
|
"loss": 0.1112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0720585286617279,
|
|
"step": 4255,
|
|
"valid_targets_mean": 1602.8,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 5.399239543726236,
|
|
"grad_norm": 0.4186578598248456,
|
|
"learning_rate": 6.051361606210517e-06,
|
|
"loss": 0.0983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03893708437681198,
|
|
"step": 4260,
|
|
"valid_targets_mean": 3130.4,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 5.405576679340938,
|
|
"grad_norm": 0.4577733098620005,
|
|
"learning_rate": 6.006131033915936e-06,
|
|
"loss": 0.0867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04968301206827164,
|
|
"step": 4265,
|
|
"valid_targets_mean": 2692.9,
|
|
"valid_targets_min": 846
|
|
},
|
|
{
|
|
"epoch": 5.41191381495564,
|
|
"grad_norm": 0.4569164829556428,
|
|
"learning_rate": 5.9610402478596725e-06,
|
|
"loss": 0.117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07093412429094315,
|
|
"step": 4270,
|
|
"valid_targets_mean": 3976.4,
|
|
"valid_targets_min": 3502
|
|
},
|
|
{
|
|
"epoch": 5.418250950570342,
|
|
"grad_norm": 0.41888754042809373,
|
|
"learning_rate": 5.916089698458365e-06,
|
|
"loss": 0.0604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03863076865673065,
|
|
"step": 4275,
|
|
"valid_targets_mean": 2932.5,
|
|
"valid_targets_min": 754
|
|
},
|
|
{
|
|
"epoch": 5.424588086185044,
|
|
"grad_norm": 0.46893557126297947,
|
|
"learning_rate": 5.8712798347278075e-06,
|
|
"loss": 0.0748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04804755747318268,
|
|
"step": 4280,
|
|
"valid_targets_mean": 2507.9,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 5.430925221799747,
|
|
"grad_norm": 0.39566534457302754,
|
|
"learning_rate": 5.8266111042784814e-06,
|
|
"loss": 0.0752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04610683023929596,
|
|
"step": 4285,
|
|
"valid_targets_mean": 3089.9,
|
|
"valid_targets_min": 754
|
|
},
|
|
{
|
|
"epoch": 5.437262357414449,
|
|
"grad_norm": 0.592061766233479,
|
|
"learning_rate": 5.782083953311055e-06,
|
|
"loss": 0.1138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15467888116836548,
|
|
"step": 4290,
|
|
"valid_targets_mean": 2035.1,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 5.443599493029151,
|
|
"grad_norm": 0.4181751857962557,
|
|
"learning_rate": 5.737698826611948e-06,
|
|
"loss": 0.07,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03835096210241318,
|
|
"step": 4295,
|
|
"valid_targets_mean": 3800.2,
|
|
"valid_targets_min": 2672
|
|
},
|
|
{
|
|
"epoch": 5.449936628643853,
|
|
"grad_norm": 0.42529811688986974,
|
|
"learning_rate": 5.693456167548894e-06,
|
|
"loss": 0.0745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04745566099882126,
|
|
"step": 4300,
|
|
"valid_targets_mean": 3504.6,
|
|
"valid_targets_min": 2988
|
|
},
|
|
{
|
|
"epoch": 5.4562737642585555,
|
|
"grad_norm": 0.4328646888563543,
|
|
"learning_rate": 5.649356418066499e-06,
|
|
"loss": 0.0688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03368465229868889,
|
|
"step": 4305,
|
|
"valid_targets_mean": 3088.0,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 5.462610899873257,
|
|
"grad_norm": 0.47769329901960367,
|
|
"learning_rate": 5.605400018681831e-06,
|
|
"loss": 0.0768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.044492196291685104,
|
|
"step": 4310,
|
|
"valid_targets_mean": 3463.5,
|
|
"valid_targets_min": 2189
|
|
},
|
|
{
|
|
"epoch": 5.468948035487959,
|
|
"grad_norm": 0.3898568700401151,
|
|
"learning_rate": 5.561587408480007e-06,
|
|
"loss": 0.0778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.028195835649967194,
|
|
"step": 4315,
|
|
"valid_targets_mean": 3875.4,
|
|
"valid_targets_min": 3245
|
|
},
|
|
{
|
|
"epoch": 5.475285171102661,
|
|
"grad_norm": 0.5210331561059278,
|
|
"learning_rate": 5.517919025109839e-06,
|
|
"loss": 0.089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04817694425582886,
|
|
"step": 4320,
|
|
"valid_targets_mean": 2261.9,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 5.481622306717364,
|
|
"grad_norm": 0.5168297453829634,
|
|
"learning_rate": 5.474395304779418e-06,
|
|
"loss": 0.0845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038807213306427,
|
|
"step": 4325,
|
|
"valid_targets_mean": 1559.6,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 5.487959442332066,
|
|
"grad_norm": 0.5547889967788058,
|
|
"learning_rate": 5.431016682251809e-06,
|
|
"loss": 0.1081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08085589110851288,
|
|
"step": 4330,
|
|
"valid_targets_mean": 3198.4,
|
|
"valid_targets_min": 1652
|
|
},
|
|
{
|
|
"epoch": 5.494296577946768,
|
|
"grad_norm": 0.4087227452305409,
|
|
"learning_rate": 5.387783590840645e-06,
|
|
"loss": 0.0867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0354684442281723,
|
|
"step": 4335,
|
|
"valid_targets_mean": 3583.8,
|
|
"valid_targets_min": 786
|
|
},
|
|
{
|
|
"epoch": 5.50063371356147,
|
|
"grad_norm": 0.4804446540865793,
|
|
"learning_rate": 5.344696462405865e-06,
|
|
"loss": 0.0716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040328770875930786,
|
|
"step": 4340,
|
|
"valid_targets_mean": 3100.1,
|
|
"valid_targets_min": 971
|
|
},
|
|
{
|
|
"epoch": 5.506970849176172,
|
|
"grad_norm": 0.41817721803567215,
|
|
"learning_rate": 5.301755727349358e-06,
|
|
"loss": 0.1045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.034907370805740356,
|
|
"step": 4345,
|
|
"valid_targets_mean": 3255.4,
|
|
"valid_targets_min": 2552
|
|
},
|
|
{
|
|
"epoch": 5.513307984790875,
|
|
"grad_norm": 0.4146512596529761,
|
|
"learning_rate": 5.258961814610659e-06,
|
|
"loss": 0.0921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.027002450078725815,
|
|
"step": 4350,
|
|
"valid_targets_mean": 2862.5,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 5.519645120405577,
|
|
"grad_norm": 0.5697772159094903,
|
|
"learning_rate": 5.216315151662705e-06,
|
|
"loss": 0.0819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.033459559082984924,
|
|
"step": 4355,
|
|
"valid_targets_mean": 1282.6,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 5.525982256020279,
|
|
"grad_norm": 0.6007023023897675,
|
|
"learning_rate": 5.173816164507517e-06,
|
|
"loss": 0.2697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11378200352191925,
|
|
"step": 4360,
|
|
"valid_targets_mean": 2626.9,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 5.532319391634981,
|
|
"grad_norm": 0.5853313934698544,
|
|
"learning_rate": 5.131465277671985e-06,
|
|
"loss": 0.0869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.033259209245443344,
|
|
"step": 4365,
|
|
"valid_targets_mean": 1323.9,
|
|
"valid_targets_min": 329
|
|
},
|
|
{
|
|
"epoch": 5.5386565272496835,
|
|
"grad_norm": 0.5291514120225208,
|
|
"learning_rate": 5.089262914203587e-06,
|
|
"loss": 0.0834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04345552250742912,
|
|
"step": 4370,
|
|
"valid_targets_mean": 1575.5,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 5.544993662864385,
|
|
"grad_norm": 0.47797969728051876,
|
|
"learning_rate": 5.04720949566621e-06,
|
|
"loss": 0.1712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05293070152401924,
|
|
"step": 4375,
|
|
"valid_targets_mean": 4513.2,
|
|
"valid_targets_min": 3556
|
|
},
|
|
{
|
|
"epoch": 5.551330798479087,
|
|
"grad_norm": 0.28154622187704254,
|
|
"learning_rate": 5.005305442135904e-06,
|
|
"loss": 0.0708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.020900988951325417,
|
|
"step": 4380,
|
|
"valid_targets_mean": 4744.9,
|
|
"valid_targets_min": 3529
|
|
},
|
|
{
|
|
"epoch": 5.557667934093789,
|
|
"grad_norm": 0.42259353066880834,
|
|
"learning_rate": 4.963551172196686e-06,
|
|
"loss": 0.0845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04330167919397354,
|
|
"step": 4385,
|
|
"valid_targets_mean": 4400.1,
|
|
"valid_targets_min": 2336
|
|
},
|
|
{
|
|
"epoch": 5.564005069708491,
|
|
"grad_norm": 0.45888359504926873,
|
|
"learning_rate": 4.921947102936388e-06,
|
|
"loss": 0.0794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043767012655735016,
|
|
"step": 4390,
|
|
"valid_targets_mean": 3865.4,
|
|
"valid_targets_min": 2161
|
|
},
|
|
{
|
|
"epoch": 5.570342205323194,
|
|
"grad_norm": 0.4045457590336565,
|
|
"learning_rate": 4.880493649942451e-06,
|
|
"loss": 0.0719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.037023529410362244,
|
|
"step": 4395,
|
|
"valid_targets_mean": 3094.0,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 5.576679340937896,
|
|
"grad_norm": 0.4354374383894185,
|
|
"learning_rate": 4.839191227297811e-06,
|
|
"loss": 0.0701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.032839491963386536,
|
|
"step": 4400,
|
|
"valid_targets_mean": 2178.4,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 5.583016476552598,
|
|
"grad_norm": 0.4589894088387192,
|
|
"learning_rate": 4.7980402475767384e-06,
|
|
"loss": 0.0709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.034980446100234985,
|
|
"step": 4405,
|
|
"valid_targets_mean": 2657.1,
|
|
"valid_targets_min": 898
|
|
},
|
|
{
|
|
"epoch": 5.589353612167301,
|
|
"grad_norm": 0.3484002140729273,
|
|
"learning_rate": 4.757041121840731e-06,
|
|
"loss": 0.0692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.031158069148659706,
|
|
"step": 4410,
|
|
"valid_targets_mean": 3808.5,
|
|
"valid_targets_min": 3278
|
|
},
|
|
{
|
|
"epoch": 5.595690747782003,
|
|
"grad_norm": 0.4812857303956409,
|
|
"learning_rate": 4.71619425963439e-06,
|
|
"loss": 0.0724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03770197927951813,
|
|
"step": 4415,
|
|
"valid_targets_mean": 1776.9,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 5.602027883396705,
|
|
"grad_norm": 0.5828577230319666,
|
|
"learning_rate": 4.67550006898134e-06,
|
|
"loss": 0.1026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04305993765592575,
|
|
"step": 4420,
|
|
"valid_targets_mean": 1609.5,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 5.608365019011407,
|
|
"grad_norm": 0.43328538779867565,
|
|
"learning_rate": 4.6349589563801715e-06,
|
|
"loss": 0.0576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02708401530981064,
|
|
"step": 4425,
|
|
"valid_targets_mean": 1540.4,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 5.614702154626109,
|
|
"grad_norm": 0.4707114505957345,
|
|
"learning_rate": 4.5945713268003364e-06,
|
|
"loss": 0.0691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0396789014339447,
|
|
"step": 4430,
|
|
"valid_targets_mean": 2733.9,
|
|
"valid_targets_min": 930
|
|
},
|
|
{
|
|
"epoch": 5.6210392902408115,
|
|
"grad_norm": 0.49308019926711744,
|
|
"learning_rate": 4.5543375836781475e-06,
|
|
"loss": 0.0631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.035800088196992874,
|
|
"step": 4435,
|
|
"valid_targets_mean": 2824.5,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 5.6273764258555135,
|
|
"grad_norm": 0.4530825505369907,
|
|
"learning_rate": 4.514258128912725e-06,
|
|
"loss": 0.1128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040187858045101166,
|
|
"step": 4440,
|
|
"valid_targets_mean": 3541.8,
|
|
"valid_targets_min": 2795
|
|
},
|
|
{
|
|
"epoch": 5.633713561470215,
|
|
"grad_norm": 0.5072834805176266,
|
|
"learning_rate": 4.474333362861987e-06,
|
|
"loss": 0.0821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04501301795244217,
|
|
"step": 4445,
|
|
"valid_targets_mean": 3088.2,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 5.640050697084917,
|
|
"grad_norm": 0.4084492866844528,
|
|
"learning_rate": 4.434563684338639e-06,
|
|
"loss": 0.0872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03324810788035393,
|
|
"step": 4450,
|
|
"valid_targets_mean": 3878.9,
|
|
"valid_targets_min": 3279
|
|
},
|
|
{
|
|
"epoch": 5.64638783269962,
|
|
"grad_norm": 0.5223919785393708,
|
|
"learning_rate": 4.39494949060622e-06,
|
|
"loss": 0.0758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.033035777509212494,
|
|
"step": 4455,
|
|
"valid_targets_mean": 1184.8,
|
|
"valid_targets_min": 536
|
|
},
|
|
{
|
|
"epoch": 5.652724968314322,
|
|
"grad_norm": 0.5601580045520027,
|
|
"learning_rate": 4.3554911773751e-06,
|
|
"loss": 0.1174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08346060663461685,
|
|
"step": 4460,
|
|
"valid_targets_mean": 3220.4,
|
|
"valid_targets_min": 1084
|
|
},
|
|
{
|
|
"epoch": 5.659062103929024,
|
|
"grad_norm": 0.4755655773544729,
|
|
"learning_rate": 4.3161891387985366e-06,
|
|
"loss": 0.0812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039506327360868454,
|
|
"step": 4465,
|
|
"valid_targets_mean": 3219.5,
|
|
"valid_targets_min": 2342
|
|
},
|
|
{
|
|
"epoch": 5.665399239543726,
|
|
"grad_norm": 0.6214327405285723,
|
|
"learning_rate": 4.277043767468765e-06,
|
|
"loss": 0.1018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03844623267650604,
|
|
"step": 4470,
|
|
"valid_targets_mean": 1672.2,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 5.671736375158428,
|
|
"grad_norm": 0.5256269042718799,
|
|
"learning_rate": 4.238055454413037e-06,
|
|
"loss": 0.0699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.030807897448539734,
|
|
"step": 4475,
|
|
"valid_targets_mean": 1181.4,
|
|
"valid_targets_min": 449
|
|
},
|
|
{
|
|
"epoch": 5.678073510773131,
|
|
"grad_norm": 0.35593890274850026,
|
|
"learning_rate": 4.199224589089748e-06,
|
|
"loss": 0.0757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03399336338043213,
|
|
"step": 4480,
|
|
"valid_targets_mean": 3762.2,
|
|
"valid_targets_min": 931
|
|
},
|
|
{
|
|
"epoch": 5.684410646387833,
|
|
"grad_norm": 0.7105910317632537,
|
|
"learning_rate": 4.1605515593845095e-06,
|
|
"loss": 0.0824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051422249525785446,
|
|
"step": 4485,
|
|
"valid_targets_mean": 2832.2,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 5.690747782002535,
|
|
"grad_norm": 0.3702799708362179,
|
|
"learning_rate": 4.122036751606318e-06,
|
|
"loss": 0.0641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02763994038105011,
|
|
"step": 4490,
|
|
"valid_targets_mean": 3865.2,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 5.697084917617237,
|
|
"grad_norm": 0.44487590802346777,
|
|
"learning_rate": 4.083680550483657e-06,
|
|
"loss": 0.0638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040048375725746155,
|
|
"step": 4495,
|
|
"valid_targets_mean": 3961.6,
|
|
"valid_targets_min": 2252
|
|
},
|
|
{
|
|
"epoch": 5.7034220532319395,
|
|
"grad_norm": 0.48006485522837344,
|
|
"learning_rate": 4.0454833391606875e-06,
|
|
"loss": 0.0724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03742187097668648,
|
|
"step": 4500,
|
|
"valid_targets_mean": 2066.0,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 5.7097591888466415,
|
|
"grad_norm": 0.5033214580391046,
|
|
"learning_rate": 4.007445499193389e-06,
|
|
"loss": 0.089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.037131235003471375,
|
|
"step": 4505,
|
|
"valid_targets_mean": 1982.2,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 5.716096324461343,
|
|
"grad_norm": 0.468655859314315,
|
|
"learning_rate": 3.969567410545774e-06,
|
|
"loss": 0.0841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042156293988227844,
|
|
"step": 4510,
|
|
"valid_targets_mean": 3687.4,
|
|
"valid_targets_min": 3024
|
|
},
|
|
{
|
|
"epoch": 5.722433460076045,
|
|
"grad_norm": 0.38257980786963774,
|
|
"learning_rate": 3.931849451586085e-06,
|
|
"loss": 0.0737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03761179372668266,
|
|
"step": 4515,
|
|
"valid_targets_mean": 3177.5,
|
|
"valid_targets_min": 2366
|
|
},
|
|
{
|
|
"epoch": 5.728770595690747,
|
|
"grad_norm": 0.4375680492107314,
|
|
"learning_rate": 3.894291999082998e-06,
|
|
"loss": 0.0689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03292831405997276,
|
|
"step": 4520,
|
|
"valid_targets_mean": 1100.5,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 5.73510773130545,
|
|
"grad_norm": 0.4986542584132164,
|
|
"learning_rate": 3.856895428201888e-06,
|
|
"loss": 0.0835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03635839372873306,
|
|
"step": 4525,
|
|
"valid_targets_mean": 3067.5,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 5.741444866920152,
|
|
"grad_norm": 0.3620698647395571,
|
|
"learning_rate": 3.819660112501053e-06,
|
|
"loss": 0.0599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03209828585386276,
|
|
"step": 4530,
|
|
"valid_targets_mean": 4000.5,
|
|
"valid_targets_min": 3301
|
|
},
|
|
{
|
|
"epoch": 5.747782002534854,
|
|
"grad_norm": 0.3867395070661232,
|
|
"learning_rate": 3.782586423928005e-06,
|
|
"loss": 0.0748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029267754405736923,
|
|
"step": 4535,
|
|
"valid_targets_mean": 2841.2,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 5.754119138149557,
|
|
"grad_norm": 0.4503620629477804,
|
|
"learning_rate": 3.745674732815752e-06,
|
|
"loss": 0.0755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.032203756272792816,
|
|
"step": 4540,
|
|
"valid_targets_mean": 2733.4,
|
|
"valid_targets_min": 1085
|
|
},
|
|
{
|
|
"epoch": 5.760456273764259,
|
|
"grad_norm": 0.7359771664227186,
|
|
"learning_rate": 3.7089254078790717e-06,
|
|
"loss": 0.0847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04714491218328476,
|
|
"step": 4545,
|
|
"valid_targets_mean": 1322.0,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 5.766793409378961,
|
|
"grad_norm": 0.4920388254908045,
|
|
"learning_rate": 3.6723388162108673e-06,
|
|
"loss": 0.0759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.027474645525217056,
|
|
"step": 4550,
|
|
"valid_targets_mean": 3263.9,
|
|
"valid_targets_min": 914
|
|
},
|
|
{
|
|
"epoch": 5.773130544993663,
|
|
"grad_norm": 0.7918204760525981,
|
|
"learning_rate": 3.635915323278472e-06,
|
|
"loss": 0.077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05429510772228241,
|
|
"step": 4555,
|
|
"valid_targets_mean": 1634.0,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 5.779467680608365,
|
|
"grad_norm": 0.44003825392264573,
|
|
"learning_rate": 3.5996552929200167e-06,
|
|
"loss": 0.0727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03652956336736679,
|
|
"step": 4560,
|
|
"valid_targets_mean": 2644.0,
|
|
"valid_targets_min": 1117
|
|
},
|
|
{
|
|
"epoch": 5.7858048162230675,
|
|
"grad_norm": 0.45749382422432305,
|
|
"learning_rate": 3.5635590873407753e-06,
|
|
"loss": 0.0962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03557366877794266,
|
|
"step": 4565,
|
|
"valid_targets_mean": 3539.0,
|
|
"valid_targets_min": 1388
|
|
},
|
|
{
|
|
"epoch": 5.7921419518377695,
|
|
"grad_norm": 0.4452277252780396,
|
|
"learning_rate": 3.5276270671095713e-06,
|
|
"loss": 0.0739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03005984053015709,
|
|
"step": 4570,
|
|
"valid_targets_mean": 2930.5,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 5.798479087452471,
|
|
"grad_norm": 0.45607469168777787,
|
|
"learning_rate": 3.491859591155158e-06,
|
|
"loss": 0.0661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.031239572912454605,
|
|
"step": 4575,
|
|
"valid_targets_mean": 1992.5,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 5.804816223067173,
|
|
"grad_norm": 0.5737760929337632,
|
|
"learning_rate": 3.4562570167626407e-06,
|
|
"loss": 0.0869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03960079699754715,
|
|
"step": 4580,
|
|
"valid_targets_mean": 2636.1,
|
|
"valid_targets_min": 342
|
|
},
|
|
{
|
|
"epoch": 5.811153358681876,
|
|
"grad_norm": 0.40931546128860985,
|
|
"learning_rate": 3.4208196995699015e-06,
|
|
"loss": 0.0816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02952323481440544,
|
|
"step": 4585,
|
|
"valid_targets_mean": 3755.8,
|
|
"valid_targets_min": 3251
|
|
},
|
|
{
|
|
"epoch": 5.817490494296578,
|
|
"grad_norm": 0.5839453011552098,
|
|
"learning_rate": 3.385547993564049e-06,
|
|
"loss": 0.0645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04208548367023468,
|
|
"step": 4590,
|
|
"valid_targets_mean": 2723.0,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 5.82382762991128,
|
|
"grad_norm": 0.4081672024057598,
|
|
"learning_rate": 3.3504422510778946e-06,
|
|
"loss": 0.0725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039170943200588226,
|
|
"step": 4595,
|
|
"valid_targets_mean": 4528.9,
|
|
"valid_targets_min": 2555
|
|
},
|
|
{
|
|
"epoch": 5.830164765525982,
|
|
"grad_norm": 0.3465791597444587,
|
|
"learning_rate": 3.315502822786407e-06,
|
|
"loss": 0.0656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.025992441922426224,
|
|
"step": 4600,
|
|
"valid_targets_mean": 3859.6,
|
|
"valid_targets_min": 837
|
|
},
|
|
{
|
|
"epoch": 5.836501901140684,
|
|
"grad_norm": 0.427725489884492,
|
|
"learning_rate": 3.280730057703239e-06,
|
|
"loss": 0.0589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02787669003009796,
|
|
"step": 4605,
|
|
"valid_targets_mean": 2657.5,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 5.842839036755387,
|
|
"grad_norm": 0.3469672260861474,
|
|
"learning_rate": 3.2461243031772204e-06,
|
|
"loss": 0.0837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036878298968076706,
|
|
"step": 4610,
|
|
"valid_targets_mean": 4010.0,
|
|
"valid_targets_min": 3213
|
|
},
|
|
{
|
|
"epoch": 5.849176172370089,
|
|
"grad_norm": 0.4388673144418931,
|
|
"learning_rate": 3.2116859048889037e-06,
|
|
"loss": 0.0811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1055724024772644,
|
|
"step": 4615,
|
|
"valid_targets_mean": 2696.0,
|
|
"valid_targets_min": 1536
|
|
},
|
|
{
|
|
"epoch": 5.855513307984791,
|
|
"grad_norm": 0.43073708893470974,
|
|
"learning_rate": 3.177415206847083e-06,
|
|
"loss": 0.0809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.034607693552970886,
|
|
"step": 4620,
|
|
"valid_targets_mean": 2255.1,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 5.861850443599493,
|
|
"grad_norm": 0.3318905823239535,
|
|
"learning_rate": 3.143312551385398e-06,
|
|
"loss": 0.0831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02005249261856079,
|
|
"step": 4625,
|
|
"valid_targets_mean": 3199.2,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 5.8681875792141955,
|
|
"grad_norm": 0.40759970113239813,
|
|
"learning_rate": 3.1093782791588745e-06,
|
|
"loss": 0.0926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.034002259373664856,
|
|
"step": 4630,
|
|
"valid_targets_mean": 3062.2,
|
|
"valid_targets_min": 1811
|
|
},
|
|
{
|
|
"epoch": 5.8745247148288975,
|
|
"grad_norm": 0.44946633606475833,
|
|
"learning_rate": 3.0756127291405534e-06,
|
|
"loss": 0.0641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.032798804342746735,
|
|
"step": 4635,
|
|
"valid_targets_mean": 2574.4,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 5.880861850443599,
|
|
"grad_norm": 1.024160627288361,
|
|
"learning_rate": 3.0420162386180974e-06,
|
|
"loss": 0.0782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043387867510318756,
|
|
"step": 4640,
|
|
"valid_targets_mean": 2292.9,
|
|
"valid_targets_min": 833
|
|
},
|
|
{
|
|
"epoch": 5.887198986058301,
|
|
"grad_norm": 0.386376044229599,
|
|
"learning_rate": 3.00858914319039e-06,
|
|
"loss": 0.0664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02493395283818245,
|
|
"step": 4645,
|
|
"valid_targets_mean": 3488.0,
|
|
"valid_targets_min": 1533
|
|
},
|
|
{
|
|
"epoch": 5.893536121673003,
|
|
"grad_norm": 0.5810949833653366,
|
|
"learning_rate": 2.9753317767642386e-06,
|
|
"loss": 0.1019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0471031554043293,
|
|
"step": 4650,
|
|
"valid_targets_mean": 2133.2,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 5.899873257287706,
|
|
"grad_norm": 0.5751128911085843,
|
|
"learning_rate": 2.9422444715509878e-06,
|
|
"loss": 0.1024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.057598330080509186,
|
|
"step": 4655,
|
|
"valid_targets_mean": 1668.8,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 5.906210392902408,
|
|
"grad_norm": 0.46928428869700983,
|
|
"learning_rate": 2.9093275580632417e-06,
|
|
"loss": 0.0716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03717747703194618,
|
|
"step": 4660,
|
|
"valid_targets_mean": 3523.0,
|
|
"valid_targets_min": 2784
|
|
},
|
|
{
|
|
"epoch": 5.91254752851711,
|
|
"grad_norm": 0.4256914123097692,
|
|
"learning_rate": 2.8765813651115214e-06,
|
|
"loss": 0.0709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.031239502131938934,
|
|
"step": 4665,
|
|
"valid_targets_mean": 3526.4,
|
|
"valid_targets_min": 2525
|
|
},
|
|
{
|
|
"epoch": 5.918884664131813,
|
|
"grad_norm": 0.4318749527266825,
|
|
"learning_rate": 2.8440062198010187e-06,
|
|
"loss": 0.0694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.026867706328630447,
|
|
"step": 4670,
|
|
"valid_targets_mean": 2854.1,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 5.925221799746515,
|
|
"grad_norm": 0.5468607661315772,
|
|
"learning_rate": 2.8116024475283165e-06,
|
|
"loss": 0.0658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.032837942242622375,
|
|
"step": 4675,
|
|
"valid_targets_mean": 1380.8,
|
|
"valid_targets_min": 817
|
|
},
|
|
{
|
|
"epoch": 5.931558935361217,
|
|
"grad_norm": 1.164164629247682,
|
|
"learning_rate": 2.779370371978112e-06,
|
|
"loss": 0.0847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06143220141530037,
|
|
"step": 4680,
|
|
"valid_targets_mean": 1306.1,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 5.937896070975919,
|
|
"grad_norm": 0.9323886173526637,
|
|
"learning_rate": 2.7473103151200332e-06,
|
|
"loss": 0.0623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03481882065534592,
|
|
"step": 4685,
|
|
"valid_targets_mean": 2619.8,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 5.944233206590621,
|
|
"grad_norm": 0.3991799568629183,
|
|
"learning_rate": 2.7154225972053682e-06,
|
|
"loss": 0.0656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.030275344848632812,
|
|
"step": 4690,
|
|
"valid_targets_mean": 3353.8,
|
|
"valid_targets_min": 1606
|
|
},
|
|
{
|
|
"epoch": 5.9505703422053235,
|
|
"grad_norm": 0.45427849721241265,
|
|
"learning_rate": 2.6837075367639175e-06,
|
|
"loss": 0.0677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03076854906976223,
|
|
"step": 4695,
|
|
"valid_targets_mean": 3196.2,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 5.9569074778200255,
|
|
"grad_norm": 0.37800478999348736,
|
|
"learning_rate": 2.65216545060077e-06,
|
|
"loss": 0.0755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.032119669020175934,
|
|
"step": 4700,
|
|
"valid_targets_mean": 5146.8,
|
|
"valid_targets_min": 1078
|
|
},
|
|
{
|
|
"epoch": 5.9632446134347274,
|
|
"grad_norm": 0.3893878244710302,
|
|
"learning_rate": 2.620796653793165e-06,
|
|
"loss": 0.0658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03437361121177673,
|
|
"step": 4705,
|
|
"valid_targets_mean": 3559.5,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 5.969581749049429,
|
|
"grad_norm": 0.5921290521999178,
|
|
"learning_rate": 2.5896014596873385e-06,
|
|
"loss": 0.0721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0389363132417202,
|
|
"step": 4710,
|
|
"valid_targets_mean": 1290.6,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 5.975918884664132,
|
|
"grad_norm": 0.6134202717443759,
|
|
"learning_rate": 2.558580179895378e-06,
|
|
"loss": 0.092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07570131123065948,
|
|
"step": 4715,
|
|
"valid_targets_mean": 2540.9,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 5.982256020278834,
|
|
"grad_norm": 0.3715957636381195,
|
|
"learning_rate": 2.52773312429214e-06,
|
|
"loss": 0.1375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0335468165576458,
|
|
"step": 4720,
|
|
"valid_targets_mean": 3372.6,
|
|
"valid_targets_min": 2612
|
|
},
|
|
{
|
|
"epoch": 5.988593155893536,
|
|
"grad_norm": 0.42132599939086796,
|
|
"learning_rate": 2.497060601012118e-06,
|
|
"loss": 0.0717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038924604654312134,
|
|
"step": 4725,
|
|
"valid_targets_mean": 4056.1,
|
|
"valid_targets_min": 3620
|
|
},
|
|
{
|
|
"epoch": 5.994930291508238,
|
|
"grad_norm": 0.3837174572429977,
|
|
"learning_rate": 2.4665629164464045e-06,
|
|
"loss": 0.0669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.035894595086574554,
|
|
"step": 4730,
|
|
"valid_targets_mean": 3928.2,
|
|
"valid_targets_min": 3389
|
|
},
|
|
{
|
|
"epoch": 6.00126742712294,
|
|
"grad_norm": 0.7076679782660124,
|
|
"learning_rate": 2.4362403752395846e-06,
|
|
"loss": 0.124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09320268034934998,
|
|
"step": 4735,
|
|
"valid_targets_mean": 8215.2,
|
|
"valid_targets_min": 6163
|
|
},
|
|
{
|
|
"epoch": 6.007604562737643,
|
|
"grad_norm": 0.8777585347780718,
|
|
"learning_rate": 2.4060932802867498e-06,
|
|
"loss": 0.1906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09791034460067749,
|
|
"step": 4740,
|
|
"valid_targets_mean": 8594.4,
|
|
"valid_targets_min": 6006
|
|
},
|
|
{
|
|
"epoch": 6.013941698352345,
|
|
"grad_norm": 0.7550108989584415,
|
|
"learning_rate": 2.3761219327304087e-06,
|
|
"loss": 0.1728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07401634752750397,
|
|
"step": 4745,
|
|
"valid_targets_mean": 3578.4,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 6.020278833967047,
|
|
"grad_norm": 0.5865556448960597,
|
|
"learning_rate": 2.346326631957532e-06,
|
|
"loss": 0.1735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08831711858510971,
|
|
"step": 4750,
|
|
"valid_targets_mean": 7019.9,
|
|
"valid_targets_min": 4985
|
|
},
|
|
{
|
|
"epoch": 6.026615969581749,
|
|
"grad_norm": 0.5034300565122007,
|
|
"learning_rate": 2.3167076755965325e-06,
|
|
"loss": 0.1631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08411325514316559,
|
|
"step": 4755,
|
|
"valid_targets_mean": 6956.6,
|
|
"valid_targets_min": 1457
|
|
},
|
|
{
|
|
"epoch": 6.032953105196452,
|
|
"grad_norm": 0.45731941770581797,
|
|
"learning_rate": 2.2872653595142925e-06,
|
|
"loss": 0.1693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0800948515534401,
|
|
"step": 4760,
|
|
"valid_targets_mean": 7361.6,
|
|
"valid_targets_min": 4961
|
|
},
|
|
{
|
|
"epoch": 6.0392902408111535,
|
|
"grad_norm": 0.516983442444972,
|
|
"learning_rate": 2.257999977813228e-06,
|
|
"loss": 0.1855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10671978443861008,
|
|
"step": 4765,
|
|
"valid_targets_mean": 7058.5,
|
|
"valid_targets_min": 5985
|
|
},
|
|
{
|
|
"epoch": 6.0456273764258555,
|
|
"grad_norm": 0.44711673877433067,
|
|
"learning_rate": 2.2289118228283367e-06,
|
|
"loss": 0.1756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09534800052642822,
|
|
"step": 4770,
|
|
"valid_targets_mean": 7664.2,
|
|
"valid_targets_min": 5169
|
|
},
|
|
{
|
|
"epoch": 6.051964512040557,
|
|
"grad_norm": 0.44239013615064315,
|
|
"learning_rate": 2.200001185124281e-06,
|
|
"loss": 0.1635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08301720023155212,
|
|
"step": 4775,
|
|
"valid_targets_mean": 7111.8,
|
|
"valid_targets_min": 5348
|
|
},
|
|
{
|
|
"epoch": 6.05830164765526,
|
|
"grad_norm": 0.9529119093149885,
|
|
"learning_rate": 2.1712683534924727e-06,
|
|
"loss": 0.151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07151301205158234,
|
|
"step": 4780,
|
|
"valid_targets_mean": 4238.9,
|
|
"valid_targets_min": 152
|
|
},
|
|
{
|
|
"epoch": 6.064638783269962,
|
|
"grad_norm": 0.43951867367094133,
|
|
"learning_rate": 2.142713614948215e-06,
|
|
"loss": 0.1566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08399534225463867,
|
|
"step": 4785,
|
|
"valid_targets_mean": 7199.4,
|
|
"valid_targets_min": 4258
|
|
},
|
|
{
|
|
"epoch": 6.070975918884664,
|
|
"grad_norm": 0.4242055588078155,
|
|
"learning_rate": 2.1143372547278097e-06,
|
|
"loss": 0.1645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08014696836471558,
|
|
"step": 4790,
|
|
"valid_targets_mean": 7243.4,
|
|
"valid_targets_min": 5831
|
|
},
|
|
{
|
|
"epoch": 6.077313054499366,
|
|
"grad_norm": 0.43973721555108014,
|
|
"learning_rate": 2.0861395562857247e-06,
|
|
"loss": 0.1614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08203932642936707,
|
|
"step": 4795,
|
|
"valid_targets_mean": 6950.6,
|
|
"valid_targets_min": 3862
|
|
},
|
|
{
|
|
"epoch": 6.083650190114068,
|
|
"grad_norm": 0.4243320295575023,
|
|
"learning_rate": 2.058120801291752e-06,
|
|
"loss": 0.1597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08435851335525513,
|
|
"step": 4800,
|
|
"valid_targets_mean": 6991.9,
|
|
"valid_targets_min": 5696
|
|
},
|
|
{
|
|
"epoch": 6.089987325728771,
|
|
"grad_norm": 0.4275396903952467,
|
|
"learning_rate": 2.0302812696282003e-06,
|
|
"loss": 0.1543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08645622432231903,
|
|
"step": 4805,
|
|
"valid_targets_mean": 6963.5,
|
|
"valid_targets_min": 4947
|
|
},
|
|
{
|
|
"epoch": 6.096324461343473,
|
|
"grad_norm": 0.39723160576551036,
|
|
"learning_rate": 2.0026212393871057e-06,
|
|
"loss": 0.1587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07435034215450287,
|
|
"step": 4810,
|
|
"valid_targets_mean": 6436.9,
|
|
"valid_targets_min": 4988
|
|
},
|
|
{
|
|
"epoch": 6.102661596958175,
|
|
"grad_norm": 0.40169963686560883,
|
|
"learning_rate": 1.9751409868674297e-06,
|
|
"loss": 0.1628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07919090986251831,
|
|
"step": 4815,
|
|
"valid_targets_mean": 7518.0,
|
|
"valid_targets_min": 5476
|
|
},
|
|
{
|
|
"epoch": 6.108998732572877,
|
|
"grad_norm": 0.3751730188160912,
|
|
"learning_rate": 1.947840786572328e-06,
|
|
"loss": 0.1562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07387060672044754,
|
|
"step": 4820,
|
|
"valid_targets_mean": 7422.6,
|
|
"valid_targets_min": 5226
|
|
},
|
|
{
|
|
"epoch": 6.11533586818758,
|
|
"grad_norm": 0.46748552052719183,
|
|
"learning_rate": 1.9207209112063883e-06,
|
|
"loss": 0.1682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10331843048334122,
|
|
"step": 4825,
|
|
"valid_targets_mean": 7080.0,
|
|
"valid_targets_min": 3753
|
|
},
|
|
{
|
|
"epoch": 6.1216730038022815,
|
|
"grad_norm": 0.4932002133504262,
|
|
"learning_rate": 1.8937816316729195e-06,
|
|
"loss": 0.1843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09357616305351257,
|
|
"step": 4830,
|
|
"valid_targets_mean": 6064.6,
|
|
"valid_targets_min": 5161
|
|
},
|
|
{
|
|
"epoch": 6.1280101394169835,
|
|
"grad_norm": 0.4167566886302276,
|
|
"learning_rate": 1.867023217071231e-06,
|
|
"loss": 0.1292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07850947976112366,
|
|
"step": 4835,
|
|
"valid_targets_mean": 7388.1,
|
|
"valid_targets_min": 5795
|
|
},
|
|
{
|
|
"epoch": 6.134347275031685,
|
|
"grad_norm": 0.4309377764849422,
|
|
"learning_rate": 1.8404459346939597e-06,
|
|
"loss": 0.1539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07340958714485168,
|
|
"step": 4840,
|
|
"valid_targets_mean": 7696.0,
|
|
"valid_targets_min": 4658
|
|
},
|
|
{
|
|
"epoch": 6.140684410646388,
|
|
"grad_norm": 0.427447738999388,
|
|
"learning_rate": 1.8140500500244007e-06,
|
|
"loss": 0.1578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08510193228721619,
|
|
"step": 4845,
|
|
"valid_targets_mean": 7799.1,
|
|
"valid_targets_min": 5508
|
|
},
|
|
{
|
|
"epoch": 6.14702154626109,
|
|
"grad_norm": 0.45075627908678856,
|
|
"learning_rate": 1.787835826733828e-06,
|
|
"loss": 0.1657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0814749002456665,
|
|
"step": 4850,
|
|
"valid_targets_mean": 7341.6,
|
|
"valid_targets_min": 6454
|
|
},
|
|
{
|
|
"epoch": 6.153358681875792,
|
|
"grad_norm": 0.4226624318613456,
|
|
"learning_rate": 1.7618035266789046e-06,
|
|
"loss": 0.161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06940828263759613,
|
|
"step": 4855,
|
|
"valid_targets_mean": 6818.0,
|
|
"valid_targets_min": 4392
|
|
},
|
|
{
|
|
"epoch": 6.159695817490494,
|
|
"grad_norm": 0.3891341794013059,
|
|
"learning_rate": 1.7359534098990184e-06,
|
|
"loss": 0.1583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06927552819252014,
|
|
"step": 4860,
|
|
"valid_targets_mean": 7857.6,
|
|
"valid_targets_min": 5394
|
|
},
|
|
{
|
|
"epoch": 6.166032953105196,
|
|
"grad_norm": 0.36794823276700567,
|
|
"learning_rate": 1.7102857346137303e-06,
|
|
"loss": 0.1403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06515541672706604,
|
|
"step": 4865,
|
|
"valid_targets_mean": 8314.1,
|
|
"valid_targets_min": 4837
|
|
},
|
|
{
|
|
"epoch": 6.172370088719899,
|
|
"grad_norm": 0.3465841011135408,
|
|
"learning_rate": 1.684800757220164e-06,
|
|
"loss": 0.1395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05954679101705551,
|
|
"step": 4870,
|
|
"valid_targets_mean": 7368.2,
|
|
"valid_targets_min": 6027
|
|
},
|
|
{
|
|
"epoch": 6.178707224334601,
|
|
"grad_norm": 0.43131577755636846,
|
|
"learning_rate": 1.659498732290461e-06,
|
|
"loss": 0.1479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07968258857727051,
|
|
"step": 4875,
|
|
"valid_targets_mean": 6435.0,
|
|
"valid_targets_min": 4238
|
|
},
|
|
{
|
|
"epoch": 6.185044359949303,
|
|
"grad_norm": 1.2533173526349402,
|
|
"learning_rate": 1.6343799125692194e-06,
|
|
"loss": 0.1416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03792010247707367,
|
|
"step": 4880,
|
|
"valid_targets_mean": 247.8,
|
|
"valid_targets_min": 137
|
|
},
|
|
{
|
|
"epoch": 6.191381495564005,
|
|
"grad_norm": 0.4276113607178055,
|
|
"learning_rate": 1.6094445489709886e-06,
|
|
"loss": 0.1634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07608437538146973,
|
|
"step": 4885,
|
|
"valid_targets_mean": 6807.0,
|
|
"valid_targets_min": 5165
|
|
},
|
|
{
|
|
"epoch": 6.197718631178708,
|
|
"grad_norm": 0.5187139802134315,
|
|
"learning_rate": 1.5846928905777591e-06,
|
|
"loss": 0.1705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08837427198886871,
|
|
"step": 4890,
|
|
"valid_targets_mean": 6989.9,
|
|
"valid_targets_min": 5104
|
|
},
|
|
{
|
|
"epoch": 6.2040557667934095,
|
|
"grad_norm": 0.46208212869814186,
|
|
"learning_rate": 1.5601251846364607e-06,
|
|
"loss": 0.1681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0903572216629982,
|
|
"step": 4895,
|
|
"valid_targets_mean": 6692.2,
|
|
"valid_targets_min": 5068
|
|
},
|
|
{
|
|
"epoch": 6.2103929024081115,
|
|
"grad_norm": 0.4485098071080024,
|
|
"learning_rate": 1.5357416765565125e-06,
|
|
"loss": 0.1643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09870140254497528,
|
|
"step": 4900,
|
|
"valid_targets_mean": 7419.6,
|
|
"valid_targets_min": 5578
|
|
},
|
|
{
|
|
"epoch": 6.216730038022813,
|
|
"grad_norm": 0.4046176023526548,
|
|
"learning_rate": 1.5115426099073594e-06,
|
|
"loss": 0.158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08085937798023224,
|
|
"step": 4905,
|
|
"valid_targets_mean": 7536.5,
|
|
"valid_targets_min": 5075
|
|
},
|
|
{
|
|
"epoch": 6.223067173637516,
|
|
"grad_norm": 0.40193740084664736,
|
|
"learning_rate": 1.4875282264160374e-06,
|
|
"loss": 0.1656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07566690444946289,
|
|
"step": 4910,
|
|
"valid_targets_mean": 7221.5,
|
|
"valid_targets_min": 5841
|
|
},
|
|
{
|
|
"epoch": 6.229404309252218,
|
|
"grad_norm": 0.39775558692231483,
|
|
"learning_rate": 1.4636987659647628e-06,
|
|
"loss": 0.1599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07742279767990112,
|
|
"step": 4915,
|
|
"valid_targets_mean": 7287.5,
|
|
"valid_targets_min": 5151
|
|
},
|
|
{
|
|
"epoch": 6.23574144486692,
|
|
"grad_norm": 0.47662481480755914,
|
|
"learning_rate": 1.440054466588543e-06,
|
|
"loss": 0.1691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0780804455280304,
|
|
"step": 4920,
|
|
"valid_targets_mean": 6738.4,
|
|
"valid_targets_min": 4042
|
|
},
|
|
{
|
|
"epoch": 6.242078580481622,
|
|
"grad_norm": 0.4756427267957198,
|
|
"learning_rate": 1.4165955644727868e-06,
|
|
"loss": 0.1629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08917786180973053,
|
|
"step": 4925,
|
|
"valid_targets_mean": 6331.0,
|
|
"valid_targets_min": 5496
|
|
},
|
|
{
|
|
"epoch": 6.248415716096324,
|
|
"grad_norm": 0.437959820207839,
|
|
"learning_rate": 1.3933222939509429e-06,
|
|
"loss": 0.1637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07916714251041412,
|
|
"step": 4930,
|
|
"valid_targets_mean": 6271.1,
|
|
"valid_targets_min": 4695
|
|
},
|
|
{
|
|
"epoch": 6.254752851711027,
|
|
"grad_norm": 0.4929697470555312,
|
|
"learning_rate": 1.370234887502182e-06,
|
|
"loss": 0.1608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08323325216770172,
|
|
"step": 4935,
|
|
"valid_targets_mean": 5790.8,
|
|
"valid_targets_min": 5021
|
|
},
|
|
{
|
|
"epoch": 6.261089987325729,
|
|
"grad_norm": 0.4948613674368671,
|
|
"learning_rate": 1.3473335757490458e-06,
|
|
"loss": 0.1536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05985357612371445,
|
|
"step": 4940,
|
|
"valid_targets_mean": 3386.8,
|
|
"valid_targets_min": 2279
|
|
},
|
|
{
|
|
"epoch": 6.267427122940431,
|
|
"grad_norm": 0.45814184025791604,
|
|
"learning_rate": 1.3246185874551688e-06,
|
|
"loss": 0.1322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07920309901237488,
|
|
"step": 4945,
|
|
"valid_targets_mean": 6240.5,
|
|
"valid_targets_min": 5093
|
|
},
|
|
{
|
|
"epoch": 6.273764258555133,
|
|
"grad_norm": 0.452426619741269,
|
|
"learning_rate": 1.3020901495229632e-06,
|
|
"loss": 0.1568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07977472245693207,
|
|
"step": 4950,
|
|
"valid_targets_mean": 6329.5,
|
|
"valid_targets_min": 4589
|
|
},
|
|
{
|
|
"epoch": 6.280101394169836,
|
|
"grad_norm": 0.4189540319880572,
|
|
"learning_rate": 1.2797484869913901e-06,
|
|
"loss": 0.1576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08515322953462601,
|
|
"step": 4955,
|
|
"valid_targets_mean": 7589.2,
|
|
"valid_targets_min": 4963
|
|
},
|
|
{
|
|
"epoch": 6.2864385297845375,
|
|
"grad_norm": 0.415218988859957,
|
|
"learning_rate": 1.2575938230336782e-06,
|
|
"loss": 0.158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06813092529773712,
|
|
"step": 4960,
|
|
"valid_targets_mean": 5577.9,
|
|
"valid_targets_min": 4458
|
|
},
|
|
{
|
|
"epoch": 6.2927756653992395,
|
|
"grad_norm": 0.4594851376874127,
|
|
"learning_rate": 1.2356263789551015e-06,
|
|
"loss": 0.1576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07723643630743027,
|
|
"step": 4965,
|
|
"valid_targets_mean": 5432.5,
|
|
"valid_targets_min": 4433
|
|
},
|
|
{
|
|
"epoch": 6.299112801013941,
|
|
"grad_norm": 0.4377964204083324,
|
|
"learning_rate": 1.213846374190797e-06,
|
|
"loss": 0.1617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08214086294174194,
|
|
"step": 4970,
|
|
"valid_targets_mean": 6361.8,
|
|
"valid_targets_min": 5201
|
|
},
|
|
{
|
|
"epoch": 6.305449936628644,
|
|
"grad_norm": 0.44981701878590613,
|
|
"learning_rate": 1.192254026303532e-06,
|
|
"loss": 0.161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0755121260881424,
|
|
"step": 4975,
|
|
"valid_targets_mean": 5824.4,
|
|
"valid_targets_min": 5129
|
|
},
|
|
{
|
|
"epoch": 6.311787072243346,
|
|
"grad_norm": 0.7643219177849129,
|
|
"learning_rate": 1.1708495509815564e-06,
|
|
"loss": 0.1232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07234219461679459,
|
|
"step": 4980,
|
|
"valid_targets_mean": 1637.6,
|
|
"valid_targets_min": 137
|
|
},
|
|
{
|
|
"epoch": 6.318124207858048,
|
|
"grad_norm": 0.44922848668163706,
|
|
"learning_rate": 1.1496331620364366e-06,
|
|
"loss": 0.1547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07825937867164612,
|
|
"step": 4985,
|
|
"valid_targets_mean": 6474.6,
|
|
"valid_targets_min": 4751
|
|
},
|
|
{
|
|
"epoch": 6.32446134347275,
|
|
"grad_norm": 0.4244323682997833,
|
|
"learning_rate": 1.1286050714009345e-06,
|
|
"loss": 0.1555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08661683648824692,
|
|
"step": 4990,
|
|
"valid_targets_mean": 7186.2,
|
|
"valid_targets_min": 5358
|
|
},
|
|
{
|
|
"epoch": 6.330798479087452,
|
|
"grad_norm": 0.39789746890822375,
|
|
"learning_rate": 1.107765489126864e-06,
|
|
"loss": 0.1601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07937803119421005,
|
|
"step": 4995,
|
|
"valid_targets_mean": 7126.8,
|
|
"valid_targets_min": 4078
|
|
},
|
|
{
|
|
"epoch": 6.337135614702155,
|
|
"grad_norm": 0.41493007382649727,
|
|
"learning_rate": 1.0871146233830231e-06,
|
|
"loss": 0.1554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07606764882802963,
|
|
"step": 5000,
|
|
"valid_targets_mean": 7022.1,
|
|
"valid_targets_min": 4940
|
|
},
|
|
{
|
|
"epoch": 6.343472750316857,
|
|
"grad_norm": 0.48353118463607714,
|
|
"learning_rate": 1.0666526804530997e-06,
|
|
"loss": 0.1515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08193910121917725,
|
|
"step": 5005,
|
|
"valid_targets_mean": 5900.6,
|
|
"valid_targets_min": 4779
|
|
},
|
|
{
|
|
"epoch": 6.349809885931559,
|
|
"grad_norm": 0.42529155994518664,
|
|
"learning_rate": 1.0463798647335977e-06,
|
|
"loss": 0.158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08134208619594574,
|
|
"step": 5010,
|
|
"valid_targets_mean": 6290.1,
|
|
"valid_targets_min": 4620
|
|
},
|
|
{
|
|
"epoch": 6.356147021546261,
|
|
"grad_norm": 0.5455915620096151,
|
|
"learning_rate": 1.0262963787318303e-06,
|
|
"loss": 0.1279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08074585348367691,
|
|
"step": 5015,
|
|
"valid_targets_mean": 1545.8,
|
|
"valid_targets_min": 333
|
|
},
|
|
{
|
|
"epoch": 6.362484157160964,
|
|
"grad_norm": 0.41952125261345913,
|
|
"learning_rate": 1.0064024230638547e-06,
|
|
"loss": 0.0827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04329833388328552,
|
|
"step": 5020,
|
|
"valid_targets_mean": 3339.4,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 6.3688212927756656,
|
|
"grad_norm": 0.4728195257343744,
|
|
"learning_rate": 9.866981964525068e-07,
|
|
"loss": 0.0747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03722409904003143,
|
|
"step": 5025,
|
|
"valid_targets_mean": 2061.8,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 6.3751584283903675,
|
|
"grad_norm": 0.45440236610177764,
|
|
"learning_rate": 9.671838957253875e-07,
|
|
"loss": 0.0793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03274078667163849,
|
|
"step": 5030,
|
|
"valid_targets_mean": 2967.0,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 6.3814955640050695,
|
|
"grad_norm": 0.41282398749340576,
|
|
"learning_rate": 9.478597158129155e-07,
|
|
"loss": 0.0726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0378771610558033,
|
|
"step": 5035,
|
|
"valid_targets_mean": 2676.2,
|
|
"valid_targets_min": 818
|
|
},
|
|
{
|
|
"epoch": 6.387832699619771,
|
|
"grad_norm": 0.4071253478659879,
|
|
"learning_rate": 9.287258497463658e-07,
|
|
"loss": 0.0751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036244507879018784,
|
|
"step": 5040,
|
|
"valid_targets_mean": 2992.4,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 6.394169835234474,
|
|
"grad_norm": 0.9563872273567805,
|
|
"learning_rate": 9.097824886559592e-07,
|
|
"loss": 0.1158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05392889678478241,
|
|
"step": 5045,
|
|
"valid_targets_mean": 1533.4,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 6.400506970849176,
|
|
"grad_norm": 0.3938748534224186,
|
|
"learning_rate": 8.910298217689295e-07,
|
|
"loss": 0.0879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036318231374025345,
|
|
"step": 5050,
|
|
"valid_targets_mean": 3398.4,
|
|
"valid_targets_min": 1133
|
|
},
|
|
{
|
|
"epoch": 6.406844106463878,
|
|
"grad_norm": 0.43097725195045306,
|
|
"learning_rate": 8.724680364076476e-07,
|
|
"loss": 0.0815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03481657803058624,
|
|
"step": 5055,
|
|
"valid_targets_mean": 2008.1,
|
|
"valid_targets_min": 859
|
|
},
|
|
{
|
|
"epoch": 6.41318124207858,
|
|
"grad_norm": 0.2871621369602257,
|
|
"learning_rate": 8.540973179877587e-07,
|
|
"loss": 0.1065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.023948771879076958,
|
|
"step": 5060,
|
|
"valid_targets_mean": 4961.9,
|
|
"valid_targets_min": 962
|
|
},
|
|
{
|
|
"epoch": 6.419518377693283,
|
|
"grad_norm": 0.390469529094409,
|
|
"learning_rate": 8.359178500163079e-07,
|
|
"loss": 0.0648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043171823024749756,
|
|
"step": 5065,
|
|
"valid_targets_mean": 4281.1,
|
|
"valid_targets_min": 2091
|
|
},
|
|
{
|
|
"epoch": 6.425855513307985,
|
|
"grad_norm": 0.3868675508536348,
|
|
"learning_rate": 8.179298140899283e-07,
|
|
"loss": 0.0698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.033760152757167816,
|
|
"step": 5070,
|
|
"valid_targets_mean": 3922.4,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 6.432192648922687,
|
|
"grad_norm": 0.39227790873103163,
|
|
"learning_rate": 8.001333898930163e-07,
|
|
"loss": 0.0714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.032616954296827316,
|
|
"step": 5075,
|
|
"valid_targets_mean": 2884.8,
|
|
"valid_targets_min": 518
|
|
},
|
|
{
|
|
"epoch": 6.438529784537389,
|
|
"grad_norm": 0.46665323756915855,
|
|
"learning_rate": 7.825287551959371e-07,
|
|
"loss": 0.1112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04251069575548172,
|
|
"step": 5080,
|
|
"valid_targets_mean": 2802.8,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 6.444866920152092,
|
|
"grad_norm": 0.3415345575838028,
|
|
"learning_rate": 7.651160858532436e-07,
|
|
"loss": 0.0637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02795930951833725,
|
|
"step": 5085,
|
|
"valid_targets_mean": 3904.2,
|
|
"valid_targets_min": 3170
|
|
},
|
|
{
|
|
"epoch": 6.451204055766794,
|
|
"grad_norm": 0.5808582600219552,
|
|
"learning_rate": 7.478955558019408e-07,
|
|
"loss": 0.0763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04081156104803085,
|
|
"step": 5090,
|
|
"valid_targets_mean": 1567.1,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 6.4575411913814955,
|
|
"grad_norm": 0.3364471814008415,
|
|
"learning_rate": 7.308673370597219e-07,
|
|
"loss": 0.0589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02167585678398609,
|
|
"step": 5095,
|
|
"valid_targets_mean": 3412.6,
|
|
"valid_targets_min": 2490
|
|
},
|
|
{
|
|
"epoch": 6.4638783269961975,
|
|
"grad_norm": 0.45864699154059213,
|
|
"learning_rate": 7.140315997232661e-07,
|
|
"loss": 0.0772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03443540260195732,
|
|
"step": 5100,
|
|
"valid_targets_mean": 2430.0,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 6.4702154626109,
|
|
"grad_norm": 0.5696803446286315,
|
|
"learning_rate": 6.973885119665392e-07,
|
|
"loss": 0.0814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05110757052898407,
|
|
"step": 5105,
|
|
"valid_targets_mean": 1596.5,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 6.476552598225602,
|
|
"grad_norm": 0.5707546028230028,
|
|
"learning_rate": 6.809382400390996e-07,
|
|
"loss": 0.0805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04956081882119179,
|
|
"step": 5110,
|
|
"valid_targets_mean": 3337.4,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 6.482889733840304,
|
|
"grad_norm": 0.43682854638249413,
|
|
"learning_rate": 6.646809482644645e-07,
|
|
"loss": 0.0791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03871751204133034,
|
|
"step": 5115,
|
|
"valid_targets_mean": 2088.1,
|
|
"valid_targets_min": 1008
|
|
},
|
|
{
|
|
"epoch": 6.489226869455006,
|
|
"grad_norm": 0.48582178814833715,
|
|
"learning_rate": 6.486167990384329e-07,
|
|
"loss": 0.1181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02995602786540985,
|
|
"step": 5120,
|
|
"valid_targets_mean": 3485.2,
|
|
"valid_targets_min": 2819
|
|
},
|
|
{
|
|
"epoch": 6.495564005069708,
|
|
"grad_norm": 0.39514320184430207,
|
|
"learning_rate": 6.327459528275004e-07,
|
|
"loss": 0.0666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02845931053161621,
|
|
"step": 5125,
|
|
"valid_targets_mean": 4105.1,
|
|
"valid_targets_min": 3927
|
|
},
|
|
{
|
|
"epoch": 6.501901140684411,
|
|
"grad_norm": 0.6471868461509963,
|
|
"learning_rate": 6.170685681672206e-07,
|
|
"loss": 0.0784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06015824154019356,
|
|
"step": 5130,
|
|
"valid_targets_mean": 1385.8,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 6.508238276299113,
|
|
"grad_norm": 0.46543102778448175,
|
|
"learning_rate": 6.015848016606529e-07,
|
|
"loss": 0.0959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03237222880125046,
|
|
"step": 5135,
|
|
"valid_targets_mean": 2748.1,
|
|
"valid_targets_min": 754
|
|
},
|
|
{
|
|
"epoch": 6.514575411913815,
|
|
"grad_norm": 0.41657687956705836,
|
|
"learning_rate": 5.862948079767838e-07,
|
|
"loss": 0.0879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040709175169467926,
|
|
"step": 5140,
|
|
"valid_targets_mean": 3963.2,
|
|
"valid_targets_min": 3178
|
|
},
|
|
{
|
|
"epoch": 6.520912547528517,
|
|
"grad_norm": 0.6671777377937809,
|
|
"learning_rate": 5.711987398489704e-07,
|
|
"loss": 0.1065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10967512428760529,
|
|
"step": 5145,
|
|
"valid_targets_mean": 1719.4,
|
|
"valid_targets_min": 920
|
|
},
|
|
{
|
|
"epoch": 6.52724968314322,
|
|
"grad_norm": 0.4677522712771826,
|
|
"learning_rate": 5.562967480734416e-07,
|
|
"loss": 0.2395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03834231197834015,
|
|
"step": 5150,
|
|
"valid_targets_mean": 3288.6,
|
|
"valid_targets_min": 2176
|
|
},
|
|
{
|
|
"epoch": 6.533586818757922,
|
|
"grad_norm": 0.4465602418609856,
|
|
"learning_rate": 5.41588981507768e-07,
|
|
"loss": 0.0811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.031542278826236725,
|
|
"step": 5155,
|
|
"valid_targets_mean": 2379.0,
|
|
"valid_targets_min": 833
|
|
},
|
|
{
|
|
"epoch": 6.5399239543726235,
|
|
"grad_norm": 0.47559943276032546,
|
|
"learning_rate": 5.270755870693877e-07,
|
|
"loss": 0.0807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04487092047929764,
|
|
"step": 5160,
|
|
"valid_targets_mean": 3301.8,
|
|
"valid_targets_min": 1212
|
|
},
|
|
{
|
|
"epoch": 6.5462610899873255,
|
|
"grad_norm": 0.47336988481515074,
|
|
"learning_rate": 5.127567097341302e-07,
|
|
"loss": 0.1706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05472491681575775,
|
|
"step": 5165,
|
|
"valid_targets_mean": 4058.1,
|
|
"valid_targets_min": 2258
|
|
},
|
|
{
|
|
"epoch": 6.552598225602027,
|
|
"grad_norm": 0.3491673536496835,
|
|
"learning_rate": 4.986324925347807e-07,
|
|
"loss": 0.0606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.032516129314899445,
|
|
"step": 5170,
|
|
"valid_targets_mean": 3983.5,
|
|
"valid_targets_min": 2664
|
|
},
|
|
{
|
|
"epoch": 6.55893536121673,
|
|
"grad_norm": 0.3576751462496197,
|
|
"learning_rate": 4.847030765596405e-07,
|
|
"loss": 0.082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02839261293411255,
|
|
"step": 5175,
|
|
"valid_targets_mean": 3440.0,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 6.565272496831432,
|
|
"grad_norm": 0.4306772805636741,
|
|
"learning_rate": 4.709686009511183e-07,
|
|
"loss": 0.0805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04212718456983566,
|
|
"step": 5180,
|
|
"valid_targets_mean": 3540.6,
|
|
"valid_targets_min": 1615
|
|
},
|
|
{
|
|
"epoch": 6.571609632446134,
|
|
"grad_norm": 0.3853551381030921,
|
|
"learning_rate": 4.574292029043448e-07,
|
|
"loss": 0.0657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.026643194258213043,
|
|
"step": 5185,
|
|
"valid_targets_mean": 3660.2,
|
|
"valid_targets_min": 2257
|
|
},
|
|
{
|
|
"epoch": 6.577946768060836,
|
|
"grad_norm": 0.386961047280167,
|
|
"learning_rate": 4.440850176658007e-07,
|
|
"loss": 0.0654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03077574633061886,
|
|
"step": 5190,
|
|
"valid_targets_mean": 3504.1,
|
|
"valid_targets_min": 886
|
|
},
|
|
{
|
|
"epoch": 6.584283903675539,
|
|
"grad_norm": 0.3998397912833084,
|
|
"learning_rate": 4.3093617853196213e-07,
|
|
"loss": 0.0695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03495289012789726,
|
|
"step": 5195,
|
|
"valid_targets_mean": 3628.4,
|
|
"valid_targets_min": 2409
|
|
},
|
|
{
|
|
"epoch": 6.590621039290241,
|
|
"grad_norm": 0.36618107624212975,
|
|
"learning_rate": 4.179828168479794e-07,
|
|
"loss": 0.0653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02845647931098938,
|
|
"step": 5200,
|
|
"valid_targets_mean": 3437.6,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 6.596958174904943,
|
|
"grad_norm": 0.6915872118164176,
|
|
"learning_rate": 4.052250620063514e-07,
|
|
"loss": 0.1047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08474846184253693,
|
|
"step": 5205,
|
|
"valid_targets_mean": 2623.4,
|
|
"valid_targets_min": 925
|
|
},
|
|
{
|
|
"epoch": 6.603295310519645,
|
|
"grad_norm": 0.335046430675319,
|
|
"learning_rate": 3.9266304144564006e-07,
|
|
"loss": 0.0609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02567702904343605,
|
|
"step": 5210,
|
|
"valid_targets_mean": 3695.5,
|
|
"valid_targets_min": 2877
|
|
},
|
|
{
|
|
"epoch": 6.609632446134348,
|
|
"grad_norm": 0.6028695907148579,
|
|
"learning_rate": 3.8029688064920247e-07,
|
|
"loss": 0.0612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045309070497751236,
|
|
"step": 5215,
|
|
"valid_targets_mean": 2015.2,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 6.61596958174905,
|
|
"grad_norm": 0.33364180599945814,
|
|
"learning_rate": 3.681267031439251e-07,
|
|
"loss": 0.0602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02560180239379406,
|
|
"step": 5220,
|
|
"valid_targets_mean": 3648.8,
|
|
"valid_targets_min": 2353
|
|
},
|
|
{
|
|
"epoch": 6.6223067173637515,
|
|
"grad_norm": 0.40054196370071193,
|
|
"learning_rate": 3.5615263049899815e-07,
|
|
"loss": 0.0624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03592067211866379,
|
|
"step": 5225,
|
|
"valid_targets_mean": 2543.6,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 6.6286438529784535,
|
|
"grad_norm": 0.39342685550267,
|
|
"learning_rate": 3.4437478232470123e-07,
|
|
"loss": 0.1102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.031389348208904266,
|
|
"step": 5230,
|
|
"valid_targets_mean": 3863.6,
|
|
"valid_targets_min": 3340
|
|
},
|
|
{
|
|
"epoch": 6.634980988593156,
|
|
"grad_norm": 0.4315906076877965,
|
|
"learning_rate": 3.3279327627120606e-07,
|
|
"loss": 0.0895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06375448405742645,
|
|
"step": 5235,
|
|
"valid_targets_mean": 2883.1,
|
|
"valid_targets_min": 960
|
|
},
|
|
{
|
|
"epoch": 6.641318124207858,
|
|
"grad_norm": 0.4073333663844834,
|
|
"learning_rate": 3.214082280274067e-07,
|
|
"loss": 0.0715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03171767666935921,
|
|
"step": 5240,
|
|
"valid_targets_mean": 1581.1,
|
|
"valid_targets_min": 804
|
|
},
|
|
{
|
|
"epoch": 6.64765525982256,
|
|
"grad_norm": 0.9989255974173777,
|
|
"learning_rate": 3.1021975131975137e-07,
|
|
"loss": 0.0865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07699338346719742,
|
|
"step": 5245,
|
|
"valid_targets_mean": 1356.8,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 6.653992395437262,
|
|
"grad_norm": 0.6231345570332961,
|
|
"learning_rate": 2.9922795791111905e-07,
|
|
"loss": 0.1104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056185685098171234,
|
|
"step": 5250,
|
|
"valid_targets_mean": 2382.9,
|
|
"valid_targets_min": 910
|
|
},
|
|
{
|
|
"epoch": 6.660329531051964,
|
|
"grad_norm": 0.5130353022679787,
|
|
"learning_rate": 2.884329575996958e-07,
|
|
"loss": 0.0751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03647318482398987,
|
|
"step": 5255,
|
|
"valid_targets_mean": 2193.6,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 6.666666666666667,
|
|
"grad_norm": 0.37875884732540666,
|
|
"learning_rate": 2.778348582178847e-07,
|
|
"loss": 0.0925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03465791791677475,
|
|
"step": 5260,
|
|
"valid_targets_mean": 3755.8,
|
|
"valid_targets_min": 2024
|
|
},
|
|
{
|
|
"epoch": 6.673003802281369,
|
|
"grad_norm": 0.5763567692866084,
|
|
"learning_rate": 2.674337656312198e-07,
|
|
"loss": 0.0757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06274057924747467,
|
|
"step": 5265,
|
|
"valid_targets_mean": 1787.9,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 6.679340937896071,
|
|
"grad_norm": 0.38121531156270294,
|
|
"learning_rate": 2.5722978373731835e-07,
|
|
"loss": 0.0658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029012668877840042,
|
|
"step": 5270,
|
|
"valid_targets_mean": 2625.9,
|
|
"valid_targets_min": 468
|
|
},
|
|
{
|
|
"epoch": 6.685678073510773,
|
|
"grad_norm": 0.278306079703309,
|
|
"learning_rate": 2.472230144648369e-07,
|
|
"loss": 0.0747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02001018449664116,
|
|
"step": 5275,
|
|
"valid_targets_mean": 5125.8,
|
|
"valid_targets_min": 1195
|
|
},
|
|
{
|
|
"epoch": 6.692015209125476,
|
|
"grad_norm": 0.4038137221311576,
|
|
"learning_rate": 2.374135577724479e-07,
|
|
"loss": 0.0658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.027957838028669357,
|
|
"step": 5280,
|
|
"valid_targets_mean": 3718.0,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 6.698352344740178,
|
|
"grad_norm": 0.39887523491288646,
|
|
"learning_rate": 2.2780151164785825e-07,
|
|
"loss": 0.0647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03297743946313858,
|
|
"step": 5285,
|
|
"valid_targets_mean": 3669.1,
|
|
"valid_targets_min": 981
|
|
},
|
|
{
|
|
"epoch": 6.7046894803548795,
|
|
"grad_norm": 0.49673569655527894,
|
|
"learning_rate": 2.183869721068077e-07,
|
|
"loss": 0.0745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03876856714487076,
|
|
"step": 5290,
|
|
"valid_targets_mean": 1833.0,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 6.7110266159695815,
|
|
"grad_norm": 0.4143633795195941,
|
|
"learning_rate": 2.0917003319213426e-07,
|
|
"loss": 0.0818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036633510142564774,
|
|
"step": 5295,
|
|
"valid_targets_mean": 3618.4,
|
|
"valid_targets_min": 2777
|
|
},
|
|
{
|
|
"epoch": 6.7173637515842834,
|
|
"grad_norm": 0.43138395570136384,
|
|
"learning_rate": 2.0015078697281477e-07,
|
|
"loss": 0.0819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03896140679717064,
|
|
"step": 5300,
|
|
"valid_targets_mean": 3884.2,
|
|
"valid_targets_min": 3061
|
|
},
|
|
{
|
|
"epoch": 6.723700887198986,
|
|
"grad_norm": 0.32763671416659734,
|
|
"learning_rate": 1.9132932354305023e-07,
|
|
"loss": 0.0667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0293594878166914,
|
|
"step": 5305,
|
|
"valid_targets_mean": 3460.1,
|
|
"valid_targets_min": 1099
|
|
},
|
|
{
|
|
"epoch": 6.730038022813688,
|
|
"grad_norm": 0.4025197604229111,
|
|
"learning_rate": 1.8270573102137757e-07,
|
|
"loss": 0.0695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.031069481745362282,
|
|
"step": 5310,
|
|
"valid_targets_mean": 2901.4,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 6.73637515842839,
|
|
"grad_norm": 0.368563608343647,
|
|
"learning_rate": 1.7428009554977255e-07,
|
|
"loss": 0.0772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.031165387481451035,
|
|
"step": 5315,
|
|
"valid_targets_mean": 3648.0,
|
|
"valid_targets_min": 2600
|
|
},
|
|
{
|
|
"epoch": 6.742712294043092,
|
|
"grad_norm": 0.3398743658985008,
|
|
"learning_rate": 1.660525012928038e-07,
|
|
"loss": 0.0582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029926961287856102,
|
|
"step": 5320,
|
|
"valid_targets_mean": 3957.9,
|
|
"valid_targets_min": 3718
|
|
},
|
|
{
|
|
"epoch": 6.749049429657795,
|
|
"grad_norm": 0.43699974388487023,
|
|
"learning_rate": 1.5802303043677359e-07,
|
|
"loss": 0.0745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02996104396879673,
|
|
"step": 5325,
|
|
"valid_targets_mean": 2735.0,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 6.755386565272497,
|
|
"grad_norm": 0.3970785222606904,
|
|
"learning_rate": 1.501917631889227e-07,
|
|
"loss": 0.0705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.025564653798937798,
|
|
"step": 5330,
|
|
"valid_targets_mean": 3689.2,
|
|
"valid_targets_min": 2587
|
|
},
|
|
{
|
|
"epoch": 6.761723700887199,
|
|
"grad_norm": 0.4229498421695961,
|
|
"learning_rate": 1.4255877777660688e-07,
|
|
"loss": 0.0856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0335562564432621,
|
|
"step": 5335,
|
|
"valid_targets_mean": 3088.9,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 6.768060836501901,
|
|
"grad_norm": 0.5011161827218653,
|
|
"learning_rate": 1.3512415044652615e-07,
|
|
"loss": 0.0728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03598347306251526,
|
|
"step": 5340,
|
|
"valid_targets_mean": 2951.0,
|
|
"valid_targets_min": 885
|
|
},
|
|
{
|
|
"epoch": 6.774397972116604,
|
|
"grad_norm": 0.8621712950956477,
|
|
"learning_rate": 1.2788795546395672e-07,
|
|
"loss": 0.0799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04856495559215546,
|
|
"step": 5345,
|
|
"valid_targets_mean": 1399.6,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 6.780735107731306,
|
|
"grad_norm": 0.5466727146474838,
|
|
"learning_rate": 1.2085026511202025e-07,
|
|
"loss": 0.0739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04380311816930771,
|
|
"step": 5350,
|
|
"valid_targets_mean": 1881.5,
|
|
"valid_targets_min": 708
|
|
},
|
|
{
|
|
"epoch": 6.787072243346008,
|
|
"grad_norm": 0.45216404361768037,
|
|
"learning_rate": 1.1401114969094684e-07,
|
|
"loss": 0.0843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03542492538690567,
|
|
"step": 5355,
|
|
"valid_targets_mean": 2534.2,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 6.7934093789607095,
|
|
"grad_norm": 0.48803764277453954,
|
|
"learning_rate": 1.073706775173866e-07,
|
|
"loss": 0.073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03730048984289169,
|
|
"step": 5360,
|
|
"valid_targets_mean": 2726.6,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 6.799746514575412,
|
|
"grad_norm": 0.6097191485201289,
|
|
"learning_rate": 1.0092891492371915e-07,
|
|
"loss": 0.0661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04434138536453247,
|
|
"step": 5365,
|
|
"valid_targets_mean": 1503.4,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 6.806083650190114,
|
|
"grad_norm": 0.7023621045090173,
|
|
"learning_rate": 9.46859262573896e-08,
|
|
"loss": 0.0907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07682442665100098,
|
|
"step": 5370,
|
|
"valid_targets_mean": 1759.2,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 6.812420785804816,
|
|
"grad_norm": 0.32899875017102476,
|
|
"learning_rate": 8.864177388027806e-08,
|
|
"loss": 0.0649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.018850531429052353,
|
|
"step": 5375,
|
|
"valid_targets_mean": 3772.2,
|
|
"valid_targets_min": 3360
|
|
},
|
|
{
|
|
"epoch": 6.818757921419518,
|
|
"grad_norm": 0.37999929865026383,
|
|
"learning_rate": 8.27965181680579e-08,
|
|
"loss": 0.0657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0284842811524868,
|
|
"step": 5380,
|
|
"valid_targets_mean": 3916.1,
|
|
"valid_targets_min": 1773
|
|
},
|
|
{
|
|
"epoch": 6.82509505703422,
|
|
"grad_norm": 0.40338285595861056,
|
|
"learning_rate": 7.715021750960949e-08,
|
|
"loss": 0.0715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.031235236674547195,
|
|
"step": 5385,
|
|
"valid_targets_mean": 2122.4,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 6.831432192648923,
|
|
"grad_norm": 0.41795407069026075,
|
|
"learning_rate": 7.170292830642745e-08,
|
|
"loss": 0.062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04410218074917793,
|
|
"step": 5390,
|
|
"valid_targets_mean": 5008.0,
|
|
"valid_targets_min": 4222
|
|
},
|
|
{
|
|
"epoch": 6.837769328263625,
|
|
"grad_norm": 0.39805051125193114,
|
|
"learning_rate": 6.645470497205875e-08,
|
|
"loss": 0.0597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039797261357307434,
|
|
"step": 5395,
|
|
"valid_targets_mean": 4978.8,
|
|
"valid_targets_min": 4104
|
|
},
|
|
{
|
|
"epoch": 6.844106463878327,
|
|
"grad_norm": 0.39880123680832447,
|
|
"learning_rate": 6.140559993156325e-08,
|
|
"loss": 0.0797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03050404228270054,
|
|
"step": 5400,
|
|
"valid_targets_mean": 2777.0,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 6.850443599493029,
|
|
"grad_norm": 0.5008248945413296,
|
|
"learning_rate": 5.655566362098519e-08,
|
|
"loss": 0.0893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04370546340942383,
|
|
"step": 5405,
|
|
"valid_targets_mean": 3734.4,
|
|
"valid_targets_min": 3352
|
|
},
|
|
{
|
|
"epoch": 6.856780735107732,
|
|
"grad_norm": 0.4180163473404803,
|
|
"learning_rate": 5.1904944486851347e-08,
|
|
"loss": 0.0696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04113881289958954,
|
|
"step": 5410,
|
|
"valid_targets_mean": 3456.0,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 6.863117870722434,
|
|
"grad_norm": 0.43434101480583703,
|
|
"learning_rate": 4.7453488985687024e-08,
|
|
"loss": 0.0783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03484988585114479,
|
|
"step": 5415,
|
|
"valid_targets_mean": 2187.9,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 6.869455006337136,
|
|
"grad_norm": 0.38692383862801555,
|
|
"learning_rate": 4.3201341583554154e-08,
|
|
"loss": 0.0873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.023803317919373512,
|
|
"step": 5420,
|
|
"valid_targets_mean": 2997.9,
|
|
"valid_targets_min": 485
|
|
},
|
|
{
|
|
"epoch": 6.8757921419518375,
|
|
"grad_norm": 0.4416702528827446,
|
|
"learning_rate": 3.914854475560281e-08,
|
|
"loss": 0.0674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.033075716346502304,
|
|
"step": 5425,
|
|
"valid_targets_mean": 3161.9,
|
|
"valid_targets_min": 1048
|
|
},
|
|
{
|
|
"epoch": 6.8821292775665395,
|
|
"grad_norm": 0.40891162915361196,
|
|
"learning_rate": 3.5295138985647074e-08,
|
|
"loss": 0.0722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.027852313593029976,
|
|
"step": 5430,
|
|
"valid_targets_mean": 2517.6,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 6.888466413181242,
|
|
"grad_norm": 0.5150870984912126,
|
|
"learning_rate": 3.1641162765767565e-08,
|
|
"loss": 0.0729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03757266700267792,
|
|
"step": 5435,
|
|
"valid_targets_mean": 2190.5,
|
|
"valid_targets_min": 865
|
|
},
|
|
{
|
|
"epoch": 6.894803548795944,
|
|
"grad_norm": 0.5786129033277824,
|
|
"learning_rate": 2.8186652595918464e-08,
|
|
"loss": 0.1038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0334940068423748,
|
|
"step": 5440,
|
|
"valid_targets_mean": 1784.4,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 6.901140684410646,
|
|
"grad_norm": 0.4758617746989478,
|
|
"learning_rate": 2.4931642983569983e-08,
|
|
"loss": 0.089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03995145857334137,
|
|
"step": 5445,
|
|
"valid_targets_mean": 3771.6,
|
|
"valid_targets_min": 2811
|
|
},
|
|
{
|
|
"epoch": 6.907477820025348,
|
|
"grad_norm": 0.4582380652561686,
|
|
"learning_rate": 2.187616644335311e-08,
|
|
"loss": 0.0691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029735777527093887,
|
|
"step": 5450,
|
|
"valid_targets_mean": 1885.5,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 6.913814955640051,
|
|
"grad_norm": 0.39480575838606885,
|
|
"learning_rate": 1.902025349674874e-08,
|
|
"loss": 0.0687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03567538410425186,
|
|
"step": 5455,
|
|
"valid_targets_mean": 3215.2,
|
|
"valid_targets_min": 1591
|
|
},
|
|
{
|
|
"epoch": 6.920152091254753,
|
|
"grad_norm": 0.3969611366049055,
|
|
"learning_rate": 1.6363932671774606e-08,
|
|
"loss": 0.0668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02705613523721695,
|
|
"step": 5460,
|
|
"valid_targets_mean": 2493.0,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 6.926489226869455,
|
|
"grad_norm": 0.38493567865115996,
|
|
"learning_rate": 1.3907230502701042e-08,
|
|
"loss": 0.0616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03154352679848671,
|
|
"step": 5465,
|
|
"valid_targets_mean": 3232.9,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 6.932826362484157,
|
|
"grad_norm": 0.4477550383350358,
|
|
"learning_rate": 1.1650171529782317e-08,
|
|
"loss": 0.0826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.033137910068035126,
|
|
"step": 5470,
|
|
"valid_targets_mean": 3239.5,
|
|
"valid_targets_min": 878
|
|
},
|
|
{
|
|
"epoch": 6.93916349809886,
|
|
"grad_norm": 0.3747959720280146,
|
|
"learning_rate": 9.592778299023497e-09,
|
|
"loss": 0.0611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.028404461219906807,
|
|
"step": 5475,
|
|
"valid_targets_mean": 3656.8,
|
|
"valid_targets_min": 1252
|
|
},
|
|
{
|
|
"epoch": 6.945500633713562,
|
|
"grad_norm": 0.4986494021625809,
|
|
"learning_rate": 7.735071361940627e-09,
|
|
"loss": 0.0679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03797287493944168,
|
|
"step": 5480,
|
|
"valid_targets_mean": 2134.0,
|
|
"valid_targets_min": 945
|
|
},
|
|
{
|
|
"epoch": 6.951837769328264,
|
|
"grad_norm": 0.5138289179806156,
|
|
"learning_rate": 6.077069275365332e-09,
|
|
"loss": 0.0685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04770292341709137,
|
|
"step": 5485,
|
|
"valid_targets_mean": 3470.4,
|
|
"valid_targets_min": 2573
|
|
},
|
|
{
|
|
"epoch": 6.9581749049429655,
|
|
"grad_norm": 0.4223607668195561,
|
|
"learning_rate": 4.618788601258306e-09,
|
|
"loss": 0.0712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041325293481349945,
|
|
"step": 5490,
|
|
"valid_targets_mean": 3838.4,
|
|
"valid_targets_min": 2529
|
|
},
|
|
{
|
|
"epoch": 6.964512040557668,
|
|
"grad_norm": 0.5247706279775707,
|
|
"learning_rate": 3.360243906536109e-09,
|
|
"loss": 0.0615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0402681939303875,
|
|
"step": 5495,
|
|
"valid_targets_mean": 3975.0,
|
|
"valid_targets_min": 892
|
|
},
|
|
{
|
|
"epoch": 6.97084917617237,
|
|
"grad_norm": 0.3463842918844926,
|
|
"learning_rate": 2.301447762937947e-09,
|
|
"loss": 0.0686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.027191126719117165,
|
|
"step": 5500,
|
|
"valid_targets_mean": 4497.8,
|
|
"valid_targets_min": 1657
|
|
},
|
|
{
|
|
"epoch": 6.977186311787072,
|
|
"grad_norm": 0.5948448632092593,
|
|
"learning_rate": 1.4424107468880012e-09,
|
|
"loss": 0.1102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09361372888088226,
|
|
"step": 5505,
|
|
"valid_targets_mean": 2070.2,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 6.983523447401774,
|
|
"grad_norm": 0.33183971694940084,
|
|
"learning_rate": 7.831414393999481e-10,
|
|
"loss": 0.1127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.024456195533275604,
|
|
"step": 5510,
|
|
"valid_targets_mean": 3756.6,
|
|
"valid_targets_min": 2131
|
|
},
|
|
{
|
|
"epoch": 6.989860583016476,
|
|
"grad_norm": 0.4263769125069856,
|
|
"learning_rate": 3.2364642598370357e-10,
|
|
"loss": 0.0721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0352926105260849,
|
|
"step": 5515,
|
|
"valid_targets_mean": 3133.0,
|
|
"valid_targets_min": 840
|
|
},
|
|
{
|
|
"epoch": 6.996197718631179,
|
|
"grad_norm": 0.38010451741442985,
|
|
"learning_rate": 6.393029658324779e-11,
|
|
"loss": 0.0642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0321081317961216,
|
|
"step": 5520,
|
|
"valid_targets_mean": 3714.0,
|
|
"valid_targets_min": 2883
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"step": 5523,
|
|
"total_flos": 2.4102921715140526e+18,
|
|
"train_loss": 0.0,
|
|
"train_runtime": 1.8518,
|
|
"train_samples_per_second": 47698.611,
|
|
"train_steps_per_second": 2982.581
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 5523,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 200,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 2.4102921715140526e+18,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|