9882 lines
274 KiB
JSON
9882 lines
274 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4473,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.00782472613458529,
|
|
"grad_norm": 14.985624898494553,
|
|
"learning_rate": 3.5714285714285716e-07,
|
|
"loss": 0.8512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3705439865589142,
|
|
"step": 5,
|
|
"valid_targets_mean": 4836.5,
|
|
"valid_targets_min": 1145
|
|
},
|
|
{
|
|
"epoch": 0.01564945226917058,
|
|
"grad_norm": 15.531346366019337,
|
|
"learning_rate": 8.035714285714287e-07,
|
|
"loss": 0.9841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42504867911338806,
|
|
"step": 10,
|
|
"valid_targets_mean": 3165.9,
|
|
"valid_targets_min": 866
|
|
},
|
|
{
|
|
"epoch": 0.023474178403755867,
|
|
"grad_norm": 15.10440352165393,
|
|
"learning_rate": 1.25e-06,
|
|
"loss": 0.9025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42286789417266846,
|
|
"step": 15,
|
|
"valid_targets_mean": 3718.6,
|
|
"valid_targets_min": 1292
|
|
},
|
|
{
|
|
"epoch": 0.03129890453834116,
|
|
"grad_norm": 11.23146592911026,
|
|
"learning_rate": 1.6964285714285717e-06,
|
|
"loss": 0.8985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5172112584114075,
|
|
"step": 20,
|
|
"valid_targets_mean": 3833.4,
|
|
"valid_targets_min": 863
|
|
},
|
|
{
|
|
"epoch": 0.03912363067292645,
|
|
"grad_norm": 7.460004570792934,
|
|
"learning_rate": 2.1428571428571427e-06,
|
|
"loss": 0.8522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4645783007144928,
|
|
"step": 25,
|
|
"valid_targets_mean": 4746.6,
|
|
"valid_targets_min": 1530
|
|
},
|
|
{
|
|
"epoch": 0.046948356807511735,
|
|
"grad_norm": 3.5068432358653276,
|
|
"learning_rate": 2.5892857142857148e-06,
|
|
"loss": 0.7623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15294888615608215,
|
|
"step": 30,
|
|
"valid_targets_mean": 3410.4,
|
|
"valid_targets_min": 573
|
|
},
|
|
{
|
|
"epoch": 0.054773082942097026,
|
|
"grad_norm": 3.468429946808279,
|
|
"learning_rate": 3.0357142857142856e-06,
|
|
"loss": 0.7298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39859524369239807,
|
|
"step": 35,
|
|
"valid_targets_mean": 3317.6,
|
|
"valid_targets_min": 1514
|
|
},
|
|
{
|
|
"epoch": 0.06259780907668232,
|
|
"grad_norm": 1.7865664508427699,
|
|
"learning_rate": 3.482142857142857e-06,
|
|
"loss": 0.637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2711215019226074,
|
|
"step": 40,
|
|
"valid_targets_mean": 3389.4,
|
|
"valid_targets_min": 812
|
|
},
|
|
{
|
|
"epoch": 0.07042253521126761,
|
|
"grad_norm": 1.8859404916127087,
|
|
"learning_rate": 3.928571428571429e-06,
|
|
"loss": 0.6746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31541749835014343,
|
|
"step": 45,
|
|
"valid_targets_mean": 2487.4,
|
|
"valid_targets_min": 1006
|
|
},
|
|
{
|
|
"epoch": 0.0782472613458529,
|
|
"grad_norm": 1.6385921090382252,
|
|
"learning_rate": 4.3750000000000005e-06,
|
|
"loss": 0.6594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32305485010147095,
|
|
"step": 50,
|
|
"valid_targets_mean": 2058.1,
|
|
"valid_targets_min": 1285
|
|
},
|
|
{
|
|
"epoch": 0.08607198748043818,
|
|
"grad_norm": 1.2819419484768044,
|
|
"learning_rate": 4.821428571428572e-06,
|
|
"loss": 0.6574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41548952460289,
|
|
"step": 55,
|
|
"valid_targets_mean": 4105.2,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 0.09389671361502347,
|
|
"grad_norm": 0.9550342451209143,
|
|
"learning_rate": 5.267857142857144e-06,
|
|
"loss": 0.6602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28189122676849365,
|
|
"step": 60,
|
|
"valid_targets_mean": 3060.5,
|
|
"valid_targets_min": 1314
|
|
},
|
|
{
|
|
"epoch": 0.10172143974960876,
|
|
"grad_norm": 0.8546526008417541,
|
|
"learning_rate": 5.7142857142857145e-06,
|
|
"loss": 0.6166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2471541166305542,
|
|
"step": 65,
|
|
"valid_targets_mean": 4375.2,
|
|
"valid_targets_min": 2623
|
|
},
|
|
{
|
|
"epoch": 0.10954616588419405,
|
|
"grad_norm": 0.6969886750248636,
|
|
"learning_rate": 6.160714285714286e-06,
|
|
"loss": 0.63,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3634992837905884,
|
|
"step": 70,
|
|
"valid_targets_mean": 5958.5,
|
|
"valid_targets_min": 3163
|
|
},
|
|
{
|
|
"epoch": 0.11737089201877934,
|
|
"grad_norm": 0.7216117313064434,
|
|
"learning_rate": 6.607142857142858e-06,
|
|
"loss": 0.5609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29145413637161255,
|
|
"step": 75,
|
|
"valid_targets_mean": 5129.1,
|
|
"valid_targets_min": 1088
|
|
},
|
|
{
|
|
"epoch": 0.12519561815336464,
|
|
"grad_norm": 0.9035809128869251,
|
|
"learning_rate": 7.053571428571429e-06,
|
|
"loss": 0.6192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2673933804035187,
|
|
"step": 80,
|
|
"valid_targets_mean": 3132.6,
|
|
"valid_targets_min": 1006
|
|
},
|
|
{
|
|
"epoch": 0.13302034428794993,
|
|
"grad_norm": 0.9476098952734502,
|
|
"learning_rate": 7.500000000000001e-06,
|
|
"loss": 0.602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26275813579559326,
|
|
"step": 85,
|
|
"valid_targets_mean": 1995.1,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 0.14084507042253522,
|
|
"grad_norm": 0.6930488659940748,
|
|
"learning_rate": 7.946428571428571e-06,
|
|
"loss": 0.5817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28456491231918335,
|
|
"step": 90,
|
|
"valid_targets_mean": 3873.4,
|
|
"valid_targets_min": 1562
|
|
},
|
|
{
|
|
"epoch": 0.1486697965571205,
|
|
"grad_norm": 0.681422278787771,
|
|
"learning_rate": 8.392857142857144e-06,
|
|
"loss": 0.5545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25226083397865295,
|
|
"step": 95,
|
|
"valid_targets_mean": 4069.2,
|
|
"valid_targets_min": 2005
|
|
},
|
|
{
|
|
"epoch": 0.1564945226917058,
|
|
"grad_norm": 0.711708007345044,
|
|
"learning_rate": 8.839285714285714e-06,
|
|
"loss": 0.5399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17513154447078705,
|
|
"step": 100,
|
|
"valid_targets_mean": 2554.4,
|
|
"valid_targets_min": 1260
|
|
},
|
|
{
|
|
"epoch": 0.1643192488262911,
|
|
"grad_norm": 0.70979265394368,
|
|
"learning_rate": 9.285714285714288e-06,
|
|
"loss": 0.5684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24423879384994507,
|
|
"step": 105,
|
|
"valid_targets_mean": 2976.6,
|
|
"valid_targets_min": 1054
|
|
},
|
|
{
|
|
"epoch": 0.17214397496087636,
|
|
"grad_norm": 0.7409905339814179,
|
|
"learning_rate": 9.732142857142858e-06,
|
|
"loss": 0.5736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2586560547351837,
|
|
"step": 110,
|
|
"valid_targets_mean": 2887.9,
|
|
"valid_targets_min": 1421
|
|
},
|
|
{
|
|
"epoch": 0.17996870109546165,
|
|
"grad_norm": 0.753708572424868,
|
|
"learning_rate": 1.0178571428571429e-05,
|
|
"loss": 0.5631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.305778443813324,
|
|
"step": 115,
|
|
"valid_targets_mean": 3828.5,
|
|
"valid_targets_min": 1281
|
|
},
|
|
{
|
|
"epoch": 0.18779342723004694,
|
|
"grad_norm": 0.8123594258826431,
|
|
"learning_rate": 1.0625e-05,
|
|
"loss": 0.5707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25991448760032654,
|
|
"step": 120,
|
|
"valid_targets_mean": 3137.5,
|
|
"valid_targets_min": 1698
|
|
},
|
|
{
|
|
"epoch": 0.19561815336463223,
|
|
"grad_norm": 0.6567517377648533,
|
|
"learning_rate": 1.1071428571428572e-05,
|
|
"loss": 0.5311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18091034889221191,
|
|
"step": 125,
|
|
"valid_targets_mean": 2712.9,
|
|
"valid_targets_min": 807
|
|
},
|
|
{
|
|
"epoch": 0.20344287949921752,
|
|
"grad_norm": 0.9108494465232777,
|
|
"learning_rate": 1.1517857142857142e-05,
|
|
"loss": 0.5303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2547938823699951,
|
|
"step": 130,
|
|
"valid_targets_mean": 2424.1,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 0.2112676056338028,
|
|
"grad_norm": 0.6142333826933215,
|
|
"learning_rate": 1.1964285714285716e-05,
|
|
"loss": 0.5115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21922193467617035,
|
|
"step": 135,
|
|
"valid_targets_mean": 3643.1,
|
|
"valid_targets_min": 1377
|
|
},
|
|
{
|
|
"epoch": 0.2190923317683881,
|
|
"grad_norm": 0.6850364484150541,
|
|
"learning_rate": 1.2410714285714287e-05,
|
|
"loss": 0.5176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2808055877685547,
|
|
"step": 140,
|
|
"valid_targets_mean": 4085.6,
|
|
"valid_targets_min": 1192
|
|
},
|
|
{
|
|
"epoch": 0.2269170579029734,
|
|
"grad_norm": 0.6809628002277747,
|
|
"learning_rate": 1.2857142857142859e-05,
|
|
"loss": 0.5588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.230814129114151,
|
|
"step": 145,
|
|
"valid_targets_mean": 3154.2,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 0.2347417840375587,
|
|
"grad_norm": 0.7721572219651329,
|
|
"learning_rate": 1.3303571428571429e-05,
|
|
"loss": 0.5132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27139559388160706,
|
|
"step": 150,
|
|
"valid_targets_mean": 3477.9,
|
|
"valid_targets_min": 1226
|
|
},
|
|
{
|
|
"epoch": 0.24256651017214398,
|
|
"grad_norm": 0.8247703748231433,
|
|
"learning_rate": 1.375e-05,
|
|
"loss": 0.5135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22778967022895813,
|
|
"step": 155,
|
|
"valid_targets_mean": 2290.5,
|
|
"valid_targets_min": 755
|
|
},
|
|
{
|
|
"epoch": 0.25039123630672927,
|
|
"grad_norm": 0.8999440062447888,
|
|
"learning_rate": 1.4196428571428574e-05,
|
|
"loss": 0.5287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22713550925254822,
|
|
"step": 160,
|
|
"valid_targets_mean": 3128.9,
|
|
"valid_targets_min": 1208
|
|
},
|
|
{
|
|
"epoch": 0.25821596244131456,
|
|
"grad_norm": 0.7210147544343504,
|
|
"learning_rate": 1.4642857142857144e-05,
|
|
"loss": 0.5242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22318416833877563,
|
|
"step": 165,
|
|
"valid_targets_mean": 3415.0,
|
|
"valid_targets_min": 1032
|
|
},
|
|
{
|
|
"epoch": 0.26604068857589985,
|
|
"grad_norm": 0.7496522920896888,
|
|
"learning_rate": 1.5089285714285715e-05,
|
|
"loss": 0.5075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22558245062828064,
|
|
"step": 170,
|
|
"valid_targets_mean": 2500.1,
|
|
"valid_targets_min": 1043
|
|
},
|
|
{
|
|
"epoch": 0.27386541471048514,
|
|
"grad_norm": 0.5982516445406875,
|
|
"learning_rate": 1.553571428571429e-05,
|
|
"loss": 0.5169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30611079931259155,
|
|
"step": 175,
|
|
"valid_targets_mean": 5059.6,
|
|
"valid_targets_min": 1079
|
|
},
|
|
{
|
|
"epoch": 0.28169014084507044,
|
|
"grad_norm": 0.7295607876211301,
|
|
"learning_rate": 1.598214285714286e-05,
|
|
"loss": 0.5168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15322911739349365,
|
|
"step": 180,
|
|
"valid_targets_mean": 2262.0,
|
|
"valid_targets_min": 1197
|
|
},
|
|
{
|
|
"epoch": 0.2895148669796557,
|
|
"grad_norm": 0.6551258381274977,
|
|
"learning_rate": 1.642857142857143e-05,
|
|
"loss": 0.5073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3250609040260315,
|
|
"step": 185,
|
|
"valid_targets_mean": 4388.4,
|
|
"valid_targets_min": 1779
|
|
},
|
|
{
|
|
"epoch": 0.297339593114241,
|
|
"grad_norm": 0.8048196514998961,
|
|
"learning_rate": 1.6875e-05,
|
|
"loss": 0.4956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23925356566905975,
|
|
"step": 190,
|
|
"valid_targets_mean": 2612.5,
|
|
"valid_targets_min": 1622
|
|
},
|
|
{
|
|
"epoch": 0.3051643192488263,
|
|
"grad_norm": 0.6921810264753487,
|
|
"learning_rate": 1.7321428571428572e-05,
|
|
"loss": 0.5384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2808268070220947,
|
|
"step": 195,
|
|
"valid_targets_mean": 3453.8,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 0.3129890453834116,
|
|
"grad_norm": 0.7096071862764995,
|
|
"learning_rate": 1.7767857142857143e-05,
|
|
"loss": 0.4963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21128416061401367,
|
|
"step": 200,
|
|
"valid_targets_mean": 2638.8,
|
|
"valid_targets_min": 1586
|
|
},
|
|
{
|
|
"epoch": 0.3208137715179969,
|
|
"grad_norm": 0.7161172690754724,
|
|
"learning_rate": 1.8214285714285715e-05,
|
|
"loss": 0.4953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30294111371040344,
|
|
"step": 205,
|
|
"valid_targets_mean": 4359.5,
|
|
"valid_targets_min": 1841
|
|
},
|
|
{
|
|
"epoch": 0.3286384976525822,
|
|
"grad_norm": 0.7047537625851548,
|
|
"learning_rate": 1.8660714285714287e-05,
|
|
"loss": 0.4876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21961969137191772,
|
|
"step": 210,
|
|
"valid_targets_mean": 3732.0,
|
|
"valid_targets_min": 1071
|
|
},
|
|
{
|
|
"epoch": 0.3364632237871675,
|
|
"grad_norm": 1.0751821988744354,
|
|
"learning_rate": 1.910714285714286e-05,
|
|
"loss": 0.4813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22165444493293762,
|
|
"step": 215,
|
|
"valid_targets_mean": 3293.6,
|
|
"valid_targets_min": 1310
|
|
},
|
|
{
|
|
"epoch": 0.3442879499217527,
|
|
"grad_norm": 0.7348101165430592,
|
|
"learning_rate": 1.955357142857143e-05,
|
|
"loss": 0.524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26132524013519287,
|
|
"step": 220,
|
|
"valid_targets_mean": 3435.0,
|
|
"valid_targets_min": 1739
|
|
},
|
|
{
|
|
"epoch": 0.352112676056338,
|
|
"grad_norm": 0.6014436414597776,
|
|
"learning_rate": 2e-05,
|
|
"loss": 0.4969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3515092432498932,
|
|
"step": 225,
|
|
"valid_targets_mean": 6420.1,
|
|
"valid_targets_min": 1466
|
|
},
|
|
{
|
|
"epoch": 0.3599374021909233,
|
|
"grad_norm": 0.7068499254869766,
|
|
"learning_rate": 2.0446428571428573e-05,
|
|
"loss": 0.4961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2897939682006836,
|
|
"step": 230,
|
|
"valid_targets_mean": 4268.2,
|
|
"valid_targets_min": 1849
|
|
},
|
|
{
|
|
"epoch": 0.3677621283255086,
|
|
"grad_norm": 0.7346064770092685,
|
|
"learning_rate": 2.0892857142857145e-05,
|
|
"loss": 0.4393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26461800932884216,
|
|
"step": 235,
|
|
"valid_targets_mean": 4015.0,
|
|
"valid_targets_min": 1936
|
|
},
|
|
{
|
|
"epoch": 0.3755868544600939,
|
|
"grad_norm": 0.6644681474473609,
|
|
"learning_rate": 2.1339285714285717e-05,
|
|
"loss": 0.4743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3158893585205078,
|
|
"step": 240,
|
|
"valid_targets_mean": 4956.6,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 0.38341158059467917,
|
|
"grad_norm": 0.6378798903092674,
|
|
"learning_rate": 2.1785714285714285e-05,
|
|
"loss": 0.4916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2483050525188446,
|
|
"step": 245,
|
|
"valid_targets_mean": 3962.2,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 0.39123630672926446,
|
|
"grad_norm": 0.8136328295860369,
|
|
"learning_rate": 2.2232142857142856e-05,
|
|
"loss": 0.4749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24282008409500122,
|
|
"step": 250,
|
|
"valid_targets_mean": 3456.9,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 0.39906103286384975,
|
|
"grad_norm": 0.6906250007452486,
|
|
"learning_rate": 2.267857142857143e-05,
|
|
"loss": 0.4899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2115616500377655,
|
|
"step": 255,
|
|
"valid_targets_mean": 3368.0,
|
|
"valid_targets_min": 1187
|
|
},
|
|
{
|
|
"epoch": 0.40688575899843504,
|
|
"grad_norm": 0.6899129298271534,
|
|
"learning_rate": 2.3125000000000003e-05,
|
|
"loss": 0.4361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16635742783546448,
|
|
"step": 260,
|
|
"valid_targets_mean": 2941.6,
|
|
"valid_targets_min": 1095
|
|
},
|
|
{
|
|
"epoch": 0.41471048513302033,
|
|
"grad_norm": 0.8109744986759924,
|
|
"learning_rate": 2.3571428571428575e-05,
|
|
"loss": 0.4696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1854422688484192,
|
|
"step": 265,
|
|
"valid_targets_mean": 2187.2,
|
|
"valid_targets_min": 1212
|
|
},
|
|
{
|
|
"epoch": 0.4225352112676056,
|
|
"grad_norm": 0.5732592094508333,
|
|
"learning_rate": 2.4017857142857146e-05,
|
|
"loss": 0.4849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32333260774612427,
|
|
"step": 270,
|
|
"valid_targets_mean": 7258.2,
|
|
"valid_targets_min": 2364
|
|
},
|
|
{
|
|
"epoch": 0.4303599374021909,
|
|
"grad_norm": 0.6923546469575422,
|
|
"learning_rate": 2.4464285714285718e-05,
|
|
"loss": 0.4564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20542533695697784,
|
|
"step": 275,
|
|
"valid_targets_mean": 2852.2,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 0.4381846635367762,
|
|
"grad_norm": 0.934328843243627,
|
|
"learning_rate": 2.4910714285714286e-05,
|
|
"loss": 0.4962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22699972987174988,
|
|
"step": 280,
|
|
"valid_targets_mean": 2106.9,
|
|
"valid_targets_min": 805
|
|
},
|
|
{
|
|
"epoch": 0.4460093896713615,
|
|
"grad_norm": 0.634955267962378,
|
|
"learning_rate": 2.5357142857142858e-05,
|
|
"loss": 0.4452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2429167777299881,
|
|
"step": 285,
|
|
"valid_targets_mean": 4528.0,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 0.4538341158059468,
|
|
"grad_norm": 0.7670966784127312,
|
|
"learning_rate": 2.580357142857143e-05,
|
|
"loss": 0.4585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15874019265174866,
|
|
"step": 290,
|
|
"valid_targets_mean": 2907.4,
|
|
"valid_targets_min": 1611
|
|
},
|
|
{
|
|
"epoch": 0.4616588419405321,
|
|
"grad_norm": 0.8540039688231614,
|
|
"learning_rate": 2.625e-05,
|
|
"loss": 0.4699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2443799376487732,
|
|
"step": 295,
|
|
"valid_targets_mean": 2682.6,
|
|
"valid_targets_min": 1608
|
|
},
|
|
{
|
|
"epoch": 0.4694835680751174,
|
|
"grad_norm": 0.6294420901379963,
|
|
"learning_rate": 2.6696428571428573e-05,
|
|
"loss": 0.4382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2190573811531067,
|
|
"step": 300,
|
|
"valid_targets_mean": 5212.8,
|
|
"valid_targets_min": 1466
|
|
},
|
|
{
|
|
"epoch": 0.47730829420970267,
|
|
"grad_norm": 0.7727535727890618,
|
|
"learning_rate": 2.7142857142857148e-05,
|
|
"loss": 0.4611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21273700892925262,
|
|
"step": 305,
|
|
"valid_targets_mean": 2760.6,
|
|
"valid_targets_min": 1300
|
|
},
|
|
{
|
|
"epoch": 0.48513302034428796,
|
|
"grad_norm": 0.7564777337114239,
|
|
"learning_rate": 2.758928571428572e-05,
|
|
"loss": 0.4596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24206706881523132,
|
|
"step": 310,
|
|
"valid_targets_mean": 2762.0,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 0.49295774647887325,
|
|
"grad_norm": 0.859466869280641,
|
|
"learning_rate": 2.8035714285714288e-05,
|
|
"loss": 0.4674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28809595108032227,
|
|
"step": 315,
|
|
"valid_targets_mean": 2851.4,
|
|
"valid_targets_min": 1195
|
|
},
|
|
{
|
|
"epoch": 0.5007824726134585,
|
|
"grad_norm": 0.8043363429673056,
|
|
"learning_rate": 2.848214285714286e-05,
|
|
"loss": 0.4814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23001626133918762,
|
|
"step": 320,
|
|
"valid_targets_mean": 2885.0,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 0.5086071987480438,
|
|
"grad_norm": 0.554590656020954,
|
|
"learning_rate": 2.892857142857143e-05,
|
|
"loss": 0.4767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34783557057380676,
|
|
"step": 325,
|
|
"valid_targets_mean": 7499.1,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 0.5164319248826291,
|
|
"grad_norm": 0.6928196656976202,
|
|
"learning_rate": 2.9375000000000003e-05,
|
|
"loss": 0.438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2515498399734497,
|
|
"step": 330,
|
|
"valid_targets_mean": 4332.4,
|
|
"valid_targets_min": 1487
|
|
},
|
|
{
|
|
"epoch": 0.5242566510172144,
|
|
"grad_norm": 0.6741776746870901,
|
|
"learning_rate": 2.9821428571428574e-05,
|
|
"loss": 0.4645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25081321597099304,
|
|
"step": 335,
|
|
"valid_targets_mean": 4064.4,
|
|
"valid_targets_min": 802
|
|
},
|
|
{
|
|
"epoch": 0.5320813771517997,
|
|
"grad_norm": 0.7155998149036356,
|
|
"learning_rate": 3.0267857142857146e-05,
|
|
"loss": 0.439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16516746580600739,
|
|
"step": 340,
|
|
"valid_targets_mean": 2477.8,
|
|
"valid_targets_min": 1400
|
|
},
|
|
{
|
|
"epoch": 0.539906103286385,
|
|
"grad_norm": 0.7416484527497875,
|
|
"learning_rate": 3.071428571428572e-05,
|
|
"loss": 0.4199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15393097698688507,
|
|
"step": 345,
|
|
"valid_targets_mean": 2662.1,
|
|
"valid_targets_min": 1336
|
|
},
|
|
{
|
|
"epoch": 0.5477308294209703,
|
|
"grad_norm": 0.6640391684201965,
|
|
"learning_rate": 3.116071428571429e-05,
|
|
"loss": 0.4767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2638360857963562,
|
|
"step": 350,
|
|
"valid_targets_mean": 5491.8,
|
|
"valid_targets_min": 1946
|
|
},
|
|
{
|
|
"epoch": 0.5555555555555556,
|
|
"grad_norm": 0.8206000274320715,
|
|
"learning_rate": 3.160714285714286e-05,
|
|
"loss": 0.4761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2396335005760193,
|
|
"step": 355,
|
|
"valid_targets_mean": 2779.1,
|
|
"valid_targets_min": 1096
|
|
},
|
|
{
|
|
"epoch": 0.5633802816901409,
|
|
"grad_norm": 0.7929490046249823,
|
|
"learning_rate": 3.205357142857143e-05,
|
|
"loss": 0.447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23422059416770935,
|
|
"step": 360,
|
|
"valid_targets_mean": 3167.9,
|
|
"valid_targets_min": 1342
|
|
},
|
|
{
|
|
"epoch": 0.5712050078247262,
|
|
"grad_norm": 0.6672651026975667,
|
|
"learning_rate": 3.2500000000000004e-05,
|
|
"loss": 0.4613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24593167006969452,
|
|
"step": 365,
|
|
"valid_targets_mean": 4402.4,
|
|
"valid_targets_min": 1691
|
|
},
|
|
{
|
|
"epoch": 0.5790297339593115,
|
|
"grad_norm": 0.7006584147621673,
|
|
"learning_rate": 3.2946428571428576e-05,
|
|
"loss": 0.4559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20849832892417908,
|
|
"step": 370,
|
|
"valid_targets_mean": 3675.4,
|
|
"valid_targets_min": 1041
|
|
},
|
|
{
|
|
"epoch": 0.5868544600938967,
|
|
"grad_norm": 0.7157851296518091,
|
|
"learning_rate": 3.339285714285715e-05,
|
|
"loss": 0.4673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2706051468849182,
|
|
"step": 375,
|
|
"valid_targets_mean": 4019.4,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 0.594679186228482,
|
|
"grad_norm": 0.7157332252753741,
|
|
"learning_rate": 3.383928571428572e-05,
|
|
"loss": 0.4623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20264701545238495,
|
|
"step": 380,
|
|
"valid_targets_mean": 3199.2,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 0.6025039123630673,
|
|
"grad_norm": 0.799369102205351,
|
|
"learning_rate": 3.4285714285714284e-05,
|
|
"loss": 0.4603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24111491441726685,
|
|
"step": 385,
|
|
"valid_targets_mean": 3169.1,
|
|
"valid_targets_min": 1674
|
|
},
|
|
{
|
|
"epoch": 0.6103286384976526,
|
|
"grad_norm": 0.7426376819341266,
|
|
"learning_rate": 3.473214285714286e-05,
|
|
"loss": 0.4712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2678334712982178,
|
|
"step": 390,
|
|
"valid_targets_mean": 3929.6,
|
|
"valid_targets_min": 1344
|
|
},
|
|
{
|
|
"epoch": 0.6181533646322379,
|
|
"grad_norm": 0.657307349131645,
|
|
"learning_rate": 3.5178571428571434e-05,
|
|
"loss": 0.4343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22758927941322327,
|
|
"step": 395,
|
|
"valid_targets_mean": 4819.9,
|
|
"valid_targets_min": 1074
|
|
},
|
|
{
|
|
"epoch": 0.6259780907668232,
|
|
"grad_norm": 0.7567412456939797,
|
|
"learning_rate": 3.5625000000000005e-05,
|
|
"loss": 0.4625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26005831360816956,
|
|
"step": 400,
|
|
"valid_targets_mean": 3075.0,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 0.6338028169014085,
|
|
"grad_norm": 0.7478089705294009,
|
|
"learning_rate": 3.607142857142858e-05,
|
|
"loss": 0.4418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3258134126663208,
|
|
"step": 405,
|
|
"valid_targets_mean": 3650.8,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 0.6416275430359938,
|
|
"grad_norm": 0.6771001398207903,
|
|
"learning_rate": 3.651785714285715e-05,
|
|
"loss": 0.4401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1527598351240158,
|
|
"step": 410,
|
|
"valid_targets_mean": 3169.9,
|
|
"valid_targets_min": 1392
|
|
},
|
|
{
|
|
"epoch": 0.6494522691705791,
|
|
"grad_norm": 0.7230669768543532,
|
|
"learning_rate": 3.696428571428572e-05,
|
|
"loss": 0.4448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20782673358917236,
|
|
"step": 415,
|
|
"valid_targets_mean": 4012.6,
|
|
"valid_targets_min": 1165
|
|
},
|
|
{
|
|
"epoch": 0.6572769953051644,
|
|
"grad_norm": 0.775548092910182,
|
|
"learning_rate": 3.7410714285714285e-05,
|
|
"loss": 0.462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20477062463760376,
|
|
"step": 420,
|
|
"valid_targets_mean": 2886.6,
|
|
"valid_targets_min": 1325
|
|
},
|
|
{
|
|
"epoch": 0.6651017214397497,
|
|
"grad_norm": 0.7865035541156307,
|
|
"learning_rate": 3.785714285714286e-05,
|
|
"loss": 0.4455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16225959360599518,
|
|
"step": 425,
|
|
"valid_targets_mean": 2094.1,
|
|
"valid_targets_min": 814
|
|
},
|
|
{
|
|
"epoch": 0.672926447574335,
|
|
"grad_norm": 0.6628363735311326,
|
|
"learning_rate": 3.830357142857143e-05,
|
|
"loss": 0.4107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22603219747543335,
|
|
"step": 430,
|
|
"valid_targets_mean": 4385.1,
|
|
"valid_targets_min": 2133
|
|
},
|
|
{
|
|
"epoch": 0.6807511737089202,
|
|
"grad_norm": 0.7208695643528685,
|
|
"learning_rate": 3.875e-05,
|
|
"loss": 0.4363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21993932127952576,
|
|
"step": 435,
|
|
"valid_targets_mean": 3617.2,
|
|
"valid_targets_min": 867
|
|
},
|
|
{
|
|
"epoch": 0.6885758998435054,
|
|
"grad_norm": 0.7168052314186302,
|
|
"learning_rate": 3.919642857142858e-05,
|
|
"loss": 0.461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15671034157276154,
|
|
"step": 440,
|
|
"valid_targets_mean": 2547.8,
|
|
"valid_targets_min": 1120
|
|
},
|
|
{
|
|
"epoch": 0.6964006259780907,
|
|
"grad_norm": 0.656438132733893,
|
|
"learning_rate": 3.964285714285715e-05,
|
|
"loss": 0.4281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16389290988445282,
|
|
"step": 445,
|
|
"valid_targets_mean": 3170.6,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 0.704225352112676,
|
|
"grad_norm": 0.651229991191371,
|
|
"learning_rate": 3.999999390788695e-05,
|
|
"loss": 0.4131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19763849675655365,
|
|
"step": 450,
|
|
"valid_targets_mean": 3800.0,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 0.7120500782472613,
|
|
"grad_norm": 0.6850925925578267,
|
|
"learning_rate": 3.999978068431985e-05,
|
|
"loss": 0.4542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23887932300567627,
|
|
"step": 455,
|
|
"valid_targets_mean": 4639.4,
|
|
"valid_targets_min": 855
|
|
},
|
|
{
|
|
"epoch": 0.7198748043818466,
|
|
"grad_norm": 0.6603899079442291,
|
|
"learning_rate": 3.999926285881157e-05,
|
|
"loss": 0.4405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15876361727714539,
|
|
"step": 460,
|
|
"valid_targets_mean": 2826.9,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 0.7276995305164319,
|
|
"grad_norm": 0.8154399636942785,
|
|
"learning_rate": 3.999844043924872e-05,
|
|
"loss": 0.4538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20988821983337402,
|
|
"step": 465,
|
|
"valid_targets_mean": 3077.5,
|
|
"valid_targets_min": 986
|
|
},
|
|
{
|
|
"epoch": 0.7355242566510172,
|
|
"grad_norm": 0.6631470427301489,
|
|
"learning_rate": 3.999731343815697e-05,
|
|
"loss": 0.4448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24031871557235718,
|
|
"step": 470,
|
|
"valid_targets_mean": 3888.5,
|
|
"valid_targets_min": 1427
|
|
},
|
|
{
|
|
"epoch": 0.7433489827856025,
|
|
"grad_norm": 0.7333465215347305,
|
|
"learning_rate": 3.999588187270084e-05,
|
|
"loss": 0.4446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2531055212020874,
|
|
"step": 475,
|
|
"valid_targets_mean": 2997.4,
|
|
"valid_targets_min": 1248
|
|
},
|
|
{
|
|
"epoch": 0.7511737089201878,
|
|
"grad_norm": 0.6856370976466835,
|
|
"learning_rate": 3.999414576468345e-05,
|
|
"loss": 0.4673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24803884327411652,
|
|
"step": 480,
|
|
"valid_targets_mean": 4322.6,
|
|
"valid_targets_min": 1054
|
|
},
|
|
{
|
|
"epoch": 0.758998435054773,
|
|
"grad_norm": 0.6603258849163197,
|
|
"learning_rate": 3.99921051405462e-05,
|
|
"loss": 0.4303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19759124517440796,
|
|
"step": 485,
|
|
"valid_targets_mean": 4229.2,
|
|
"valid_targets_min": 1276
|
|
},
|
|
{
|
|
"epoch": 0.7668231611893583,
|
|
"grad_norm": 0.7216964733628823,
|
|
"learning_rate": 3.998976003136831e-05,
|
|
"loss": 0.4486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2696184813976288,
|
|
"step": 490,
|
|
"valid_targets_mean": 3420.0,
|
|
"valid_targets_min": 1470
|
|
},
|
|
{
|
|
"epoch": 0.7746478873239436,
|
|
"grad_norm": 0.7448284194732396,
|
|
"learning_rate": 3.998711047286643e-05,
|
|
"loss": 0.4536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1607106775045395,
|
|
"step": 495,
|
|
"valid_targets_mean": 2228.0,
|
|
"valid_targets_min": 1128
|
|
},
|
|
{
|
|
"epoch": 0.7824726134585289,
|
|
"grad_norm": 0.7676132086632224,
|
|
"learning_rate": 3.998415650539403e-05,
|
|
"loss": 0.4196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19232915341854095,
|
|
"step": 500,
|
|
"valid_targets_mean": 2695.1,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 0.7902973395931142,
|
|
"grad_norm": 0.7982550324855472,
|
|
"learning_rate": 3.998089817394081e-05,
|
|
"loss": 0.4585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21095818281173706,
|
|
"step": 505,
|
|
"valid_targets_mean": 3264.0,
|
|
"valid_targets_min": 1730
|
|
},
|
|
{
|
|
"epoch": 0.7981220657276995,
|
|
"grad_norm": 0.6779305833402858,
|
|
"learning_rate": 3.9977335528132026e-05,
|
|
"loss": 0.452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1639249175786972,
|
|
"step": 510,
|
|
"valid_targets_mean": 2888.9,
|
|
"valid_targets_min": 871
|
|
},
|
|
{
|
|
"epoch": 0.8059467918622848,
|
|
"grad_norm": 0.7091004915016293,
|
|
"learning_rate": 3.997346862222771e-05,
|
|
"loss": 0.4211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2597631812095642,
|
|
"step": 515,
|
|
"valid_targets_mean": 4434.5,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 0.8137715179968701,
|
|
"grad_norm": 0.7487377683586642,
|
|
"learning_rate": 3.9969297515121856e-05,
|
|
"loss": 0.4498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18227487802505493,
|
|
"step": 520,
|
|
"valid_targets_mean": 2353.5,
|
|
"valid_targets_min": 1480
|
|
},
|
|
{
|
|
"epoch": 0.8215962441314554,
|
|
"grad_norm": 0.8591015456220985,
|
|
"learning_rate": 3.996482227034154e-05,
|
|
"loss": 0.4172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27616655826568604,
|
|
"step": 525,
|
|
"valid_targets_mean": 2793.4,
|
|
"valid_targets_min": 1334
|
|
},
|
|
{
|
|
"epoch": 0.8294209702660407,
|
|
"grad_norm": 0.9639466970936157,
|
|
"learning_rate": 3.996004295604591e-05,
|
|
"loss": 0.4517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20206788182258606,
|
|
"step": 530,
|
|
"valid_targets_mean": 3072.9,
|
|
"valid_targets_min": 882
|
|
},
|
|
{
|
|
"epoch": 0.837245696400626,
|
|
"grad_norm": 1.6395124352810895,
|
|
"learning_rate": 3.995495964502519e-05,
|
|
"loss": 0.4294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2867642045021057,
|
|
"step": 535,
|
|
"valid_targets_mean": 5131.4,
|
|
"valid_targets_min": 1320
|
|
},
|
|
{
|
|
"epoch": 0.8450704225352113,
|
|
"grad_norm": 0.7292859707598816,
|
|
"learning_rate": 3.994957241469955e-05,
|
|
"loss": 0.4511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20171989500522614,
|
|
"step": 540,
|
|
"valid_targets_mean": 3031.6,
|
|
"valid_targets_min": 812
|
|
},
|
|
{
|
|
"epoch": 0.8528951486697965,
|
|
"grad_norm": 0.7032508375696713,
|
|
"learning_rate": 3.994388134711792e-05,
|
|
"loss": 0.4149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22082282602787018,
|
|
"step": 545,
|
|
"valid_targets_mean": 3336.5,
|
|
"valid_targets_min": 1859
|
|
},
|
|
{
|
|
"epoch": 0.8607198748043818,
|
|
"grad_norm": 0.7567193282188726,
|
|
"learning_rate": 3.993788652895678e-05,
|
|
"loss": 0.4372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2413557767868042,
|
|
"step": 550,
|
|
"valid_targets_mean": 3789.0,
|
|
"valid_targets_min": 2013
|
|
},
|
|
{
|
|
"epoch": 0.8685446009389671,
|
|
"grad_norm": 0.6238103919552648,
|
|
"learning_rate": 3.993158805151878e-05,
|
|
"loss": 0.4437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18995603919029236,
|
|
"step": 555,
|
|
"valid_targets_mean": 3753.4,
|
|
"valid_targets_min": 2006
|
|
},
|
|
{
|
|
"epoch": 0.8763693270735524,
|
|
"grad_norm": 0.700751921692497,
|
|
"learning_rate": 3.9924986010731396e-05,
|
|
"loss": 0.4122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2766258120536804,
|
|
"step": 560,
|
|
"valid_targets_mean": 4892.0,
|
|
"valid_targets_min": 2057
|
|
},
|
|
{
|
|
"epoch": 0.8841940532081377,
|
|
"grad_norm": 0.6181967295356379,
|
|
"learning_rate": 3.991808050714546e-05,
|
|
"loss": 0.4131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1661914587020874,
|
|
"step": 565,
|
|
"valid_targets_mean": 3967.5,
|
|
"valid_targets_min": 925
|
|
},
|
|
{
|
|
"epoch": 0.892018779342723,
|
|
"grad_norm": 0.8956241028755069,
|
|
"learning_rate": 3.99108716459336e-05,
|
|
"loss": 0.4194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17855943739414215,
|
|
"step": 570,
|
|
"valid_targets_mean": 3396.8,
|
|
"valid_targets_min": 1169
|
|
},
|
|
{
|
|
"epoch": 0.8998435054773083,
|
|
"grad_norm": 0.9226363839898855,
|
|
"learning_rate": 3.990335953688869e-05,
|
|
"loss": 0.4497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2475089728832245,
|
|
"step": 575,
|
|
"valid_targets_mean": 4796.9,
|
|
"valid_targets_min": 983
|
|
},
|
|
{
|
|
"epoch": 0.9076682316118936,
|
|
"grad_norm": 0.6543600451566329,
|
|
"learning_rate": 3.989554429442214e-05,
|
|
"loss": 0.4095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1482318490743637,
|
|
"step": 580,
|
|
"valid_targets_mean": 3055.1,
|
|
"valid_targets_min": 1218
|
|
},
|
|
{
|
|
"epoch": 0.9154929577464789,
|
|
"grad_norm": 0.798521105826654,
|
|
"learning_rate": 3.988742603756214e-05,
|
|
"loss": 0.4239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24125681817531586,
|
|
"step": 585,
|
|
"valid_targets_mean": 3164.1,
|
|
"valid_targets_min": 1040
|
|
},
|
|
{
|
|
"epoch": 0.9233176838810642,
|
|
"grad_norm": 0.6540035250643971,
|
|
"learning_rate": 3.9879004889951896e-05,
|
|
"loss": 0.4374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1571749746799469,
|
|
"step": 590,
|
|
"valid_targets_mean": 2915.4,
|
|
"valid_targets_min": 1231
|
|
},
|
|
{
|
|
"epoch": 0.9311424100156495,
|
|
"grad_norm": 0.7635547763202825,
|
|
"learning_rate": 3.98702809798477e-05,
|
|
"loss": 0.4392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19536404311656952,
|
|
"step": 595,
|
|
"valid_targets_mean": 2656.4,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 0.9389671361502347,
|
|
"grad_norm": 0.7734721778436154,
|
|
"learning_rate": 3.986125444011702e-05,
|
|
"loss": 0.4327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.155764639377594,
|
|
"step": 600,
|
|
"valid_targets_mean": 2349.4,
|
|
"valid_targets_min": 1481
|
|
},
|
|
{
|
|
"epoch": 0.94679186228482,
|
|
"grad_norm": 0.6788341381001909,
|
|
"learning_rate": 3.985192540823644e-05,
|
|
"loss": 0.4408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20744667947292328,
|
|
"step": 605,
|
|
"valid_targets_mean": 3652.2,
|
|
"valid_targets_min": 926
|
|
},
|
|
{
|
|
"epoch": 0.9546165884194053,
|
|
"grad_norm": 0.6846777634474001,
|
|
"learning_rate": 3.9842294026289565e-05,
|
|
"loss": 0.3992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14859655499458313,
|
|
"step": 610,
|
|
"valid_targets_mean": 2869.4,
|
|
"valid_targets_min": 1298
|
|
},
|
|
{
|
|
"epoch": 0.9624413145539906,
|
|
"grad_norm": 0.7844382574790923,
|
|
"learning_rate": 3.9832360440964884e-05,
|
|
"loss": 0.4276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19733117520809174,
|
|
"step": 615,
|
|
"valid_targets_mean": 2512.4,
|
|
"valid_targets_min": 1387
|
|
},
|
|
{
|
|
"epoch": 0.9702660406885759,
|
|
"grad_norm": 0.6884827663660276,
|
|
"learning_rate": 3.9822124803553545e-05,
|
|
"loss": 0.4023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2145414501428604,
|
|
"step": 620,
|
|
"valid_targets_mean": 3326.2,
|
|
"valid_targets_min": 895
|
|
},
|
|
{
|
|
"epoch": 0.9780907668231612,
|
|
"grad_norm": 0.9145589133892884,
|
|
"learning_rate": 3.981158726994699e-05,
|
|
"loss": 0.4398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26253432035446167,
|
|
"step": 625,
|
|
"valid_targets_mean": 3057.2,
|
|
"valid_targets_min": 683
|
|
},
|
|
{
|
|
"epoch": 0.9859154929577465,
|
|
"grad_norm": 0.885738786563518,
|
|
"learning_rate": 3.980074800063465e-05,
|
|
"loss": 0.3989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17374148964881897,
|
|
"step": 630,
|
|
"valid_targets_mean": 2987.2,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 0.9937402190923318,
|
|
"grad_norm": 0.6357000434510868,
|
|
"learning_rate": 3.978960716070146e-05,
|
|
"loss": 0.416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16301760077476501,
|
|
"step": 635,
|
|
"valid_targets_mean": 3903.8,
|
|
"valid_targets_min": 1330
|
|
},
|
|
{
|
|
"epoch": 1.001564945226917,
|
|
"grad_norm": 0.639667432239434,
|
|
"learning_rate": 3.977816491982534e-05,
|
|
"loss": 0.4047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1870630979537964,
|
|
"step": 640,
|
|
"valid_targets_mean": 3787.5,
|
|
"valid_targets_min": 1037
|
|
},
|
|
{
|
|
"epoch": 1.0093896713615023,
|
|
"grad_norm": 0.6589817737683686,
|
|
"learning_rate": 3.976642145227465e-05,
|
|
"loss": 0.4021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25719788670539856,
|
|
"step": 645,
|
|
"valid_targets_mean": 5377.9,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 1.0172143974960877,
|
|
"grad_norm": 0.594057166305236,
|
|
"learning_rate": 3.97543769369055e-05,
|
|
"loss": 0.4052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17982129752635956,
|
|
"step": 650,
|
|
"valid_targets_mean": 4424.2,
|
|
"valid_targets_min": 1171
|
|
},
|
|
{
|
|
"epoch": 1.0250391236306728,
|
|
"grad_norm": 0.6803281649431138,
|
|
"learning_rate": 3.974203155715904e-05,
|
|
"loss": 0.4033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1867012083530426,
|
|
"step": 655,
|
|
"valid_targets_mean": 2905.9,
|
|
"valid_targets_min": 1479
|
|
},
|
|
{
|
|
"epoch": 1.0328638497652582,
|
|
"grad_norm": 0.7564974958032672,
|
|
"learning_rate": 3.972938550105867e-05,
|
|
"loss": 0.3879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19446754455566406,
|
|
"step": 660,
|
|
"valid_targets_mean": 2813.4,
|
|
"valid_targets_min": 1229
|
|
},
|
|
{
|
|
"epoch": 1.0406885758998434,
|
|
"grad_norm": 0.7206974254851316,
|
|
"learning_rate": 3.971643896120715e-05,
|
|
"loss": 0.4432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27469033002853394,
|
|
"step": 665,
|
|
"valid_targets_mean": 4397.9,
|
|
"valid_targets_min": 1224
|
|
},
|
|
{
|
|
"epoch": 1.0485133020344288,
|
|
"grad_norm": 0.6242407800724762,
|
|
"learning_rate": 3.970319213478371e-05,
|
|
"loss": 0.4216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21377065777778625,
|
|
"step": 670,
|
|
"valid_targets_mean": 5331.2,
|
|
"valid_targets_min": 2470
|
|
},
|
|
{
|
|
"epoch": 1.056338028169014,
|
|
"grad_norm": 0.6312651754876695,
|
|
"learning_rate": 3.9689645223541024e-05,
|
|
"loss": 0.3942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19122716784477234,
|
|
"step": 675,
|
|
"valid_targets_mean": 3585.5,
|
|
"valid_targets_min": 1096
|
|
},
|
|
{
|
|
"epoch": 1.0641627543035994,
|
|
"grad_norm": 0.8012438324909986,
|
|
"learning_rate": 3.967579843380211e-05,
|
|
"loss": 0.4233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24683937430381775,
|
|
"step": 680,
|
|
"valid_targets_mean": 2930.2,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 1.0719874804381846,
|
|
"grad_norm": 0.6671738015457859,
|
|
"learning_rate": 3.9661651976457236e-05,
|
|
"loss": 0.4003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17930462956428528,
|
|
"step": 685,
|
|
"valid_targets_mean": 3398.0,
|
|
"valid_targets_min": 901
|
|
},
|
|
{
|
|
"epoch": 1.07981220657277,
|
|
"grad_norm": 0.7007449442911007,
|
|
"learning_rate": 3.9647206066960684e-05,
|
|
"loss": 0.41,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2643488049507141,
|
|
"step": 690,
|
|
"valid_targets_mean": 4009.8,
|
|
"valid_targets_min": 894
|
|
},
|
|
{
|
|
"epoch": 1.0876369327073552,
|
|
"grad_norm": 0.662324168656589,
|
|
"learning_rate": 3.9632460925327477e-05,
|
|
"loss": 0.3824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.126633882522583,
|
|
"step": 695,
|
|
"valid_targets_mean": 2596.9,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 1.0954616588419406,
|
|
"grad_norm": 0.8234060871435065,
|
|
"learning_rate": 3.961741677613001e-05,
|
|
"loss": 0.4112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1931648701429367,
|
|
"step": 700,
|
|
"valid_targets_mean": 2667.4,
|
|
"valid_targets_min": 779
|
|
},
|
|
{
|
|
"epoch": 1.1032863849765258,
|
|
"grad_norm": 0.6167002190922459,
|
|
"learning_rate": 3.960207384849465e-05,
|
|
"loss": 0.4016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23024515807628632,
|
|
"step": 705,
|
|
"valid_targets_mean": 4928.0,
|
|
"valid_targets_min": 1280
|
|
},
|
|
{
|
|
"epoch": 1.1111111111111112,
|
|
"grad_norm": 0.7510377632531686,
|
|
"learning_rate": 3.958643237609823e-05,
|
|
"loss": 0.4151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17680156230926514,
|
|
"step": 710,
|
|
"valid_targets_mean": 2539.6,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 1.1189358372456963,
|
|
"grad_norm": 1.1804610928040098,
|
|
"learning_rate": 3.9570492597164524e-05,
|
|
"loss": 0.3979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19640667736530304,
|
|
"step": 715,
|
|
"valid_targets_mean": 2727.6,
|
|
"valid_targets_min": 1287
|
|
},
|
|
{
|
|
"epoch": 1.1267605633802817,
|
|
"grad_norm": 0.5882422759828964,
|
|
"learning_rate": 3.955425475446055e-05,
|
|
"loss": 0.3807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16937266290187836,
|
|
"step": 720,
|
|
"valid_targets_mean": 4588.6,
|
|
"valid_targets_min": 1211
|
|
},
|
|
{
|
|
"epoch": 1.134585289514867,
|
|
"grad_norm": 0.7690545247172926,
|
|
"learning_rate": 3.953771909529295e-05,
|
|
"loss": 0.4218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17046384513378143,
|
|
"step": 725,
|
|
"valid_targets_mean": 2883.0,
|
|
"valid_targets_min": 1052
|
|
},
|
|
{
|
|
"epoch": 1.1424100156494523,
|
|
"grad_norm": 0.6934665488530827,
|
|
"learning_rate": 3.952088587150419e-05,
|
|
"loss": 0.4086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1978292465209961,
|
|
"step": 730,
|
|
"valid_targets_mean": 4349.5,
|
|
"valid_targets_min": 1333
|
|
},
|
|
{
|
|
"epoch": 1.1502347417840375,
|
|
"grad_norm": 0.7530194838918431,
|
|
"learning_rate": 3.9503755339468704e-05,
|
|
"loss": 0.3738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2596890926361084,
|
|
"step": 735,
|
|
"valid_targets_mean": 4582.6,
|
|
"valid_targets_min": 1130
|
|
},
|
|
{
|
|
"epoch": 1.158059467918623,
|
|
"grad_norm": 0.7909428398934251,
|
|
"learning_rate": 3.9486327760089015e-05,
|
|
"loss": 0.4107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3226276636123657,
|
|
"step": 740,
|
|
"valid_targets_mean": 4752.1,
|
|
"valid_targets_min": 1124
|
|
},
|
|
{
|
|
"epoch": 1.165884194053208,
|
|
"grad_norm": 0.760054548354086,
|
|
"learning_rate": 3.946860339879177e-05,
|
|
"loss": 0.3886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21802927553653717,
|
|
"step": 745,
|
|
"valid_targets_mean": 3674.9,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 1.1737089201877935,
|
|
"grad_norm": 0.7056909948886079,
|
|
"learning_rate": 3.945058252552366e-05,
|
|
"loss": 0.4111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24407556653022766,
|
|
"step": 750,
|
|
"valid_targets_mean": 3831.5,
|
|
"valid_targets_min": 713
|
|
},
|
|
{
|
|
"epoch": 1.1815336463223787,
|
|
"grad_norm": 0.5868473814725246,
|
|
"learning_rate": 3.943226541474734e-05,
|
|
"loss": 0.3957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1522311568260193,
|
|
"step": 755,
|
|
"valid_targets_mean": 3784.0,
|
|
"valid_targets_min": 1585
|
|
},
|
|
{
|
|
"epoch": 1.189358372456964,
|
|
"grad_norm": 0.7203853270172556,
|
|
"learning_rate": 3.941365234543727e-05,
|
|
"loss": 0.4427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19159486889839172,
|
|
"step": 760,
|
|
"valid_targets_mean": 2532.0,
|
|
"valid_targets_min": 812
|
|
},
|
|
{
|
|
"epoch": 1.1971830985915493,
|
|
"grad_norm": 0.7500662236394489,
|
|
"learning_rate": 3.9394743601075384e-05,
|
|
"loss": 0.398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21980783343315125,
|
|
"step": 765,
|
|
"valid_targets_mean": 2943.5,
|
|
"valid_targets_min": 2036
|
|
},
|
|
{
|
|
"epoch": 1.2050078247261347,
|
|
"grad_norm": 0.9308730125314616,
|
|
"learning_rate": 3.9375539469646866e-05,
|
|
"loss": 0.3694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23754967749118805,
|
|
"step": 770,
|
|
"valid_targets_mean": 4920.1,
|
|
"valid_targets_min": 990
|
|
},
|
|
{
|
|
"epoch": 1.2128325508607198,
|
|
"grad_norm": 0.7662158026504272,
|
|
"learning_rate": 3.9356040243635695e-05,
|
|
"loss": 0.3966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2245776355266571,
|
|
"step": 775,
|
|
"valid_targets_mean": 2544.5,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 1.2206572769953052,
|
|
"grad_norm": 0.6237770351179932,
|
|
"learning_rate": 3.9336246220020254e-05,
|
|
"loss": 0.4127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21610769629478455,
|
|
"step": 780,
|
|
"valid_targets_mean": 3952.4,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 1.2284820031298904,
|
|
"grad_norm": 0.7267532906365423,
|
|
"learning_rate": 3.931615770026874e-05,
|
|
"loss": 0.4161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3266049027442932,
|
|
"step": 785,
|
|
"valid_targets_mean": 4854.1,
|
|
"valid_targets_min": 2367
|
|
},
|
|
{
|
|
"epoch": 1.2363067292644758,
|
|
"grad_norm": 0.6383454956244266,
|
|
"learning_rate": 3.9295774990334604e-05,
|
|
"loss": 0.4172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17154471576213837,
|
|
"step": 790,
|
|
"valid_targets_mean": 3347.1,
|
|
"valid_targets_min": 1282
|
|
},
|
|
{
|
|
"epoch": 1.244131455399061,
|
|
"grad_norm": 0.7597717024361907,
|
|
"learning_rate": 3.927509840065191e-05,
|
|
"loss": 0.4207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2381105124950409,
|
|
"step": 795,
|
|
"valid_targets_mean": 3532.8,
|
|
"valid_targets_min": 903
|
|
},
|
|
{
|
|
"epoch": 1.2519561815336462,
|
|
"grad_norm": 0.6023113321029256,
|
|
"learning_rate": 3.9254128246130574e-05,
|
|
"loss": 0.4088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17777009308338165,
|
|
"step": 800,
|
|
"valid_targets_mean": 3731.0,
|
|
"valid_targets_min": 914
|
|
},
|
|
{
|
|
"epoch": 1.2597809076682316,
|
|
"grad_norm": 0.7376398905479149,
|
|
"learning_rate": 3.9232864846151596e-05,
|
|
"loss": 0.4127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15244904160499573,
|
|
"step": 805,
|
|
"valid_targets_mean": 2228.2,
|
|
"valid_targets_min": 786
|
|
},
|
|
{
|
|
"epoch": 1.267605633802817,
|
|
"grad_norm": 0.6729384053955971,
|
|
"learning_rate": 3.921130852456216e-05,
|
|
"loss": 0.4004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17800085246562958,
|
|
"step": 810,
|
|
"valid_targets_mean": 3303.1,
|
|
"valid_targets_min": 1616
|
|
},
|
|
{
|
|
"epoch": 1.2754303599374022,
|
|
"grad_norm": 0.6957048701615863,
|
|
"learning_rate": 3.918945960967075e-05,
|
|
"loss": 0.4168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21751374006271362,
|
|
"step": 815,
|
|
"valid_targets_mean": 5027.5,
|
|
"valid_targets_min": 1878
|
|
},
|
|
{
|
|
"epoch": 1.2832550860719873,
|
|
"grad_norm": 0.6645777447862615,
|
|
"learning_rate": 3.9167318434242096e-05,
|
|
"loss": 0.3972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2218622863292694,
|
|
"step": 820,
|
|
"valid_targets_mean": 3654.9,
|
|
"valid_targets_min": 1542
|
|
},
|
|
{
|
|
"epoch": 1.2910798122065728,
|
|
"grad_norm": 0.7053642375629554,
|
|
"learning_rate": 3.9144885335492163e-05,
|
|
"loss": 0.3953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15899214148521423,
|
|
"step": 825,
|
|
"valid_targets_mean": 2802.2,
|
|
"valid_targets_min": 1477
|
|
},
|
|
{
|
|
"epoch": 1.2989045383411582,
|
|
"grad_norm": 0.6432109793054765,
|
|
"learning_rate": 3.912216065508295e-05,
|
|
"loss": 0.4032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21869224309921265,
|
|
"step": 830,
|
|
"valid_targets_mean": 4092.5,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 1.3067292644757433,
|
|
"grad_norm": 0.7404329360244081,
|
|
"learning_rate": 3.909914473911735e-05,
|
|
"loss": 0.4138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1715874820947647,
|
|
"step": 835,
|
|
"valid_targets_mean": 2490.0,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 1.3145539906103285,
|
|
"grad_norm": 0.6201474315629757,
|
|
"learning_rate": 3.9075837938133845e-05,
|
|
"loss": 0.3755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17279471457004547,
|
|
"step": 840,
|
|
"valid_targets_mean": 3576.5,
|
|
"valid_targets_min": 1560
|
|
},
|
|
{
|
|
"epoch": 1.322378716744914,
|
|
"grad_norm": 0.6578607768235923,
|
|
"learning_rate": 3.905224060710116e-05,
|
|
"loss": 0.3852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19631803035736084,
|
|
"step": 845,
|
|
"valid_targets_mean": 5221.0,
|
|
"valid_targets_min": 1558
|
|
},
|
|
{
|
|
"epoch": 1.3302034428794993,
|
|
"grad_norm": 0.7307187872250055,
|
|
"learning_rate": 3.902835310541288e-05,
|
|
"loss": 0.377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21276363730430603,
|
|
"step": 850,
|
|
"valid_targets_mean": 3194.2,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 1.3380281690140845,
|
|
"grad_norm": 0.5876790978074697,
|
|
"learning_rate": 3.9004175796881976e-05,
|
|
"loss": 0.3827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23284240067005157,
|
|
"step": 855,
|
|
"valid_targets_mean": 5231.5,
|
|
"valid_targets_min": 873
|
|
},
|
|
{
|
|
"epoch": 1.3458528951486697,
|
|
"grad_norm": 0.7692201829544254,
|
|
"learning_rate": 3.8979709049735234e-05,
|
|
"loss": 0.4039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.274081289768219,
|
|
"step": 860,
|
|
"valid_targets_mean": 4140.6,
|
|
"valid_targets_min": 1038
|
|
},
|
|
{
|
|
"epoch": 1.353677621283255,
|
|
"grad_norm": 0.726339499578564,
|
|
"learning_rate": 3.8954953236607656e-05,
|
|
"loss": 0.3813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19835659861564636,
|
|
"step": 865,
|
|
"valid_targets_mean": 2668.1,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 1.3615023474178405,
|
|
"grad_norm": 0.6653025175501566,
|
|
"learning_rate": 3.892990873453684e-05,
|
|
"loss": 0.3779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16919967532157898,
|
|
"step": 870,
|
|
"valid_targets_mean": 3507.4,
|
|
"valid_targets_min": 1443
|
|
},
|
|
{
|
|
"epoch": 1.3693270735524257,
|
|
"grad_norm": 0.6859238937075761,
|
|
"learning_rate": 3.8904575924957144e-05,
|
|
"loss": 0.396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16416238248348236,
|
|
"step": 875,
|
|
"valid_targets_mean": 3005.1,
|
|
"valid_targets_min": 1614
|
|
},
|
|
{
|
|
"epoch": 1.3771517996870108,
|
|
"grad_norm": 0.8485946580580555,
|
|
"learning_rate": 3.887895519369397e-05,
|
|
"loss": 0.389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20771420001983643,
|
|
"step": 880,
|
|
"valid_targets_mean": 2648.1,
|
|
"valid_targets_min": 1181
|
|
},
|
|
{
|
|
"epoch": 1.3849765258215962,
|
|
"grad_norm": 0.6750959115279427,
|
|
"learning_rate": 3.8853046930957807e-05,
|
|
"loss": 0.3963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19531777501106262,
|
|
"step": 885,
|
|
"valid_targets_mean": 3451.9,
|
|
"valid_targets_min": 974
|
|
},
|
|
{
|
|
"epoch": 1.3928012519561817,
|
|
"grad_norm": 0.6815041421381945,
|
|
"learning_rate": 3.882685153133833e-05,
|
|
"loss": 0.3895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17360779643058777,
|
|
"step": 890,
|
|
"valid_targets_mean": 3061.1,
|
|
"valid_targets_min": 805
|
|
},
|
|
{
|
|
"epoch": 1.4006259780907668,
|
|
"grad_norm": 0.7550998641455068,
|
|
"learning_rate": 3.8800369393798415e-05,
|
|
"loss": 0.4025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1336001455783844,
|
|
"step": 895,
|
|
"valid_targets_mean": 3669.2,
|
|
"valid_targets_min": 1074
|
|
},
|
|
{
|
|
"epoch": 1.408450704225352,
|
|
"grad_norm": 0.7021053836841473,
|
|
"learning_rate": 3.877360092166799e-05,
|
|
"loss": 0.3877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2596614956855774,
|
|
"step": 900,
|
|
"valid_targets_mean": 3820.9,
|
|
"valid_targets_min": 962
|
|
},
|
|
{
|
|
"epoch": 1.4162754303599374,
|
|
"grad_norm": 0.5299514262446642,
|
|
"learning_rate": 3.874654652263797e-05,
|
|
"loss": 0.3798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18043017387390137,
|
|
"step": 905,
|
|
"valid_targets_mean": 5243.0,
|
|
"valid_targets_min": 1839
|
|
},
|
|
{
|
|
"epoch": 1.4241001564945228,
|
|
"grad_norm": 0.7933768855467818,
|
|
"learning_rate": 3.8719206608753983e-05,
|
|
"loss": 0.4,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2397492527961731,
|
|
"step": 910,
|
|
"valid_targets_mean": 4625.8,
|
|
"valid_targets_min": 1651
|
|
},
|
|
{
|
|
"epoch": 1.431924882629108,
|
|
"grad_norm": 0.6373963792571129,
|
|
"learning_rate": 3.8691581596410144e-05,
|
|
"loss": 0.3832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14870958030223846,
|
|
"step": 915,
|
|
"valid_targets_mean": 2912.1,
|
|
"valid_targets_min": 888
|
|
},
|
|
{
|
|
"epoch": 1.4397496087636932,
|
|
"grad_norm": 0.611685252365439,
|
|
"learning_rate": 3.866367190634268e-05,
|
|
"loss": 0.3856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1526041328907013,
|
|
"step": 920,
|
|
"valid_targets_mean": 3902.0,
|
|
"valid_targets_min": 1497
|
|
},
|
|
{
|
|
"epoch": 1.4475743348982786,
|
|
"grad_norm": 0.6937332133849283,
|
|
"learning_rate": 3.863547796362355e-05,
|
|
"loss": 0.419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28414416313171387,
|
|
"step": 925,
|
|
"valid_targets_mean": 4253.8,
|
|
"valid_targets_min": 1273
|
|
},
|
|
{
|
|
"epoch": 1.455399061032864,
|
|
"grad_norm": 0.7259271519349882,
|
|
"learning_rate": 3.8607000197653944e-05,
|
|
"loss": 0.389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2689603567123413,
|
|
"step": 930,
|
|
"valid_targets_mean": 3687.4,
|
|
"valid_targets_min": 630
|
|
},
|
|
{
|
|
"epoch": 1.4632237871674492,
|
|
"grad_norm": 0.730694228257684,
|
|
"learning_rate": 3.857823904215776e-05,
|
|
"loss": 0.3841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19418419897556305,
|
|
"step": 935,
|
|
"valid_targets_mean": 3124.6,
|
|
"valid_targets_min": 1678
|
|
},
|
|
{
|
|
"epoch": 1.4710485133020343,
|
|
"grad_norm": 0.6961877053634622,
|
|
"learning_rate": 3.854919493517498e-05,
|
|
"loss": 0.3568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21480441093444824,
|
|
"step": 940,
|
|
"valid_targets_mean": 3742.4,
|
|
"valid_targets_min": 1570
|
|
},
|
|
{
|
|
"epoch": 1.4788732394366197,
|
|
"grad_norm": 0.7317365942025466,
|
|
"learning_rate": 3.8519868319055034e-05,
|
|
"loss": 0.3777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16993600130081177,
|
|
"step": 945,
|
|
"valid_targets_mean": 2296.4,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 1.486697965571205,
|
|
"grad_norm": 0.6337265629864766,
|
|
"learning_rate": 3.849025964045002e-05,
|
|
"loss": 0.4025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2555607557296753,
|
|
"step": 950,
|
|
"valid_targets_mean": 4452.9,
|
|
"valid_targets_min": 1912
|
|
},
|
|
{
|
|
"epoch": 1.4945226917057903,
|
|
"grad_norm": 0.5744571412715943,
|
|
"learning_rate": 3.846036935030795e-05,
|
|
"loss": 0.3784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22037804126739502,
|
|
"step": 955,
|
|
"valid_targets_mean": 5034.4,
|
|
"valid_targets_min": 1899
|
|
},
|
|
{
|
|
"epoch": 1.5023474178403755,
|
|
"grad_norm": 0.7157005558572016,
|
|
"learning_rate": 3.843019790386581e-05,
|
|
"loss": 0.3751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18532317876815796,
|
|
"step": 960,
|
|
"valid_targets_mean": 3143.9,
|
|
"valid_targets_min": 1369
|
|
},
|
|
{
|
|
"epoch": 1.510172143974961,
|
|
"grad_norm": 0.5692157566146229,
|
|
"learning_rate": 3.839974576064273e-05,
|
|
"loss": 0.4206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1893215775489807,
|
|
"step": 965,
|
|
"valid_targets_mean": 4337.1,
|
|
"valid_targets_min": 1764
|
|
},
|
|
{
|
|
"epoch": 1.5179968701095463,
|
|
"grad_norm": 0.7800625316720775,
|
|
"learning_rate": 3.8369013384432856e-05,
|
|
"loss": 0.3865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29718929529190063,
|
|
"step": 970,
|
|
"valid_targets_mean": 4164.5,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 1.5258215962441315,
|
|
"grad_norm": 0.7585306174886708,
|
|
"learning_rate": 3.833800124329842e-05,
|
|
"loss": 0.3975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18668968975543976,
|
|
"step": 975,
|
|
"valid_targets_mean": 3376.9,
|
|
"valid_targets_min": 1310
|
|
},
|
|
{
|
|
"epoch": 1.5336463223787167,
|
|
"grad_norm": 0.6178910144979316,
|
|
"learning_rate": 3.8306709809562515e-05,
|
|
"loss": 0.4034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15267477929592133,
|
|
"step": 980,
|
|
"valid_targets_mean": 3110.0,
|
|
"valid_targets_min": 1519
|
|
},
|
|
{
|
|
"epoch": 1.541471048513302,
|
|
"grad_norm": 0.6477362724369636,
|
|
"learning_rate": 3.827513955980193e-05,
|
|
"loss": 0.3816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14183005690574646,
|
|
"step": 985,
|
|
"valid_targets_mean": 2165.2,
|
|
"valid_targets_min": 1061
|
|
},
|
|
{
|
|
"epoch": 1.5492957746478875,
|
|
"grad_norm": 0.6647712733299704,
|
|
"learning_rate": 3.824329097483991e-05,
|
|
"loss": 0.3872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19861292839050293,
|
|
"step": 990,
|
|
"valid_targets_mean": 3237.4,
|
|
"valid_targets_min": 986
|
|
},
|
|
{
|
|
"epoch": 1.5571205007824727,
|
|
"grad_norm": 0.7113614310009018,
|
|
"learning_rate": 3.8211164539738826e-05,
|
|
"loss": 0.3732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19258275628089905,
|
|
"step": 995,
|
|
"valid_targets_mean": 3244.8,
|
|
"valid_targets_min": 1080
|
|
},
|
|
{
|
|
"epoch": 1.5649452269170578,
|
|
"grad_norm": 0.5927514366636316,
|
|
"learning_rate": 3.817876074379275e-05,
|
|
"loss": 0.3896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19110935926437378,
|
|
"step": 1000,
|
|
"valid_targets_mean": 4737.9,
|
|
"valid_targets_min": 1117
|
|
},
|
|
{
|
|
"epoch": 1.5727699530516432,
|
|
"grad_norm": 0.7037322407031599,
|
|
"learning_rate": 3.8146080080520066e-05,
|
|
"loss": 0.3702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21262872219085693,
|
|
"step": 1005,
|
|
"valid_targets_mean": 4051.8,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 1.5805946791862286,
|
|
"grad_norm": 0.7144961978061335,
|
|
"learning_rate": 3.81131230476559e-05,
|
|
"loss": 0.3961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12801748514175415,
|
|
"step": 1010,
|
|
"valid_targets_mean": 2675.5,
|
|
"valid_targets_min": 1197
|
|
},
|
|
{
|
|
"epoch": 1.5884194053208138,
|
|
"grad_norm": 0.7215497755633314,
|
|
"learning_rate": 3.8079890147144565e-05,
|
|
"loss": 0.4048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17543953657150269,
|
|
"step": 1015,
|
|
"valid_targets_mean": 2495.5,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 1.596244131455399,
|
|
"grad_norm": 0.7149671274531879,
|
|
"learning_rate": 3.804638188513191e-05,
|
|
"loss": 0.3877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2042284905910492,
|
|
"step": 1020,
|
|
"valid_targets_mean": 3829.6,
|
|
"valid_targets_min": 1351
|
|
},
|
|
{
|
|
"epoch": 1.6040688575899842,
|
|
"grad_norm": 0.6847260674812208,
|
|
"learning_rate": 3.8012598771957616e-05,
|
|
"loss": 0.378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2272772639989853,
|
|
"step": 1025,
|
|
"valid_targets_mean": 4251.0,
|
|
"valid_targets_min": 2334
|
|
},
|
|
{
|
|
"epoch": 1.6118935837245696,
|
|
"grad_norm": 0.5635946172286166,
|
|
"learning_rate": 3.797854132214742e-05,
|
|
"loss": 0.3991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1589568555355072,
|
|
"step": 1030,
|
|
"valid_targets_mean": 4572.2,
|
|
"valid_targets_min": 1046
|
|
},
|
|
{
|
|
"epoch": 1.619718309859155,
|
|
"grad_norm": 0.6177538500301041,
|
|
"learning_rate": 3.7944210054405274e-05,
|
|
"loss": 0.3896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15217478573322296,
|
|
"step": 1035,
|
|
"valid_targets_mean": 2709.0,
|
|
"valid_targets_min": 1379
|
|
},
|
|
{
|
|
"epoch": 1.6275430359937402,
|
|
"grad_norm": 0.8284362074696278,
|
|
"learning_rate": 3.790960549160545e-05,
|
|
"loss": 0.369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2103167027235031,
|
|
"step": 1040,
|
|
"valid_targets_mean": 2956.8,
|
|
"valid_targets_min": 1099
|
|
},
|
|
{
|
|
"epoch": 1.6353677621283254,
|
|
"grad_norm": 0.6707117576517818,
|
|
"learning_rate": 3.7874728160784575e-05,
|
|
"loss": 0.3751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20466859638690948,
|
|
"step": 1045,
|
|
"valid_targets_mean": 3299.8,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 1.6431924882629108,
|
|
"grad_norm": 0.5889669066734561,
|
|
"learning_rate": 3.7839578593133624e-05,
|
|
"loss": 0.3689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2284867763519287,
|
|
"step": 1050,
|
|
"valid_targets_mean": 4419.0,
|
|
"valid_targets_min": 1476
|
|
},
|
|
{
|
|
"epoch": 1.6510172143974962,
|
|
"grad_norm": 0.7126499958543611,
|
|
"learning_rate": 3.780415732398977e-05,
|
|
"loss": 0.3839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2121860235929489,
|
|
"step": 1055,
|
|
"valid_targets_mean": 2730.1,
|
|
"valid_targets_min": 1146
|
|
},
|
|
{
|
|
"epoch": 1.6588419405320813,
|
|
"grad_norm": 0.6755315583434616,
|
|
"learning_rate": 3.7768464892828316e-05,
|
|
"loss": 0.3877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24364404380321503,
|
|
"step": 1060,
|
|
"valid_targets_mean": 3872.4,
|
|
"valid_targets_min": 1483
|
|
},
|
|
{
|
|
"epoch": 1.6666666666666665,
|
|
"grad_norm": 0.6120351411690437,
|
|
"learning_rate": 3.77325018432544e-05,
|
|
"loss": 0.3593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25968432426452637,
|
|
"step": 1065,
|
|
"valid_targets_mean": 5192.4,
|
|
"valid_targets_min": 1356
|
|
},
|
|
{
|
|
"epoch": 1.674491392801252,
|
|
"grad_norm": 0.7638527392397952,
|
|
"learning_rate": 3.769626872299477e-05,
|
|
"loss": 0.3914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19384410977363586,
|
|
"step": 1070,
|
|
"valid_targets_mean": 3573.2,
|
|
"valid_targets_min": 1054
|
|
},
|
|
{
|
|
"epoch": 1.6823161189358373,
|
|
"grad_norm": 0.6183641692448105,
|
|
"learning_rate": 3.765976608388942e-05,
|
|
"loss": 0.3738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1667187660932541,
|
|
"step": 1075,
|
|
"valid_targets_mean": 4197.1,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 1.6901408450704225,
|
|
"grad_norm": 0.5927427753709974,
|
|
"learning_rate": 3.7622994481883175e-05,
|
|
"loss": 0.3588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11875749379396439,
|
|
"step": 1080,
|
|
"valid_targets_mean": 2953.2,
|
|
"valid_targets_min": 802
|
|
},
|
|
{
|
|
"epoch": 1.6979655712050077,
|
|
"grad_norm": 0.6621084011481048,
|
|
"learning_rate": 3.7585954477017246e-05,
|
|
"loss": 0.3866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16531771421432495,
|
|
"step": 1085,
|
|
"valid_targets_mean": 2920.9,
|
|
"valid_targets_min": 1028
|
|
},
|
|
{
|
|
"epoch": 1.705790297339593,
|
|
"grad_norm": 0.6770338116975819,
|
|
"learning_rate": 3.754864663342069e-05,
|
|
"loss": 0.391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1784895807504654,
|
|
"step": 1090,
|
|
"valid_targets_mean": 2558.5,
|
|
"valid_targets_min": 874
|
|
},
|
|
{
|
|
"epoch": 1.7136150234741785,
|
|
"grad_norm": 0.9068762968334044,
|
|
"learning_rate": 3.751107151930182e-05,
|
|
"loss": 0.3847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21789948642253876,
|
|
"step": 1095,
|
|
"valid_targets_mean": 3169.6,
|
|
"valid_targets_min": 898
|
|
},
|
|
{
|
|
"epoch": 1.7214397496087637,
|
|
"grad_norm": 0.6037598046217117,
|
|
"learning_rate": 3.747322970693954e-05,
|
|
"loss": 0.3674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15747937560081482,
|
|
"step": 1100,
|
|
"valid_targets_mean": 3359.9,
|
|
"valid_targets_min": 1510
|
|
},
|
|
{
|
|
"epoch": 1.7292644757433489,
|
|
"grad_norm": 0.5998719503670195,
|
|
"learning_rate": 3.743512177267464e-05,
|
|
"loss": 0.3891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19458629190921783,
|
|
"step": 1105,
|
|
"valid_targets_mean": 4155.5,
|
|
"valid_targets_min": 1205
|
|
},
|
|
{
|
|
"epoch": 1.7370892018779343,
|
|
"grad_norm": 0.7699947633108596,
|
|
"learning_rate": 3.7396748296901045e-05,
|
|
"loss": 0.4134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2035745531320572,
|
|
"step": 1110,
|
|
"valid_targets_mean": 2242.6,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 1.7449139280125197,
|
|
"grad_norm": 0.5045663123165887,
|
|
"learning_rate": 3.7358109864056895e-05,
|
|
"loss": 0.374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1745176911354065,
|
|
"step": 1115,
|
|
"valid_targets_mean": 4955.0,
|
|
"valid_targets_min": 1980
|
|
},
|
|
{
|
|
"epoch": 1.7527386541471048,
|
|
"grad_norm": 0.7214087415437522,
|
|
"learning_rate": 3.731920706261575e-05,
|
|
"loss": 0.3581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18232125043869019,
|
|
"step": 1120,
|
|
"valid_targets_mean": 2670.4,
|
|
"valid_targets_min": 869
|
|
},
|
|
{
|
|
"epoch": 1.76056338028169,
|
|
"grad_norm": 0.6013907044720654,
|
|
"learning_rate": 3.728004048507753e-05,
|
|
"loss": 0.3801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17218884825706482,
|
|
"step": 1125,
|
|
"valid_targets_mean": 3589.4,
|
|
"valid_targets_min": 1662
|
|
},
|
|
{
|
|
"epoch": 1.7683881064162754,
|
|
"grad_norm": 0.7393173421314084,
|
|
"learning_rate": 3.724061072795957e-05,
|
|
"loss": 0.3886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1833842247724533,
|
|
"step": 1130,
|
|
"valid_targets_mean": 3005.8,
|
|
"valid_targets_min": 1210
|
|
},
|
|
{
|
|
"epoch": 1.7762128325508608,
|
|
"grad_norm": 0.6492959339669794,
|
|
"learning_rate": 3.7200918391787474e-05,
|
|
"loss": 0.337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14065048098564148,
|
|
"step": 1135,
|
|
"valid_targets_mean": 3426.2,
|
|
"valid_targets_min": 1022
|
|
},
|
|
{
|
|
"epoch": 1.784037558685446,
|
|
"grad_norm": 0.6731687558184775,
|
|
"learning_rate": 3.716096408108601e-05,
|
|
"loss": 0.4111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16753613948822021,
|
|
"step": 1140,
|
|
"valid_targets_mean": 3501.1,
|
|
"valid_targets_min": 1030
|
|
},
|
|
{
|
|
"epoch": 1.7918622848200312,
|
|
"grad_norm": 0.7166648462525042,
|
|
"learning_rate": 3.7120748404369866e-05,
|
|
"loss": 0.375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1347351223230362,
|
|
"step": 1145,
|
|
"valid_targets_mean": 2145.1,
|
|
"valid_targets_min": 812
|
|
},
|
|
{
|
|
"epoch": 1.7996870109546166,
|
|
"grad_norm": 0.9203375203859201,
|
|
"learning_rate": 3.7080271974134434e-05,
|
|
"loss": 0.3812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16080577671527863,
|
|
"step": 1150,
|
|
"valid_targets_mean": 1909.9,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 1.807511737089202,
|
|
"grad_norm": 0.7617152685885762,
|
|
"learning_rate": 3.703953540684643e-05,
|
|
"loss": 0.3641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1802678108215332,
|
|
"step": 1155,
|
|
"valid_targets_mean": 3560.0,
|
|
"valid_targets_min": 813
|
|
},
|
|
{
|
|
"epoch": 1.8153364632237872,
|
|
"grad_norm": 0.6635499060363163,
|
|
"learning_rate": 3.6998539322934525e-05,
|
|
"loss": 0.372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15765205025672913,
|
|
"step": 1160,
|
|
"valid_targets_mean": 2704.8,
|
|
"valid_targets_min": 1297
|
|
},
|
|
{
|
|
"epoch": 1.8231611893583723,
|
|
"grad_norm": 0.5656046136421714,
|
|
"learning_rate": 3.695728434677992e-05,
|
|
"loss": 0.3959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23618967831134796,
|
|
"step": 1165,
|
|
"valid_targets_mean": 5334.2,
|
|
"valid_targets_min": 1559
|
|
},
|
|
{
|
|
"epoch": 1.8309859154929577,
|
|
"grad_norm": 0.7726818510167083,
|
|
"learning_rate": 3.691577110670677e-05,
|
|
"loss": 0.3747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19265776872634888,
|
|
"step": 1170,
|
|
"valid_targets_mean": 2686.5,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 1.8388106416275432,
|
|
"grad_norm": 0.7436173114178048,
|
|
"learning_rate": 3.6874000234972706e-05,
|
|
"loss": 0.3977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2513878345489502,
|
|
"step": 1175,
|
|
"valid_targets_mean": 4044.6,
|
|
"valid_targets_min": 1181
|
|
},
|
|
{
|
|
"epoch": 1.8466353677621283,
|
|
"grad_norm": 0.8009879717013212,
|
|
"learning_rate": 3.6831972367759126e-05,
|
|
"loss": 0.4035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18989716470241547,
|
|
"step": 1180,
|
|
"valid_targets_mean": 2514.2,
|
|
"valid_targets_min": 1472
|
|
},
|
|
{
|
|
"epoch": 1.8544600938967135,
|
|
"grad_norm": 0.6247012929013743,
|
|
"learning_rate": 3.6789688145161544e-05,
|
|
"loss": 0.4049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19752906262874603,
|
|
"step": 1185,
|
|
"valid_targets_mean": 3648.5,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 1.862284820031299,
|
|
"grad_norm": 0.5906470192123663,
|
|
"learning_rate": 3.6747148211179846e-05,
|
|
"loss": 0.3775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19515764713287354,
|
|
"step": 1190,
|
|
"valid_targets_mean": 4471.6,
|
|
"valid_targets_min": 2247
|
|
},
|
|
{
|
|
"epoch": 1.8701095461658843,
|
|
"grad_norm": 0.6819106968117687,
|
|
"learning_rate": 3.670435321370845e-05,
|
|
"loss": 0.3826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20009064674377441,
|
|
"step": 1195,
|
|
"valid_targets_mean": 3691.4,
|
|
"valid_targets_min": 878
|
|
},
|
|
{
|
|
"epoch": 1.8779342723004695,
|
|
"grad_norm": 0.6347498676408799,
|
|
"learning_rate": 3.666130380452647e-05,
|
|
"loss": 0.3739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19641920924186707,
|
|
"step": 1200,
|
|
"valid_targets_mean": 3737.6,
|
|
"valid_targets_min": 1547
|
|
},
|
|
{
|
|
"epoch": 1.8857589984350547,
|
|
"grad_norm": 0.7184829602744343,
|
|
"learning_rate": 3.6618000639287784e-05,
|
|
"loss": 0.3731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1803753674030304,
|
|
"step": 1205,
|
|
"valid_targets_mean": 2805.9,
|
|
"valid_targets_min": 1013
|
|
},
|
|
{
|
|
"epoch": 1.89358372456964,
|
|
"grad_norm": 0.6260780470321379,
|
|
"learning_rate": 3.6574444377511025e-05,
|
|
"loss": 0.401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22460733354091644,
|
|
"step": 1210,
|
|
"valid_targets_mean": 3359.2,
|
|
"valid_targets_min": 2063
|
|
},
|
|
{
|
|
"epoch": 1.9014084507042255,
|
|
"grad_norm": 0.5470585072357231,
|
|
"learning_rate": 3.653063568256956e-05,
|
|
"loss": 0.3939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13702835142612457,
|
|
"step": 1215,
|
|
"valid_targets_mean": 3587.9,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 1.9092331768388107,
|
|
"grad_norm": 0.7232804566214297,
|
|
"learning_rate": 3.6486575221681386e-05,
|
|
"loss": 0.3662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1505659818649292,
|
|
"step": 1220,
|
|
"valid_targets_mean": 2598.2,
|
|
"valid_targets_min": 1622
|
|
},
|
|
{
|
|
"epoch": 1.9170579029733958,
|
|
"grad_norm": 0.5633804483361838,
|
|
"learning_rate": 3.6442263665898964e-05,
|
|
"loss": 0.3809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2138233333826065,
|
|
"step": 1225,
|
|
"valid_targets_mean": 4199.0,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 1.9248826291079812,
|
|
"grad_norm": 0.6786409032467642,
|
|
"learning_rate": 3.6397701690098974e-05,
|
|
"loss": 0.3634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1773233711719513,
|
|
"step": 1230,
|
|
"valid_targets_mean": 3829.2,
|
|
"valid_targets_min": 1527
|
|
},
|
|
{
|
|
"epoch": 1.9327073552425666,
|
|
"grad_norm": 0.6591424930389601,
|
|
"learning_rate": 3.6352889972972095e-05,
|
|
"loss": 0.3753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17036482691764832,
|
|
"step": 1235,
|
|
"valid_targets_mean": 3500.4,
|
|
"valid_targets_min": 1390
|
|
},
|
|
{
|
|
"epoch": 1.9405320813771518,
|
|
"grad_norm": 0.7077791461189813,
|
|
"learning_rate": 3.63078291970126e-05,
|
|
"loss": 0.3819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12294906377792358,
|
|
"step": 1240,
|
|
"valid_targets_mean": 2433.4,
|
|
"valid_targets_min": 1311
|
|
},
|
|
{
|
|
"epoch": 1.948356807511737,
|
|
"grad_norm": 0.6967578987967685,
|
|
"learning_rate": 3.626252004850799e-05,
|
|
"loss": 0.3723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1783074289560318,
|
|
"step": 1245,
|
|
"valid_targets_mean": 3461.1,
|
|
"valid_targets_min": 1125
|
|
},
|
|
{
|
|
"epoch": 1.9561815336463224,
|
|
"grad_norm": 0.8562815740685973,
|
|
"learning_rate": 3.62169632175286e-05,
|
|
"loss": 0.4093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19697491824626923,
|
|
"step": 1250,
|
|
"valid_targets_mean": 2736.9,
|
|
"valid_targets_min": 1412
|
|
},
|
|
{
|
|
"epoch": 1.9640062597809078,
|
|
"grad_norm": 0.6060641699425041,
|
|
"learning_rate": 3.617115939791697e-05,
|
|
"loss": 0.3522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15751199424266815,
|
|
"step": 1255,
|
|
"valid_targets_mean": 3641.0,
|
|
"valid_targets_min": 770
|
|
},
|
|
{
|
|
"epoch": 1.971830985915493,
|
|
"grad_norm": 0.6592097626055842,
|
|
"learning_rate": 3.612510928727737e-05,
|
|
"loss": 0.3456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18298685550689697,
|
|
"step": 1260,
|
|
"valid_targets_mean": 3392.6,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 1.9796557120500782,
|
|
"grad_norm": 0.6817812684515094,
|
|
"learning_rate": 3.6078813586965155e-05,
|
|
"loss": 0.4104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15940967202186584,
|
|
"step": 1265,
|
|
"valid_targets_mean": 2934.5,
|
|
"valid_targets_min": 708
|
|
},
|
|
{
|
|
"epoch": 1.9874804381846636,
|
|
"grad_norm": 0.7508693743974668,
|
|
"learning_rate": 3.6032273002076054e-05,
|
|
"loss": 0.3948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20297561585903168,
|
|
"step": 1270,
|
|
"valid_targets_mean": 3161.0,
|
|
"valid_targets_min": 973
|
|
},
|
|
{
|
|
"epoch": 1.995305164319249,
|
|
"grad_norm": 0.6706266867275804,
|
|
"learning_rate": 3.598548824143547e-05,
|
|
"loss": 0.3798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14458023011684418,
|
|
"step": 1275,
|
|
"valid_targets_mean": 3025.8,
|
|
"valid_targets_min": 1069
|
|
},
|
|
{
|
|
"epoch": 2.003129890453834,
|
|
"grad_norm": 0.685262913901234,
|
|
"learning_rate": 3.593846001758767e-05,
|
|
"loss": 0.365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17206820845603943,
|
|
"step": 1280,
|
|
"valid_targets_mean": 3466.5,
|
|
"valid_targets_min": 1413
|
|
},
|
|
{
|
|
"epoch": 2.0109546165884193,
|
|
"grad_norm": 1.8674433038866582,
|
|
"learning_rate": 3.589118904678491e-05,
|
|
"loss": 0.3728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2378794550895691,
|
|
"step": 1285,
|
|
"valid_targets_mean": 2577.2,
|
|
"valid_targets_min": 927
|
|
},
|
|
{
|
|
"epoch": 2.0187793427230045,
|
|
"grad_norm": 0.62294260026305,
|
|
"learning_rate": 3.584367604897657e-05,
|
|
"loss": 0.3614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13336679339408875,
|
|
"step": 1290,
|
|
"valid_targets_mean": 2902.1,
|
|
"valid_targets_min": 867
|
|
},
|
|
{
|
|
"epoch": 2.02660406885759,
|
|
"grad_norm": 0.729834913569975,
|
|
"learning_rate": 3.5795921747798136e-05,
|
|
"loss": 0.362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16659823060035706,
|
|
"step": 1295,
|
|
"valid_targets_mean": 3073.8,
|
|
"valid_targets_min": 1135
|
|
},
|
|
{
|
|
"epoch": 2.0344287949921753,
|
|
"grad_norm": 0.7647535281813881,
|
|
"learning_rate": 3.5747926870560244e-05,
|
|
"loss": 0.3629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2720337510108948,
|
|
"step": 1300,
|
|
"valid_targets_mean": 3253.2,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 2.0422535211267605,
|
|
"grad_norm": 0.5858770789828733,
|
|
"learning_rate": 3.569969214823753e-05,
|
|
"loss": 0.3361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10878610610961914,
|
|
"step": 1305,
|
|
"valid_targets_mean": 2152.1,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 2.0500782472613457,
|
|
"grad_norm": 0.8626879485669101,
|
|
"learning_rate": 3.565121831545757e-05,
|
|
"loss": 0.3505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18130023777484894,
|
|
"step": 1310,
|
|
"valid_targets_mean": 2235.8,
|
|
"valid_targets_min": 1055
|
|
},
|
|
{
|
|
"epoch": 2.0579029733959313,
|
|
"grad_norm": 0.6815234847975316,
|
|
"learning_rate": 3.5602506110489634e-05,
|
|
"loss": 0.3582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20253059267997742,
|
|
"step": 1315,
|
|
"valid_targets_mean": 4068.8,
|
|
"valid_targets_min": 1150
|
|
},
|
|
{
|
|
"epoch": 2.0657276995305165,
|
|
"grad_norm": 0.536021862725641,
|
|
"learning_rate": 3.555355627523347e-05,
|
|
"loss": 0.3664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13307343423366547,
|
|
"step": 1320,
|
|
"valid_targets_mean": 4160.1,
|
|
"valid_targets_min": 1521
|
|
},
|
|
{
|
|
"epoch": 2.0735524256651017,
|
|
"grad_norm": 0.7981389497926075,
|
|
"learning_rate": 3.550436955520798e-05,
|
|
"loss": 0.3542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2176656723022461,
|
|
"step": 1325,
|
|
"valid_targets_mean": 4008.5,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 2.081377151799687,
|
|
"grad_norm": 0.6613546780732346,
|
|
"learning_rate": 3.545494669953991e-05,
|
|
"loss": 0.3378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19166335463523865,
|
|
"step": 1330,
|
|
"valid_targets_mean": 4031.5,
|
|
"valid_targets_min": 1071
|
|
},
|
|
{
|
|
"epoch": 2.0892018779342725,
|
|
"grad_norm": 0.7304110627209885,
|
|
"learning_rate": 3.5405288460952394e-05,
|
|
"loss": 0.3481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1701429933309555,
|
|
"step": 1335,
|
|
"valid_targets_mean": 3012.0,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 2.0970266040688577,
|
|
"grad_norm": 0.709540830552454,
|
|
"learning_rate": 3.535539559575353e-05,
|
|
"loss": 0.3758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1921011507511139,
|
|
"step": 1340,
|
|
"valid_targets_mean": 3778.5,
|
|
"valid_targets_min": 1328
|
|
},
|
|
{
|
|
"epoch": 2.104851330203443,
|
|
"grad_norm": 0.6947345301307242,
|
|
"learning_rate": 3.5305268863824835e-05,
|
|
"loss": 0.372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1054690033197403,
|
|
"step": 1345,
|
|
"valid_targets_mean": 2723.0,
|
|
"valid_targets_min": 1061
|
|
},
|
|
{
|
|
"epoch": 2.112676056338028,
|
|
"grad_norm": 0.7775968142163889,
|
|
"learning_rate": 3.5254909028609654e-05,
|
|
"loss": 0.3487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15200592577457428,
|
|
"step": 1350,
|
|
"valid_targets_mean": 2029.1,
|
|
"valid_targets_min": 894
|
|
},
|
|
{
|
|
"epoch": 2.1205007824726136,
|
|
"grad_norm": 0.75220763724208,
|
|
"learning_rate": 3.520431685710159e-05,
|
|
"loss": 0.3532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17445842921733856,
|
|
"step": 1355,
|
|
"valid_targets_mean": 2800.4,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 2.128325508607199,
|
|
"grad_norm": 1.308602795851995,
|
|
"learning_rate": 3.5153493119832776e-05,
|
|
"loss": 0.364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20328953862190247,
|
|
"step": 1360,
|
|
"valid_targets_mean": 4163.2,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 2.136150234741784,
|
|
"grad_norm": 0.8084126390142385,
|
|
"learning_rate": 3.510243859086214e-05,
|
|
"loss": 0.357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18786843121051788,
|
|
"step": 1365,
|
|
"valid_targets_mean": 3477.2,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 2.143974960876369,
|
|
"grad_norm": 0.6705695248353545,
|
|
"learning_rate": 3.505115404776365e-05,
|
|
"loss": 0.3545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12195180356502533,
|
|
"step": 1370,
|
|
"valid_targets_mean": 2854.2,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 2.151799687010955,
|
|
"grad_norm": 0.674568310012371,
|
|
"learning_rate": 3.4999640271614436e-05,
|
|
"loss": 0.3573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20537444949150085,
|
|
"step": 1375,
|
|
"valid_targets_mean": 3774.6,
|
|
"valid_targets_min": 1373
|
|
},
|
|
{
|
|
"epoch": 2.15962441314554,
|
|
"grad_norm": 0.6545521439754447,
|
|
"learning_rate": 3.494789804698291e-05,
|
|
"loss": 0.3318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14076703786849976,
|
|
"step": 1380,
|
|
"valid_targets_mean": 2945.6,
|
|
"valid_targets_min": 1484
|
|
},
|
|
{
|
|
"epoch": 2.167449139280125,
|
|
"grad_norm": 0.649477779377324,
|
|
"learning_rate": 3.489592816191683e-05,
|
|
"loss": 0.3473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14599597454071045,
|
|
"step": 1385,
|
|
"valid_targets_mean": 3076.6,
|
|
"valid_targets_min": 1230
|
|
},
|
|
{
|
|
"epoch": 2.1752738654147104,
|
|
"grad_norm": 0.5896086623119152,
|
|
"learning_rate": 3.484373140793125e-05,
|
|
"loss": 0.3434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15792551636695862,
|
|
"step": 1390,
|
|
"valid_targets_mean": 4062.5,
|
|
"valid_targets_min": 2367
|
|
},
|
|
{
|
|
"epoch": 2.183098591549296,
|
|
"grad_norm": 0.6658126550742101,
|
|
"learning_rate": 3.479130857999653e-05,
|
|
"loss": 0.3732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18942593038082123,
|
|
"step": 1395,
|
|
"valid_targets_mean": 4026.4,
|
|
"valid_targets_min": 2271
|
|
},
|
|
{
|
|
"epoch": 2.190923317683881,
|
|
"grad_norm": 0.7360252183929687,
|
|
"learning_rate": 3.4738660476526185e-05,
|
|
"loss": 0.3316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16910064220428467,
|
|
"step": 1400,
|
|
"valid_targets_mean": 2681.5,
|
|
"valid_targets_min": 1074
|
|
},
|
|
{
|
|
"epoch": 2.1987480438184663,
|
|
"grad_norm": 0.7215414725156137,
|
|
"learning_rate": 3.468578789936472e-05,
|
|
"loss": 0.3752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1706930696964264,
|
|
"step": 1405,
|
|
"valid_targets_mean": 3359.2,
|
|
"valid_targets_min": 1021
|
|
},
|
|
{
|
|
"epoch": 2.2065727699530515,
|
|
"grad_norm": 0.7860796442678976,
|
|
"learning_rate": 3.4632691653775455e-05,
|
|
"loss": 0.3131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16006585955619812,
|
|
"step": 1410,
|
|
"valid_targets_mean": 3619.9,
|
|
"valid_targets_min": 1455
|
|
},
|
|
{
|
|
"epoch": 2.214397496087637,
|
|
"grad_norm": 0.6305229662327004,
|
|
"learning_rate": 3.457937254842823e-05,
|
|
"loss": 0.3551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21308350563049316,
|
|
"step": 1415,
|
|
"valid_targets_mean": 4289.0,
|
|
"valid_targets_min": 1245
|
|
},
|
|
{
|
|
"epoch": 2.2222222222222223,
|
|
"grad_norm": 0.858512066903637,
|
|
"learning_rate": 3.452583139538711e-05,
|
|
"loss": 0.328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19860151410102844,
|
|
"step": 1420,
|
|
"valid_targets_mean": 2815.4,
|
|
"valid_targets_min": 1242
|
|
},
|
|
{
|
|
"epoch": 2.2300469483568075,
|
|
"grad_norm": 0.6452785972535862,
|
|
"learning_rate": 3.447206901009798e-05,
|
|
"loss": 0.356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23823179304599762,
|
|
"step": 1425,
|
|
"valid_targets_mean": 4477.5,
|
|
"valid_targets_min": 1343
|
|
},
|
|
{
|
|
"epoch": 2.2378716744913927,
|
|
"grad_norm": 0.70707728937259,
|
|
"learning_rate": 3.4418086211376174e-05,
|
|
"loss": 0.3315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13700878620147705,
|
|
"step": 1430,
|
|
"valid_targets_mean": 2448.1,
|
|
"valid_targets_min": 1271
|
|
},
|
|
{
|
|
"epoch": 2.2456964006259783,
|
|
"grad_norm": 0.9064702597963882,
|
|
"learning_rate": 3.436388382139396e-05,
|
|
"loss": 0.3638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2127435803413391,
|
|
"step": 1435,
|
|
"valid_targets_mean": 2416.6,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 2.2535211267605635,
|
|
"grad_norm": 0.6567509378451356,
|
|
"learning_rate": 3.4309462665668065e-05,
|
|
"loss": 0.3282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1733391284942627,
|
|
"step": 1440,
|
|
"valid_targets_mean": 4013.5,
|
|
"valid_targets_min": 1075
|
|
},
|
|
{
|
|
"epoch": 2.2613458528951487,
|
|
"grad_norm": 0.6719828635579147,
|
|
"learning_rate": 3.425482357304706e-05,
|
|
"loss": 0.3589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22682076692581177,
|
|
"step": 1445,
|
|
"valid_targets_mean": 4123.0,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 2.269170579029734,
|
|
"grad_norm": 0.6324778711656667,
|
|
"learning_rate": 3.419996737569875e-05,
|
|
"loss": 0.3323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1619684398174286,
|
|
"step": 1450,
|
|
"valid_targets_mean": 4254.2,
|
|
"valid_targets_min": 979
|
|
},
|
|
{
|
|
"epoch": 2.276995305164319,
|
|
"grad_norm": 0.7063365064646465,
|
|
"learning_rate": 3.41448949090975e-05,
|
|
"loss": 0.3619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19507917761802673,
|
|
"step": 1455,
|
|
"valid_targets_mean": 3578.1,
|
|
"valid_targets_min": 1845
|
|
},
|
|
{
|
|
"epoch": 2.2848200312989047,
|
|
"grad_norm": 0.807418151904061,
|
|
"learning_rate": 3.408960701201153e-05,
|
|
"loss": 0.3554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1417492777109146,
|
|
"step": 1460,
|
|
"valid_targets_mean": 2788.9,
|
|
"valid_targets_min": 1164
|
|
},
|
|
{
|
|
"epoch": 2.29264475743349,
|
|
"grad_norm": 0.6432692413266714,
|
|
"learning_rate": 3.403410452649011e-05,
|
|
"loss": 0.3186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10802759975194931,
|
|
"step": 1465,
|
|
"valid_targets_mean": 1949.0,
|
|
"valid_targets_min": 1203
|
|
},
|
|
{
|
|
"epoch": 2.300469483568075,
|
|
"grad_norm": 0.505316636423369,
|
|
"learning_rate": 3.397838829785075e-05,
|
|
"loss": 0.355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12446225434541702,
|
|
"step": 1470,
|
|
"valid_targets_mean": 3853.0,
|
|
"valid_targets_min": 1547
|
|
},
|
|
{
|
|
"epoch": 2.3082942097026606,
|
|
"grad_norm": 0.6400952551334692,
|
|
"learning_rate": 3.392245917466632e-05,
|
|
"loss": 0.3757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2172216922044754,
|
|
"step": 1475,
|
|
"valid_targets_mean": 4219.9,
|
|
"valid_targets_min": 1255
|
|
},
|
|
{
|
|
"epoch": 2.316118935837246,
|
|
"grad_norm": 0.6565030100779823,
|
|
"learning_rate": 3.386631800875214e-05,
|
|
"loss": 0.3584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2035527527332306,
|
|
"step": 1480,
|
|
"valid_targets_mean": 4151.1,
|
|
"valid_targets_min": 1781
|
|
},
|
|
{
|
|
"epoch": 2.323943661971831,
|
|
"grad_norm": 0.5839527105142958,
|
|
"learning_rate": 3.3809965655152996e-05,
|
|
"loss": 0.336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19896285235881805,
|
|
"step": 1485,
|
|
"valid_targets_mean": 3109.4,
|
|
"valid_targets_min": 1311
|
|
},
|
|
{
|
|
"epoch": 2.331768388106416,
|
|
"grad_norm": 0.6842647170887683,
|
|
"learning_rate": 3.375340297213011e-05,
|
|
"loss": 0.3614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16413629055023193,
|
|
"step": 1490,
|
|
"valid_targets_mean": 3574.8,
|
|
"valid_targets_min": 1704
|
|
},
|
|
{
|
|
"epoch": 2.3395931142410014,
|
|
"grad_norm": 0.69462881160759,
|
|
"learning_rate": 3.369663082114809e-05,
|
|
"loss": 0.3468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11714695394039154,
|
|
"step": 1495,
|
|
"valid_targets_mean": 2714.1,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 2.347417840375587,
|
|
"grad_norm": 0.6713654341245846,
|
|
"learning_rate": 3.3639650066861764e-05,
|
|
"loss": 0.3218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16025574505329132,
|
|
"step": 1500,
|
|
"valid_targets_mean": 3618.1,
|
|
"valid_targets_min": 1153
|
|
},
|
|
{
|
|
"epoch": 2.355242566510172,
|
|
"grad_norm": 0.7066673948786524,
|
|
"learning_rate": 3.3582461577103096e-05,
|
|
"loss": 0.3779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13800875842571259,
|
|
"step": 1505,
|
|
"valid_targets_mean": 2171.5,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 2.3630672926447573,
|
|
"grad_norm": 0.6053958800224623,
|
|
"learning_rate": 3.352506622286786e-05,
|
|
"loss": 0.3222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11900651454925537,
|
|
"step": 1510,
|
|
"valid_targets_mean": 3117.6,
|
|
"valid_targets_min": 1488
|
|
},
|
|
{
|
|
"epoch": 2.370892018779343,
|
|
"grad_norm": 0.6848503882157774,
|
|
"learning_rate": 3.346746487830248e-05,
|
|
"loss": 0.3434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18061873316764832,
|
|
"step": 1515,
|
|
"valid_targets_mean": 3196.5,
|
|
"valid_targets_min": 823
|
|
},
|
|
{
|
|
"epoch": 2.378716744913928,
|
|
"grad_norm": 0.7214832245659588,
|
|
"learning_rate": 3.3409658420690634e-05,
|
|
"loss": 0.3394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17256510257720947,
|
|
"step": 1520,
|
|
"valid_targets_mean": 2498.2,
|
|
"valid_targets_min": 1101
|
|
},
|
|
{
|
|
"epoch": 2.3865414710485133,
|
|
"grad_norm": 0.7543904875406556,
|
|
"learning_rate": 3.3351647730439936e-05,
|
|
"loss": 0.3293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17104142904281616,
|
|
"step": 1525,
|
|
"valid_targets_mean": 2772.9,
|
|
"valid_targets_min": 1152
|
|
},
|
|
{
|
|
"epoch": 2.3943661971830985,
|
|
"grad_norm": 0.709894565036357,
|
|
"learning_rate": 3.329343369106852e-05,
|
|
"loss": 0.35,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20762532949447632,
|
|
"step": 1530,
|
|
"valid_targets_mean": 5609.6,
|
|
"valid_targets_min": 1055
|
|
},
|
|
{
|
|
"epoch": 2.4021909233176837,
|
|
"grad_norm": 0.746134744854618,
|
|
"learning_rate": 3.323501718919157e-05,
|
|
"loss": 0.3901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23197545111179352,
|
|
"step": 1535,
|
|
"valid_targets_mean": 4335.0,
|
|
"valid_targets_min": 1269
|
|
},
|
|
{
|
|
"epoch": 2.4100156494522693,
|
|
"grad_norm": 0.6047435383932257,
|
|
"learning_rate": 3.317639911450785e-05,
|
|
"loss": 0.3515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1438893973827362,
|
|
"step": 1540,
|
|
"valid_targets_mean": 3808.4,
|
|
"valid_targets_min": 1348
|
|
},
|
|
{
|
|
"epoch": 2.4178403755868545,
|
|
"grad_norm": 0.6592474492291397,
|
|
"learning_rate": 3.311758035978611e-05,
|
|
"loss": 0.3625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19255056977272034,
|
|
"step": 1545,
|
|
"valid_targets_mean": 3853.4,
|
|
"valid_targets_min": 1608
|
|
},
|
|
{
|
|
"epoch": 2.4256651017214397,
|
|
"grad_norm": 0.5858655509379639,
|
|
"learning_rate": 3.3058561820851513e-05,
|
|
"loss": 0.3305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17494967579841614,
|
|
"step": 1550,
|
|
"valid_targets_mean": 4236.4,
|
|
"valid_targets_min": 1424
|
|
},
|
|
{
|
|
"epoch": 2.433489827856025,
|
|
"grad_norm": 0.6520129699348667,
|
|
"learning_rate": 3.299934439657199e-05,
|
|
"loss": 0.3415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24300023913383484,
|
|
"step": 1555,
|
|
"valid_targets_mean": 4887.1,
|
|
"valid_targets_min": 828
|
|
},
|
|
{
|
|
"epoch": 2.4413145539906105,
|
|
"grad_norm": 0.6782965396700703,
|
|
"learning_rate": 3.293992898884456e-05,
|
|
"loss": 0.3688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10579371452331543,
|
|
"step": 1560,
|
|
"valid_targets_mean": 3069.8,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 2.4491392801251957,
|
|
"grad_norm": 0.5987741963980023,
|
|
"learning_rate": 3.288031650258157e-05,
|
|
"loss": 0.3557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2112443894147873,
|
|
"step": 1565,
|
|
"valid_targets_mean": 6057.2,
|
|
"valid_targets_min": 2433
|
|
},
|
|
{
|
|
"epoch": 2.456964006259781,
|
|
"grad_norm": 0.5656501271758022,
|
|
"learning_rate": 3.282050784569693e-05,
|
|
"loss": 0.3652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22649559378623962,
|
|
"step": 1570,
|
|
"valid_targets_mean": 5101.9,
|
|
"valid_targets_min": 1936
|
|
},
|
|
{
|
|
"epoch": 2.464788732394366,
|
|
"grad_norm": 0.8140141282880866,
|
|
"learning_rate": 3.276050392909227e-05,
|
|
"loss": 0.3599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1896408498287201,
|
|
"step": 1575,
|
|
"valid_targets_mean": 1879.0,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 2.4726134585289516,
|
|
"grad_norm": 0.655992144734348,
|
|
"learning_rate": 3.270030566664309e-05,
|
|
"loss": 0.3343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19068443775177002,
|
|
"step": 1580,
|
|
"valid_targets_mean": 4076.4,
|
|
"valid_targets_min": 1709
|
|
},
|
|
{
|
|
"epoch": 2.480438184663537,
|
|
"grad_norm": 0.6948794663037432,
|
|
"learning_rate": 3.2639913975184825e-05,
|
|
"loss": 0.3575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1735239326953888,
|
|
"step": 1585,
|
|
"valid_targets_mean": 3140.2,
|
|
"valid_targets_min": 1183
|
|
},
|
|
{
|
|
"epoch": 2.488262910798122,
|
|
"grad_norm": 0.6161285781896318,
|
|
"learning_rate": 3.257932977449888e-05,
|
|
"loss": 0.3395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15244409441947937,
|
|
"step": 1590,
|
|
"valid_targets_mean": 3107.4,
|
|
"valid_targets_min": 1282
|
|
},
|
|
{
|
|
"epoch": 2.496087636932707,
|
|
"grad_norm": 0.7107679118121952,
|
|
"learning_rate": 3.2518553987298624e-05,
|
|
"loss": 0.3705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14138063788414001,
|
|
"step": 1595,
|
|
"valid_targets_mean": 2491.2,
|
|
"valid_targets_min": 901
|
|
},
|
|
{
|
|
"epoch": 2.5039123630672924,
|
|
"grad_norm": 0.6576723634836228,
|
|
"learning_rate": 3.2457587539215364e-05,
|
|
"loss": 0.3464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21305537223815918,
|
|
"step": 1600,
|
|
"valid_targets_mean": 4252.0,
|
|
"valid_targets_min": 1951
|
|
},
|
|
{
|
|
"epoch": 2.511737089201878,
|
|
"grad_norm": 0.7607673017237766,
|
|
"learning_rate": 3.239643135878419e-05,
|
|
"loss": 0.3352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15359759330749512,
|
|
"step": 1605,
|
|
"valid_targets_mean": 2528.1,
|
|
"valid_targets_min": 985
|
|
},
|
|
{
|
|
"epoch": 2.519561815336463,
|
|
"grad_norm": 0.7807078792275984,
|
|
"learning_rate": 3.233508637742988e-05,
|
|
"loss": 0.3456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2223649024963379,
|
|
"step": 1610,
|
|
"valid_targets_mean": 2828.5,
|
|
"valid_targets_min": 1019
|
|
},
|
|
{
|
|
"epoch": 2.5273865414710484,
|
|
"grad_norm": 0.6804491992230792,
|
|
"learning_rate": 3.2273553529452696e-05,
|
|
"loss": 0.379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2669369578361511,
|
|
"step": 1615,
|
|
"valid_targets_mean": 3789.4,
|
|
"valid_targets_min": 1286
|
|
},
|
|
{
|
|
"epoch": 2.535211267605634,
|
|
"grad_norm": 0.8892829966151747,
|
|
"learning_rate": 3.221183375201418e-05,
|
|
"loss": 0.3367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16043208539485931,
|
|
"step": 1620,
|
|
"valid_targets_mean": 3930.2,
|
|
"valid_targets_min": 1562
|
|
},
|
|
{
|
|
"epoch": 2.543035993740219,
|
|
"grad_norm": 0.7758412248422415,
|
|
"learning_rate": 3.214992798512282e-05,
|
|
"loss": 0.3327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14031963050365448,
|
|
"step": 1625,
|
|
"valid_targets_mean": 2586.6,
|
|
"valid_targets_min": 924
|
|
},
|
|
{
|
|
"epoch": 2.5508607198748043,
|
|
"grad_norm": 0.769818736945752,
|
|
"learning_rate": 3.20878371716198e-05,
|
|
"loss": 0.3363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1873244345188141,
|
|
"step": 1630,
|
|
"valid_targets_mean": 2637.9,
|
|
"valid_targets_min": 812
|
|
},
|
|
{
|
|
"epoch": 2.5586854460093895,
|
|
"grad_norm": 0.6585078528555155,
|
|
"learning_rate": 3.2025562257164613e-05,
|
|
"loss": 0.3638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20774495601654053,
|
|
"step": 1635,
|
|
"valid_targets_mean": 3725.0,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 2.5665101721439747,
|
|
"grad_norm": 0.6108579108640938,
|
|
"learning_rate": 3.1963104190220645e-05,
|
|
"loss": 0.3494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08568064123392105,
|
|
"step": 1640,
|
|
"valid_targets_mean": 2280.9,
|
|
"valid_targets_min": 1636
|
|
},
|
|
{
|
|
"epoch": 2.5743348982785603,
|
|
"grad_norm": 0.6432699024748011,
|
|
"learning_rate": 3.1900463922040746e-05,
|
|
"loss": 0.3274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18422311544418335,
|
|
"step": 1645,
|
|
"valid_targets_mean": 4695.1,
|
|
"valid_targets_min": 1021
|
|
},
|
|
{
|
|
"epoch": 2.5821596244131455,
|
|
"grad_norm": 0.605472424444633,
|
|
"learning_rate": 3.183764240665275e-05,
|
|
"loss": 0.346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.163361594080925,
|
|
"step": 1650,
|
|
"valid_targets_mean": 3528.6,
|
|
"valid_targets_min": 2541
|
|
},
|
|
{
|
|
"epoch": 2.5899843505477307,
|
|
"grad_norm": 0.65579408998459,
|
|
"learning_rate": 3.177464060084492e-05,
|
|
"loss": 0.3337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18249675631523132,
|
|
"step": 1655,
|
|
"valid_targets_mean": 3877.4,
|
|
"valid_targets_min": 1056
|
|
},
|
|
{
|
|
"epoch": 2.5978090766823163,
|
|
"grad_norm": 0.5865989474686111,
|
|
"learning_rate": 3.171145946415139e-05,
|
|
"loss": 0.3217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13226504623889923,
|
|
"step": 1660,
|
|
"valid_targets_mean": 3065.1,
|
|
"valid_targets_min": 1126
|
|
},
|
|
{
|
|
"epoch": 2.6056338028169015,
|
|
"grad_norm": 0.7209533802424393,
|
|
"learning_rate": 3.164809995883757e-05,
|
|
"loss": 0.3439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13644996285438538,
|
|
"step": 1665,
|
|
"valid_targets_mean": 2266.9,
|
|
"valid_targets_min": 1246
|
|
},
|
|
{
|
|
"epoch": 2.6134585289514867,
|
|
"grad_norm": 0.7718411743947633,
|
|
"learning_rate": 3.1584563049885444e-05,
|
|
"loss": 0.3409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1840563267469406,
|
|
"step": 1670,
|
|
"valid_targets_mean": 2591.2,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 2.621283255086072,
|
|
"grad_norm": 0.6683484557535657,
|
|
"learning_rate": 3.152084970497893e-05,
|
|
"loss": 0.3523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1835951954126358,
|
|
"step": 1675,
|
|
"valid_targets_mean": 2722.0,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 2.629107981220657,
|
|
"grad_norm": 0.636147374620835,
|
|
"learning_rate": 3.145696089448907e-05,
|
|
"loss": 0.3756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2715715169906616,
|
|
"step": 1680,
|
|
"valid_targets_mean": 5222.5,
|
|
"valid_targets_min": 1655
|
|
},
|
|
{
|
|
"epoch": 2.6369327073552427,
|
|
"grad_norm": 0.6531954236062774,
|
|
"learning_rate": 3.1392897591459343e-05,
|
|
"loss": 0.3534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19955246150493622,
|
|
"step": 1685,
|
|
"valid_targets_mean": 4543.9,
|
|
"valid_targets_min": 1110
|
|
},
|
|
{
|
|
"epoch": 2.644757433489828,
|
|
"grad_norm": 0.5924949569131973,
|
|
"learning_rate": 3.1328660771590766e-05,
|
|
"loss": 0.3306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11615298688411713,
|
|
"step": 1690,
|
|
"valid_targets_mean": 2995.5,
|
|
"valid_targets_min": 968
|
|
},
|
|
{
|
|
"epoch": 2.652582159624413,
|
|
"grad_norm": 0.684521852352277,
|
|
"learning_rate": 3.126425141322707e-05,
|
|
"loss": 0.3435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20662981271743774,
|
|
"step": 1695,
|
|
"valid_targets_mean": 3490.2,
|
|
"valid_targets_min": 1128
|
|
},
|
|
{
|
|
"epoch": 2.6604068857589986,
|
|
"grad_norm": 0.5813373139566878,
|
|
"learning_rate": 3.119967049733977e-05,
|
|
"loss": 0.3588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20194265246391296,
|
|
"step": 1700,
|
|
"valid_targets_mean": 4772.2,
|
|
"valid_targets_min": 1197
|
|
},
|
|
{
|
|
"epoch": 2.668231611893584,
|
|
"grad_norm": 0.6889882032005892,
|
|
"learning_rate": 3.1134919007513295e-05,
|
|
"loss": 0.3613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23412039875984192,
|
|
"step": 1705,
|
|
"valid_targets_mean": 3899.9,
|
|
"valid_targets_min": 1178
|
|
},
|
|
{
|
|
"epoch": 2.676056338028169,
|
|
"grad_norm": 0.5132099711595682,
|
|
"learning_rate": 3.106999792992993e-05,
|
|
"loss": 0.3791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14153335988521576,
|
|
"step": 1710,
|
|
"valid_targets_mean": 4313.5,
|
|
"valid_targets_min": 978
|
|
},
|
|
{
|
|
"epoch": 2.683881064162754,
|
|
"grad_norm": 0.8557401389389555,
|
|
"learning_rate": 3.100490825335482e-05,
|
|
"loss": 0.3407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18945080041885376,
|
|
"step": 1715,
|
|
"valid_targets_mean": 3288.5,
|
|
"valid_targets_min": 1072
|
|
},
|
|
{
|
|
"epoch": 2.6917057902973394,
|
|
"grad_norm": 0.6582173449043957,
|
|
"learning_rate": 3.093965096912094e-05,
|
|
"loss": 0.3439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18182674050331116,
|
|
"step": 1720,
|
|
"valid_targets_mean": 3783.6,
|
|
"valid_targets_min": 1642
|
|
},
|
|
{
|
|
"epoch": 2.699530516431925,
|
|
"grad_norm": 0.7150210584564919,
|
|
"learning_rate": 3.0874227071113936e-05,
|
|
"loss": 0.3302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19261327385902405,
|
|
"step": 1725,
|
|
"valid_targets_mean": 3304.0,
|
|
"valid_targets_min": 901
|
|
},
|
|
{
|
|
"epoch": 2.70735524256651,
|
|
"grad_norm": 0.7684956258499176,
|
|
"learning_rate": 3.080863755575709e-05,
|
|
"loss": 0.3384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16685187816619873,
|
|
"step": 1730,
|
|
"valid_targets_mean": 3672.9,
|
|
"valid_targets_min": 1631
|
|
},
|
|
{
|
|
"epoch": 2.7151799687010953,
|
|
"grad_norm": 0.5961064292491131,
|
|
"learning_rate": 3.074288342199601e-05,
|
|
"loss": 0.3418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20459699630737305,
|
|
"step": 1735,
|
|
"valid_targets_mean": 4400.5,
|
|
"valid_targets_min": 1640
|
|
},
|
|
{
|
|
"epoch": 2.723004694835681,
|
|
"grad_norm": 0.6295042891217995,
|
|
"learning_rate": 3.067696567128353e-05,
|
|
"loss": 0.3184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19677501916885376,
|
|
"step": 1740,
|
|
"valid_targets_mean": 4375.2,
|
|
"valid_targets_min": 1174
|
|
},
|
|
{
|
|
"epoch": 2.730829420970266,
|
|
"grad_norm": 0.6912904327088273,
|
|
"learning_rate": 3.06108853075644e-05,
|
|
"loss": 0.345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16978582739830017,
|
|
"step": 1745,
|
|
"valid_targets_mean": 3360.6,
|
|
"valid_targets_min": 1384
|
|
},
|
|
{
|
|
"epoch": 2.7386541471048513,
|
|
"grad_norm": 0.6585419128298273,
|
|
"learning_rate": 3.054464333726e-05,
|
|
"loss": 0.3491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18841488659381866,
|
|
"step": 1750,
|
|
"valid_targets_mean": 3818.6,
|
|
"valid_targets_min": 2212
|
|
},
|
|
{
|
|
"epoch": 2.7464788732394365,
|
|
"grad_norm": 0.7015268019163079,
|
|
"learning_rate": 3.0478240769253048e-05,
|
|
"loss": 0.3343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15373367071151733,
|
|
"step": 1755,
|
|
"valid_targets_mean": 2573.5,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 2.7543035993740217,
|
|
"grad_norm": 0.6836797957678966,
|
|
"learning_rate": 3.0411678614872176e-05,
|
|
"loss": 0.3522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12120893597602844,
|
|
"step": 1760,
|
|
"valid_targets_mean": 2165.9,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 2.7621283255086073,
|
|
"grad_norm": 0.645746065977562,
|
|
"learning_rate": 3.0344957887876575e-05,
|
|
"loss": 0.3828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22335761785507202,
|
|
"step": 1765,
|
|
"valid_targets_mean": 4285.8,
|
|
"valid_targets_min": 1024
|
|
},
|
|
{
|
|
"epoch": 2.7699530516431925,
|
|
"grad_norm": 0.7799151915573881,
|
|
"learning_rate": 3.0278079604440536e-05,
|
|
"loss": 0.3199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1587856560945511,
|
|
"step": 1770,
|
|
"valid_targets_mean": 1729.5,
|
|
"valid_targets_min": 1382
|
|
},
|
|
{
|
|
"epoch": 2.7777777777777777,
|
|
"grad_norm": 0.6180915764576657,
|
|
"learning_rate": 3.0211044783137975e-05,
|
|
"loss": 0.3305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13104727864265442,
|
|
"step": 1775,
|
|
"valid_targets_mean": 2821.8,
|
|
"valid_targets_min": 920
|
|
},
|
|
{
|
|
"epoch": 2.7856025039123633,
|
|
"grad_norm": 0.7713233543265153,
|
|
"learning_rate": 3.014385444492693e-05,
|
|
"loss": 0.3486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15071728825569153,
|
|
"step": 1780,
|
|
"valid_targets_mean": 2175.6,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 2.7934272300469485,
|
|
"grad_norm": 0.6135334455676279,
|
|
"learning_rate": 3.0076509613133988e-05,
|
|
"loss": 0.3686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.196501225233078,
|
|
"step": 1785,
|
|
"valid_targets_mean": 4167.9,
|
|
"valid_targets_min": 912
|
|
},
|
|
{
|
|
"epoch": 2.8012519561815337,
|
|
"grad_norm": 0.6233446933909947,
|
|
"learning_rate": 3.000901131343872e-05,
|
|
"loss": 0.3502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18658217787742615,
|
|
"step": 1790,
|
|
"valid_targets_mean": 3509.5,
|
|
"valid_targets_min": 1305
|
|
},
|
|
{
|
|
"epoch": 2.809076682316119,
|
|
"grad_norm": 0.657055982342519,
|
|
"learning_rate": 2.9941360573858057e-05,
|
|
"loss": 0.3619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1466427892446518,
|
|
"step": 1795,
|
|
"valid_targets_mean": 3258.9,
|
|
"valid_targets_min": 973
|
|
},
|
|
{
|
|
"epoch": 2.816901408450704,
|
|
"grad_norm": 0.7178596201182491,
|
|
"learning_rate": 2.9873558424730634e-05,
|
|
"loss": 0.3558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17080730199813843,
|
|
"step": 1800,
|
|
"valid_targets_mean": 3004.5,
|
|
"valid_targets_min": 513
|
|
},
|
|
{
|
|
"epoch": 2.8247261345852896,
|
|
"grad_norm": 0.6120477743144751,
|
|
"learning_rate": 2.9805605898701078e-05,
|
|
"loss": 0.3666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19994986057281494,
|
|
"step": 1805,
|
|
"valid_targets_mean": 4439.9,
|
|
"valid_targets_min": 847
|
|
},
|
|
{
|
|
"epoch": 2.832550860719875,
|
|
"grad_norm": 0.6701607115104019,
|
|
"learning_rate": 2.9737504030704306e-05,
|
|
"loss": 0.3426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1619153618812561,
|
|
"step": 1810,
|
|
"valid_targets_mean": 2570.0,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 2.84037558685446,
|
|
"grad_norm": 0.5737483075331443,
|
|
"learning_rate": 2.9669253857949757e-05,
|
|
"loss": 0.3488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11148478090763092,
|
|
"step": 1815,
|
|
"valid_targets_mean": 2634.1,
|
|
"valid_targets_min": 1721
|
|
},
|
|
{
|
|
"epoch": 2.8482003129890456,
|
|
"grad_norm": 0.8612726537557142,
|
|
"learning_rate": 2.960085641990557e-05,
|
|
"loss": 0.3727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22453519701957703,
|
|
"step": 1820,
|
|
"valid_targets_mean": 3813.4,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 2.856025039123631,
|
|
"grad_norm": 0.6745024030701715,
|
|
"learning_rate": 2.953231275828281e-05,
|
|
"loss": 0.3534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2033141702413559,
|
|
"step": 1825,
|
|
"valid_targets_mean": 3564.2,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 2.863849765258216,
|
|
"grad_norm": 0.6027677690292791,
|
|
"learning_rate": 2.946362391701953e-05,
|
|
"loss": 0.3554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19975969195365906,
|
|
"step": 1830,
|
|
"valid_targets_mean": 4548.1,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 2.871674491392801,
|
|
"grad_norm": 0.6689482226267689,
|
|
"learning_rate": 2.939479094226492e-05,
|
|
"loss": 0.3544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15336869657039642,
|
|
"step": 1835,
|
|
"valid_targets_mean": 3175.5,
|
|
"valid_targets_min": 1839
|
|
},
|
|
{
|
|
"epoch": 2.8794992175273864,
|
|
"grad_norm": 0.689842536471069,
|
|
"learning_rate": 2.9325814882363367e-05,
|
|
"loss": 0.3474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19394417107105255,
|
|
"step": 1840,
|
|
"valid_targets_mean": 3388.1,
|
|
"valid_targets_min": 1561
|
|
},
|
|
{
|
|
"epoch": 2.887323943661972,
|
|
"grad_norm": 0.6265191743442781,
|
|
"learning_rate": 2.925669678783848e-05,
|
|
"loss": 0.3525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2339908480644226,
|
|
"step": 1845,
|
|
"valid_targets_mean": 4268.4,
|
|
"valid_targets_min": 532
|
|
},
|
|
{
|
|
"epoch": 2.895148669796557,
|
|
"grad_norm": 0.6546154414366928,
|
|
"learning_rate": 2.9187437711377086e-05,
|
|
"loss": 0.3352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15464986860752106,
|
|
"step": 1850,
|
|
"valid_targets_mean": 2870.4,
|
|
"valid_targets_min": 1257
|
|
},
|
|
{
|
|
"epoch": 2.9029733959311423,
|
|
"grad_norm": 0.6541618202130365,
|
|
"learning_rate": 2.9118038707813218e-05,
|
|
"loss": 0.3434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1614140272140503,
|
|
"step": 1855,
|
|
"valid_targets_mean": 4193.8,
|
|
"valid_targets_min": 1483
|
|
},
|
|
{
|
|
"epoch": 2.910798122065728,
|
|
"grad_norm": 0.5835591237984448,
|
|
"learning_rate": 2.904850083411201e-05,
|
|
"loss": 0.3664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0938955619931221,
|
|
"step": 1860,
|
|
"valid_targets_mean": 3614.8,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 2.918622848200313,
|
|
"grad_norm": 0.9542772179783248,
|
|
"learning_rate": 2.8978825149353656e-05,
|
|
"loss": 0.3417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21525943279266357,
|
|
"step": 1865,
|
|
"valid_targets_mean": 3367.9,
|
|
"valid_targets_min": 1636
|
|
},
|
|
{
|
|
"epoch": 2.9264475743348983,
|
|
"grad_norm": 0.6935644382045476,
|
|
"learning_rate": 2.8909012714717222e-05,
|
|
"loss": 0.3377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14511257410049438,
|
|
"step": 1870,
|
|
"valid_targets_mean": 2412.2,
|
|
"valid_targets_min": 1072
|
|
},
|
|
{
|
|
"epoch": 2.9342723004694835,
|
|
"grad_norm": 0.6989083224623761,
|
|
"learning_rate": 2.8839064593464542e-05,
|
|
"loss": 0.3332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13938301801681519,
|
|
"step": 1875,
|
|
"valid_targets_mean": 2612.1,
|
|
"valid_targets_min": 1272
|
|
},
|
|
{
|
|
"epoch": 2.9420970266040687,
|
|
"grad_norm": 0.6520259112328394,
|
|
"learning_rate": 2.876898185092395e-05,
|
|
"loss": 0.3533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17695897817611694,
|
|
"step": 1880,
|
|
"valid_targets_mean": 2664.9,
|
|
"valid_targets_min": 1025
|
|
},
|
|
{
|
|
"epoch": 2.9499217527386543,
|
|
"grad_norm": 0.5661718710791617,
|
|
"learning_rate": 2.869876555447414e-05,
|
|
"loss": 0.3661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1638106107711792,
|
|
"step": 1885,
|
|
"valid_targets_mean": 4068.9,
|
|
"valid_targets_min": 757
|
|
},
|
|
{
|
|
"epoch": 2.9577464788732395,
|
|
"grad_norm": 0.5975986204690612,
|
|
"learning_rate": 2.8628416773527837e-05,
|
|
"loss": 0.3234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13241693377494812,
|
|
"step": 1890,
|
|
"valid_targets_mean": 3464.5,
|
|
"valid_targets_min": 928
|
|
},
|
|
{
|
|
"epoch": 2.9655712050078247,
|
|
"grad_norm": 0.8385971414555112,
|
|
"learning_rate": 2.855793657951556e-05,
|
|
"loss": 0.3519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24939492344856262,
|
|
"step": 1895,
|
|
"valid_targets_mean": 3302.1,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 2.97339593114241,
|
|
"grad_norm": 0.6607704822380924,
|
|
"learning_rate": 2.8487326045869276e-05,
|
|
"loss": 0.3483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1522812843322754,
|
|
"step": 1900,
|
|
"valid_targets_mean": 2820.4,
|
|
"valid_targets_min": 630
|
|
},
|
|
{
|
|
"epoch": 2.981220657276995,
|
|
"grad_norm": 0.7068509989496781,
|
|
"learning_rate": 2.8416586248006056e-05,
|
|
"loss": 0.3572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16552087664604187,
|
|
"step": 1905,
|
|
"valid_targets_mean": 3197.4,
|
|
"valid_targets_min": 1055
|
|
},
|
|
{
|
|
"epoch": 2.9890453834115807,
|
|
"grad_norm": 0.562979995802054,
|
|
"learning_rate": 2.83457182633117e-05,
|
|
"loss": 0.3609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15896588563919067,
|
|
"step": 1910,
|
|
"valid_targets_mean": 4336.0,
|
|
"valid_targets_min": 2014
|
|
},
|
|
{
|
|
"epoch": 2.996870109546166,
|
|
"grad_norm": 0.6458917937667614,
|
|
"learning_rate": 2.8274723171124327e-05,
|
|
"loss": 0.3409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18467837572097778,
|
|
"step": 1915,
|
|
"valid_targets_mean": 3342.5,
|
|
"valid_targets_min": 1182
|
|
},
|
|
{
|
|
"epoch": 3.004694835680751,
|
|
"grad_norm": 0.6497081555801,
|
|
"learning_rate": 2.8203602052717946e-05,
|
|
"loss": 0.3238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14398255944252014,
|
|
"step": 1920,
|
|
"valid_targets_mean": 2951.0,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 3.0125195618153366,
|
|
"grad_norm": 0.6106827818419601,
|
|
"learning_rate": 2.813235599128597e-05,
|
|
"loss": 0.3188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17650988698005676,
|
|
"step": 1925,
|
|
"valid_targets_mean": 3694.4,
|
|
"valid_targets_min": 940
|
|
},
|
|
{
|
|
"epoch": 3.020344287949922,
|
|
"grad_norm": 0.6126241853796064,
|
|
"learning_rate": 2.806098607192472e-05,
|
|
"loss": 0.3151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11839645355939865,
|
|
"step": 1930,
|
|
"valid_targets_mean": 3422.4,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 3.028169014084507,
|
|
"grad_norm": 0.8384918077076364,
|
|
"learning_rate": 2.7989493381616926e-05,
|
|
"loss": 0.2806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14580056071281433,
|
|
"step": 1935,
|
|
"valid_targets_mean": 2007.9,
|
|
"valid_targets_min": 655
|
|
},
|
|
{
|
|
"epoch": 3.035993740219092,
|
|
"grad_norm": 0.8223676427701437,
|
|
"learning_rate": 2.791787900921513e-05,
|
|
"loss": 0.3016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14985796809196472,
|
|
"step": 1940,
|
|
"valid_targets_mean": 2196.9,
|
|
"valid_targets_min": 976
|
|
},
|
|
{
|
|
"epoch": 3.043818466353678,
|
|
"grad_norm": 0.6600813240305426,
|
|
"learning_rate": 2.784614404542515e-05,
|
|
"loss": 0.3457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1805870532989502,
|
|
"step": 1945,
|
|
"valid_targets_mean": 4198.0,
|
|
"valid_targets_min": 934
|
|
},
|
|
{
|
|
"epoch": 3.051643192488263,
|
|
"grad_norm": 0.7515209817172267,
|
|
"learning_rate": 2.7774289582789407e-05,
|
|
"loss": 0.3154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1762523353099823,
|
|
"step": 1950,
|
|
"valid_targets_mean": 3247.0,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 3.059467918622848,
|
|
"grad_norm": 0.5811531215834707,
|
|
"learning_rate": 2.7702316715670363e-05,
|
|
"loss": 0.2983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1107625961303711,
|
|
"step": 1955,
|
|
"valid_targets_mean": 3379.9,
|
|
"valid_targets_min": 847
|
|
},
|
|
{
|
|
"epoch": 3.0672926447574334,
|
|
"grad_norm": 0.683947975127039,
|
|
"learning_rate": 2.7630226540233775e-05,
|
|
"loss": 0.3025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10625830292701721,
|
|
"step": 1960,
|
|
"valid_targets_mean": 2093.5,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 3.075117370892019,
|
|
"grad_norm": 0.670890765903486,
|
|
"learning_rate": 2.7558020154432054e-05,
|
|
"loss": 0.3328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12127495557069778,
|
|
"step": 1965,
|
|
"valid_targets_mean": 3181.0,
|
|
"valid_targets_min": 1122
|
|
},
|
|
{
|
|
"epoch": 3.082942097026604,
|
|
"grad_norm": 0.6817497863874894,
|
|
"learning_rate": 2.7485698657987528e-05,
|
|
"loss": 0.321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17593321204185486,
|
|
"step": 1970,
|
|
"valid_targets_mean": 3201.1,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 3.0907668231611893,
|
|
"grad_norm": 0.7041737064111091,
|
|
"learning_rate": 2.7413263152375684e-05,
|
|
"loss": 0.3234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1500227302312851,
|
|
"step": 1975,
|
|
"valid_targets_mean": 3076.9,
|
|
"valid_targets_min": 1125
|
|
},
|
|
{
|
|
"epoch": 3.0985915492957745,
|
|
"grad_norm": 0.7598143687091022,
|
|
"learning_rate": 2.7340714740808404e-05,
|
|
"loss": 0.307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1306656002998352,
|
|
"step": 1980,
|
|
"valid_targets_mean": 2771.0,
|
|
"valid_targets_min": 1153
|
|
},
|
|
{
|
|
"epoch": 3.10641627543036,
|
|
"grad_norm": 0.5914186260454464,
|
|
"learning_rate": 2.7268054528217144e-05,
|
|
"loss": 0.2995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15824326872825623,
|
|
"step": 1985,
|
|
"valid_targets_mean": 4427.4,
|
|
"valid_targets_min": 1042
|
|
},
|
|
{
|
|
"epoch": 3.1142410015649453,
|
|
"grad_norm": 0.656917906291581,
|
|
"learning_rate": 2.7195283621236143e-05,
|
|
"loss": 0.3105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19817262887954712,
|
|
"step": 1990,
|
|
"valid_targets_mean": 3761.4,
|
|
"valid_targets_min": 1247
|
|
},
|
|
{
|
|
"epoch": 3.1220657276995305,
|
|
"grad_norm": 1.1214179272041178,
|
|
"learning_rate": 2.7122403128185516e-05,
|
|
"loss": 0.3146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19653594493865967,
|
|
"step": 1995,
|
|
"valid_targets_mean": 2191.2,
|
|
"valid_targets_min": 1302
|
|
},
|
|
{
|
|
"epoch": 3.1298904538341157,
|
|
"grad_norm": 0.6210361940041652,
|
|
"learning_rate": 2.7049414159054435e-05,
|
|
"loss": 0.319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12092889845371246,
|
|
"step": 2000,
|
|
"valid_targets_mean": 3400.2,
|
|
"valid_targets_min": 963
|
|
},
|
|
{
|
|
"epoch": 3.1377151799687013,
|
|
"grad_norm": 0.7361772698127387,
|
|
"learning_rate": 2.697631782548416e-05,
|
|
"loss": 0.2991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13506481051445007,
|
|
"step": 2005,
|
|
"valid_targets_mean": 2725.0,
|
|
"valid_targets_min": 1280
|
|
},
|
|
{
|
|
"epoch": 3.1455399061032865,
|
|
"grad_norm": 0.6023454042173408,
|
|
"learning_rate": 2.6903115240751156e-05,
|
|
"loss": 0.3226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11059155315160751,
|
|
"step": 2010,
|
|
"valid_targets_mean": 3388.0,
|
|
"valid_targets_min": 919
|
|
},
|
|
{
|
|
"epoch": 3.1533646322378717,
|
|
"grad_norm": 0.6342073976213858,
|
|
"learning_rate": 2.6829807519750127e-05,
|
|
"loss": 0.3274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1725701093673706,
|
|
"step": 2015,
|
|
"valid_targets_mean": 3607.0,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 3.161189358372457,
|
|
"grad_norm": 0.7636244934736577,
|
|
"learning_rate": 2.6756395778977014e-05,
|
|
"loss": 0.3219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1772337108850479,
|
|
"step": 2020,
|
|
"valid_targets_mean": 2819.9,
|
|
"valid_targets_min": 928
|
|
},
|
|
{
|
|
"epoch": 3.169014084507042,
|
|
"grad_norm": 0.619289123056138,
|
|
"learning_rate": 2.668288113651202e-05,
|
|
"loss": 0.3071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16327960789203644,
|
|
"step": 2025,
|
|
"valid_targets_mean": 4673.1,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 3.1768388106416277,
|
|
"grad_norm": 0.6149351078431927,
|
|
"learning_rate": 2.6609264712002557e-05,
|
|
"loss": 0.3106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16197413206100464,
|
|
"step": 2030,
|
|
"valid_targets_mean": 4423.4,
|
|
"valid_targets_min": 1543
|
|
},
|
|
{
|
|
"epoch": 3.184663536776213,
|
|
"grad_norm": 0.6914141298798548,
|
|
"learning_rate": 2.6535547626646222e-05,
|
|
"loss": 0.3358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2057703733444214,
|
|
"step": 2035,
|
|
"valid_targets_mean": 3733.2,
|
|
"valid_targets_min": 1180
|
|
},
|
|
{
|
|
"epoch": 3.192488262910798,
|
|
"grad_norm": 0.5802504900748476,
|
|
"learning_rate": 2.646173100317368e-05,
|
|
"loss": 0.3212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22027373313903809,
|
|
"step": 2040,
|
|
"valid_targets_mean": 5499.5,
|
|
"valid_targets_min": 1494
|
|
},
|
|
{
|
|
"epoch": 3.2003129890453836,
|
|
"grad_norm": 0.6464696636362578,
|
|
"learning_rate": 2.63878159658316e-05,
|
|
"loss": 0.3261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14309558272361755,
|
|
"step": 2045,
|
|
"valid_targets_mean": 3369.2,
|
|
"valid_targets_min": 1475
|
|
},
|
|
{
|
|
"epoch": 3.208137715179969,
|
|
"grad_norm": 0.6156762039551619,
|
|
"learning_rate": 2.631380364036553e-05,
|
|
"loss": 0.3164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13528528809547424,
|
|
"step": 2050,
|
|
"valid_targets_mean": 3362.0,
|
|
"valid_targets_min": 1449
|
|
},
|
|
{
|
|
"epoch": 3.215962441314554,
|
|
"grad_norm": 0.6080948215947096,
|
|
"learning_rate": 2.6239695154002718e-05,
|
|
"loss": 0.3095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1523822546005249,
|
|
"step": 2055,
|
|
"valid_targets_mean": 4703.6,
|
|
"valid_targets_min": 1232
|
|
},
|
|
{
|
|
"epoch": 3.223787167449139,
|
|
"grad_norm": 0.830783177056543,
|
|
"learning_rate": 2.616549163543499e-05,
|
|
"loss": 0.3341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22060827910900116,
|
|
"step": 2060,
|
|
"valid_targets_mean": 3271.8,
|
|
"valid_targets_min": 1171
|
|
},
|
|
{
|
|
"epoch": 3.2316118935837244,
|
|
"grad_norm": 0.7174846780092924,
|
|
"learning_rate": 2.6091194214801527e-05,
|
|
"loss": 0.3492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2885250449180603,
|
|
"step": 2065,
|
|
"valid_targets_mean": 4648.1,
|
|
"valid_targets_min": 1019
|
|
},
|
|
{
|
|
"epoch": 3.23943661971831,
|
|
"grad_norm": 0.5964534408485419,
|
|
"learning_rate": 2.601680402367166e-05,
|
|
"loss": 0.3161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11012079566717148,
|
|
"step": 2070,
|
|
"valid_targets_mean": 2638.9,
|
|
"valid_targets_min": 660
|
|
},
|
|
{
|
|
"epoch": 3.247261345852895,
|
|
"grad_norm": 0.5698163390553281,
|
|
"learning_rate": 2.594232219502765e-05,
|
|
"loss": 0.298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18175151944160461,
|
|
"step": 2075,
|
|
"valid_targets_mean": 6045.2,
|
|
"valid_targets_min": 2022
|
|
},
|
|
{
|
|
"epoch": 3.2550860719874803,
|
|
"grad_norm": 0.685573546625663,
|
|
"learning_rate": 2.5867749863247415e-05,
|
|
"loss": 0.3264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1391291618347168,
|
|
"step": 2080,
|
|
"valid_targets_mean": 2920.9,
|
|
"valid_targets_min": 814
|
|
},
|
|
{
|
|
"epoch": 3.262910798122066,
|
|
"grad_norm": 0.6474104807461694,
|
|
"learning_rate": 2.579308816408726e-05,
|
|
"loss": 0.3352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25288039445877075,
|
|
"step": 2085,
|
|
"valid_targets_mean": 4662.9,
|
|
"valid_targets_min": 1647
|
|
},
|
|
{
|
|
"epoch": 3.270735524256651,
|
|
"grad_norm": 0.6235184441298909,
|
|
"learning_rate": 2.5718338234664577e-05,
|
|
"loss": 0.3345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1515205204486847,
|
|
"step": 2090,
|
|
"valid_targets_mean": 3434.4,
|
|
"valid_targets_min": 944
|
|
},
|
|
{
|
|
"epoch": 3.2785602503912363,
|
|
"grad_norm": 0.7151484039500291,
|
|
"learning_rate": 2.5643501213440528e-05,
|
|
"loss": 0.3634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10006425529718399,
|
|
"step": 2095,
|
|
"valid_targets_mean": 2233.8,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 3.2863849765258215,
|
|
"grad_norm": 0.6830905765334356,
|
|
"learning_rate": 2.556857824020272e-05,
|
|
"loss": 0.3081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1232006698846817,
|
|
"step": 2100,
|
|
"valid_targets_mean": 2604.5,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 3.2942097026604067,
|
|
"grad_norm": 0.6886316140793676,
|
|
"learning_rate": 2.5493570456047808e-05,
|
|
"loss": 0.3006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19684526324272156,
|
|
"step": 2105,
|
|
"valid_targets_mean": 3772.9,
|
|
"valid_targets_min": 1025
|
|
},
|
|
{
|
|
"epoch": 3.3020344287949923,
|
|
"grad_norm": 0.7203907487240322,
|
|
"learning_rate": 2.5418479003364157e-05,
|
|
"loss": 0.3298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08641336858272552,
|
|
"step": 2110,
|
|
"valid_targets_mean": 1908.9,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 3.3098591549295775,
|
|
"grad_norm": 0.6604601859851796,
|
|
"learning_rate": 2.5343305025814426e-05,
|
|
"loss": 0.3158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15976139903068542,
|
|
"step": 2115,
|
|
"valid_targets_mean": 4040.6,
|
|
"valid_targets_min": 1373
|
|
},
|
|
{
|
|
"epoch": 3.3176838810641627,
|
|
"grad_norm": 0.6329634726287663,
|
|
"learning_rate": 2.5268049668318133e-05,
|
|
"loss": 0.3319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15170234441757202,
|
|
"step": 2120,
|
|
"valid_targets_mean": 3609.6,
|
|
"valid_targets_min": 1331
|
|
},
|
|
{
|
|
"epoch": 3.325508607198748,
|
|
"grad_norm": 0.647531871952801,
|
|
"learning_rate": 2.5192714077034257e-05,
|
|
"loss": 0.3132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15322692692279816,
|
|
"step": 2125,
|
|
"valid_targets_mean": 4196.4,
|
|
"valid_targets_min": 1953
|
|
},
|
|
{
|
|
"epoch": 3.3333333333333335,
|
|
"grad_norm": 0.6357492130653484,
|
|
"learning_rate": 2.511729939934374e-05,
|
|
"loss": 0.3122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15362387895584106,
|
|
"step": 2130,
|
|
"valid_targets_mean": 3734.6,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 3.3411580594679187,
|
|
"grad_norm": 0.7132645485022204,
|
|
"learning_rate": 2.504180678383204e-05,
|
|
"loss": 0.3017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11358301341533661,
|
|
"step": 2135,
|
|
"valid_targets_mean": 2219.8,
|
|
"valid_targets_min": 818
|
|
},
|
|
{
|
|
"epoch": 3.348982785602504,
|
|
"grad_norm": 0.5794313828010946,
|
|
"learning_rate": 2.4966237380271623e-05,
|
|
"loss": 0.3007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11834259331226349,
|
|
"step": 2140,
|
|
"valid_targets_mean": 2872.9,
|
|
"valid_targets_min": 1172
|
|
},
|
|
{
|
|
"epoch": 3.356807511737089,
|
|
"grad_norm": 0.6252793739889049,
|
|
"learning_rate": 2.489059233960447e-05,
|
|
"loss": 0.2967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1227874830365181,
|
|
"step": 2145,
|
|
"valid_targets_mean": 3032.5,
|
|
"valid_targets_min": 994
|
|
},
|
|
{
|
|
"epoch": 3.3646322378716746,
|
|
"grad_norm": 0.6003205922796153,
|
|
"learning_rate": 2.481487281392452e-05,
|
|
"loss": 0.3179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13900496065616608,
|
|
"step": 2150,
|
|
"valid_targets_mean": 3351.9,
|
|
"valid_targets_min": 1364
|
|
},
|
|
{
|
|
"epoch": 3.37245696400626,
|
|
"grad_norm": 0.6765261385589029,
|
|
"learning_rate": 2.473907995646014e-05,
|
|
"loss": 0.3092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1633828580379486,
|
|
"step": 2155,
|
|
"valid_targets_mean": 3454.0,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 3.380281690140845,
|
|
"grad_norm": 0.5853631545336233,
|
|
"learning_rate": 2.4663214921556576e-05,
|
|
"loss": 0.3331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2509736716747284,
|
|
"step": 2160,
|
|
"valid_targets_mean": 6106.2,
|
|
"valid_targets_min": 1479
|
|
},
|
|
{
|
|
"epoch": 3.38810641627543,
|
|
"grad_norm": 0.6259850769081942,
|
|
"learning_rate": 2.458727886465833e-05,
|
|
"loss": 0.3242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18648961186408997,
|
|
"step": 2165,
|
|
"valid_targets_mean": 4450.0,
|
|
"valid_targets_min": 1556
|
|
},
|
|
{
|
|
"epoch": 3.395931142410016,
|
|
"grad_norm": 0.5850193333430257,
|
|
"learning_rate": 2.4511272942291615e-05,
|
|
"loss": 0.3101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1478157341480255,
|
|
"step": 2170,
|
|
"valid_targets_mean": 3958.2,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 3.403755868544601,
|
|
"grad_norm": 0.603280554047555,
|
|
"learning_rate": 2.443519831204668e-05,
|
|
"loss": 0.3213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20803077518939972,
|
|
"step": 2175,
|
|
"valid_targets_mean": 4708.4,
|
|
"valid_targets_min": 2132
|
|
},
|
|
{
|
|
"epoch": 3.411580594679186,
|
|
"grad_norm": 0.5738597318204078,
|
|
"learning_rate": 2.4359056132560258e-05,
|
|
"loss": 0.3256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08233896642923355,
|
|
"step": 2180,
|
|
"valid_targets_mean": 1818.2,
|
|
"valid_targets_min": 1185
|
|
},
|
|
{
|
|
"epoch": 3.4194053208137714,
|
|
"grad_norm": 0.7189994213138329,
|
|
"learning_rate": 2.4282847563497826e-05,
|
|
"loss": 0.3436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24029108881950378,
|
|
"step": 2185,
|
|
"valid_targets_mean": 4395.8,
|
|
"valid_targets_min": 1035
|
|
},
|
|
{
|
|
"epoch": 3.427230046948357,
|
|
"grad_norm": 0.6923643462965208,
|
|
"learning_rate": 2.4206573765536034e-05,
|
|
"loss": 0.3376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26428306102752686,
|
|
"step": 2190,
|
|
"valid_targets_mean": 4716.6,
|
|
"valid_targets_min": 1518
|
|
},
|
|
{
|
|
"epoch": 3.435054773082942,
|
|
"grad_norm": 0.6194609103923178,
|
|
"learning_rate": 2.4130235900344958e-05,
|
|
"loss": 0.3398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2023763358592987,
|
|
"step": 2195,
|
|
"valid_targets_mean": 4305.0,
|
|
"valid_targets_min": 1315
|
|
},
|
|
{
|
|
"epoch": 3.4428794992175273,
|
|
"grad_norm": 0.6785080892236846,
|
|
"learning_rate": 2.4053835130570433e-05,
|
|
"loss": 0.3151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1775413155555725,
|
|
"step": 2200,
|
|
"valid_targets_mean": 4351.1,
|
|
"valid_targets_min": 1440
|
|
},
|
|
{
|
|
"epoch": 3.4507042253521125,
|
|
"grad_norm": 0.664728908237957,
|
|
"learning_rate": 2.3977372619816378e-05,
|
|
"loss": 0.3449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1149962991476059,
|
|
"step": 2205,
|
|
"valid_targets_mean": 2427.8,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 3.458528951486698,
|
|
"grad_norm": 0.6868346925310195,
|
|
"learning_rate": 2.390084953262701e-05,
|
|
"loss": 0.3504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23499436676502228,
|
|
"step": 2210,
|
|
"valid_targets_mean": 4611.4,
|
|
"valid_targets_min": 1498
|
|
},
|
|
{
|
|
"epoch": 3.4663536776212833,
|
|
"grad_norm": 0.6438179309320402,
|
|
"learning_rate": 2.3824267034469163e-05,
|
|
"loss": 0.3257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14294379949569702,
|
|
"step": 2215,
|
|
"valid_targets_mean": 3150.2,
|
|
"valid_targets_min": 1502
|
|
},
|
|
{
|
|
"epoch": 3.4741784037558685,
|
|
"grad_norm": 0.7529773698615556,
|
|
"learning_rate": 2.37476262917145e-05,
|
|
"loss": 0.3347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18676266074180603,
|
|
"step": 2220,
|
|
"valid_targets_mean": 3391.9,
|
|
"valid_targets_min": 2468
|
|
},
|
|
{
|
|
"epoch": 3.4820031298904537,
|
|
"grad_norm": 0.6210089679342355,
|
|
"learning_rate": 2.3670928471621766e-05,
|
|
"loss": 0.3316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16421878337860107,
|
|
"step": 2225,
|
|
"valid_targets_mean": 4064.5,
|
|
"valid_targets_min": 1327
|
|
},
|
|
{
|
|
"epoch": 3.4898278560250393,
|
|
"grad_norm": 0.6590890021630893,
|
|
"learning_rate": 2.3594174742319035e-05,
|
|
"loss": 0.2904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1044304296374321,
|
|
"step": 2230,
|
|
"valid_targets_mean": 2429.0,
|
|
"valid_targets_min": 1283
|
|
},
|
|
{
|
|
"epoch": 3.4976525821596245,
|
|
"grad_norm": 0.7522669070858982,
|
|
"learning_rate": 2.3517366272785856e-05,
|
|
"loss": 0.3234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14846603572368622,
|
|
"step": 2235,
|
|
"valid_targets_mean": 2807.5,
|
|
"valid_targets_min": 1192
|
|
},
|
|
{
|
|
"epoch": 3.5054773082942097,
|
|
"grad_norm": 0.6829748029271354,
|
|
"learning_rate": 2.3440504232835508e-05,
|
|
"loss": 0.329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15734925866127014,
|
|
"step": 2240,
|
|
"valid_targets_mean": 3356.5,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 3.513302034428795,
|
|
"grad_norm": 0.6031703696902189,
|
|
"learning_rate": 2.3363589793097153e-05,
|
|
"loss": 0.3479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1772865653038025,
|
|
"step": 2245,
|
|
"valid_targets_mean": 4950.5,
|
|
"valid_targets_min": 979
|
|
},
|
|
{
|
|
"epoch": 3.52112676056338,
|
|
"grad_norm": 0.799117896842628,
|
|
"learning_rate": 2.3286624124998028e-05,
|
|
"loss": 0.3338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16151908040046692,
|
|
"step": 2250,
|
|
"valid_targets_mean": 2447.0,
|
|
"valid_targets_min": 1264
|
|
},
|
|
{
|
|
"epoch": 3.5289514866979657,
|
|
"grad_norm": 0.6220327676172026,
|
|
"learning_rate": 2.3209608400745572e-05,
|
|
"loss": 0.3365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17993655800819397,
|
|
"step": 2255,
|
|
"valid_targets_mean": 4395.9,
|
|
"valid_targets_min": 1506
|
|
},
|
|
{
|
|
"epoch": 3.536776212832551,
|
|
"grad_norm": 0.6017815891103857,
|
|
"learning_rate": 2.313254379330961e-05,
|
|
"loss": 0.3163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15044213831424713,
|
|
"step": 2260,
|
|
"valid_targets_mean": 4353.2,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 3.544600938967136,
|
|
"grad_norm": 0.681895578380265,
|
|
"learning_rate": 2.305543147640446e-05,
|
|
"loss": 0.3412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1705062985420227,
|
|
"step": 2265,
|
|
"valid_targets_mean": 3547.6,
|
|
"valid_targets_min": 1157
|
|
},
|
|
{
|
|
"epoch": 3.5524256651017216,
|
|
"grad_norm": 0.5513735278514147,
|
|
"learning_rate": 2.2978272624471073e-05,
|
|
"loss": 0.328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23636803030967712,
|
|
"step": 2270,
|
|
"valid_targets_mean": 5766.6,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 3.560250391236307,
|
|
"grad_norm": 0.6434597942500088,
|
|
"learning_rate": 2.2901068412659143e-05,
|
|
"loss": 0.3065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1345367431640625,
|
|
"step": 2275,
|
|
"valid_targets_mean": 2941.4,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 3.568075117370892,
|
|
"grad_norm": 0.6839029049756532,
|
|
"learning_rate": 2.2823820016809197e-05,
|
|
"loss": 0.3251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15413150191307068,
|
|
"step": 2280,
|
|
"valid_targets_mean": 3330.4,
|
|
"valid_targets_min": 1118
|
|
},
|
|
{
|
|
"epoch": 3.575899843505477,
|
|
"grad_norm": 0.594556854685865,
|
|
"learning_rate": 2.2746528613434708e-05,
|
|
"loss": 0.3207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15590301156044006,
|
|
"step": 2285,
|
|
"valid_targets_mean": 4553.9,
|
|
"valid_targets_min": 1682
|
|
},
|
|
{
|
|
"epoch": 3.5837245696400624,
|
|
"grad_norm": 0.7093436946435986,
|
|
"learning_rate": 2.266919537970415e-05,
|
|
"loss": 0.334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17857292294502258,
|
|
"step": 2290,
|
|
"valid_targets_mean": 3967.4,
|
|
"valid_targets_min": 1996
|
|
},
|
|
{
|
|
"epoch": 3.591549295774648,
|
|
"grad_norm": 0.828022486430082,
|
|
"learning_rate": 2.2591821493423113e-05,
|
|
"loss": 0.343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19134113192558289,
|
|
"step": 2295,
|
|
"valid_targets_mean": 3014.8,
|
|
"valid_targets_min": 1523
|
|
},
|
|
{
|
|
"epoch": 3.599374021909233,
|
|
"grad_norm": 0.5931382873785473,
|
|
"learning_rate": 2.25144081330163e-05,
|
|
"loss": 0.324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09897127747535706,
|
|
"step": 2300,
|
|
"valid_targets_mean": 3395.2,
|
|
"valid_targets_min": 1874
|
|
},
|
|
{
|
|
"epoch": 3.6071987480438183,
|
|
"grad_norm": 0.8953725741895598,
|
|
"learning_rate": 2.243695647750964e-05,
|
|
"loss": 0.3198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15227991342544556,
|
|
"step": 2305,
|
|
"valid_targets_mean": 2294.1,
|
|
"valid_targets_min": 1655
|
|
},
|
|
{
|
|
"epoch": 3.615023474178404,
|
|
"grad_norm": 0.6694637457747128,
|
|
"learning_rate": 2.2359467706512293e-05,
|
|
"loss": 0.3177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12906771898269653,
|
|
"step": 2310,
|
|
"valid_targets_mean": 3129.9,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 3.622848200312989,
|
|
"grad_norm": 0.738240158940209,
|
|
"learning_rate": 2.2281943000198716e-05,
|
|
"loss": 0.3258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13217902183532715,
|
|
"step": 2315,
|
|
"valid_targets_mean": 2446.5,
|
|
"valid_targets_min": 1045
|
|
},
|
|
{
|
|
"epoch": 3.6306729264475743,
|
|
"grad_norm": 0.686030349176259,
|
|
"learning_rate": 2.2204383539290645e-05,
|
|
"loss": 0.3403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1888277530670166,
|
|
"step": 2320,
|
|
"valid_targets_mean": 3847.6,
|
|
"valid_targets_min": 1338
|
|
},
|
|
{
|
|
"epoch": 3.6384976525821595,
|
|
"grad_norm": 0.6924217854869346,
|
|
"learning_rate": 2.212679050503916e-05,
|
|
"loss": 0.3311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18183617293834686,
|
|
"step": 2325,
|
|
"valid_targets_mean": 3635.5,
|
|
"valid_targets_min": 1226
|
|
},
|
|
{
|
|
"epoch": 3.6463223787167447,
|
|
"grad_norm": 0.7223153692257793,
|
|
"learning_rate": 2.204916507920666e-05,
|
|
"loss": 0.3023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1842113584280014,
|
|
"step": 2330,
|
|
"valid_targets_mean": 3117.4,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 3.6541471048513303,
|
|
"grad_norm": 0.6680718750165622,
|
|
"learning_rate": 2.1971508444048874e-05,
|
|
"loss": 0.321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10785067081451416,
|
|
"step": 2335,
|
|
"valid_targets_mean": 2447.0,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 3.6619718309859155,
|
|
"grad_norm": 0.8171870989648095,
|
|
"learning_rate": 2.1893821782296873e-05,
|
|
"loss": 0.333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2068915218114853,
|
|
"step": 2340,
|
|
"valid_targets_mean": 2562.2,
|
|
"valid_targets_min": 1061
|
|
},
|
|
{
|
|
"epoch": 3.6697965571205007,
|
|
"grad_norm": 0.7366985995405436,
|
|
"learning_rate": 2.1816106277139015e-05,
|
|
"loss": 0.3237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1555401235818863,
|
|
"step": 2345,
|
|
"valid_targets_mean": 2277.2,
|
|
"valid_targets_min": 1377
|
|
},
|
|
{
|
|
"epoch": 3.6776212832550863,
|
|
"grad_norm": 0.7063315905311864,
|
|
"learning_rate": 2.1738363112202982e-05,
|
|
"loss": 0.3224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16657842695713043,
|
|
"step": 2350,
|
|
"valid_targets_mean": 2566.8,
|
|
"valid_targets_min": 1306
|
|
},
|
|
{
|
|
"epoch": 3.6854460093896715,
|
|
"grad_norm": 0.7613314416841395,
|
|
"learning_rate": 2.1660593471537697e-05,
|
|
"loss": 0.3313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1992504894733429,
|
|
"step": 2355,
|
|
"valid_targets_mean": 3502.4,
|
|
"valid_targets_min": 1404
|
|
},
|
|
{
|
|
"epoch": 3.6932707355242567,
|
|
"grad_norm": 0.6680427816935602,
|
|
"learning_rate": 2.158279853959532e-05,
|
|
"loss": 0.3511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1651182323694229,
|
|
"step": 2360,
|
|
"valid_targets_mean": 3505.2,
|
|
"valid_targets_min": 968
|
|
},
|
|
{
|
|
"epoch": 3.701095461658842,
|
|
"grad_norm": 0.6028130818087857,
|
|
"learning_rate": 2.1504979501213224e-05,
|
|
"loss": 0.3158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1561662256717682,
|
|
"step": 2365,
|
|
"valid_targets_mean": 4434.0,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 3.708920187793427,
|
|
"grad_norm": 0.699865548947199,
|
|
"learning_rate": 2.1427137541595894e-05,
|
|
"loss": 0.3178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19287079572677612,
|
|
"step": 2370,
|
|
"valid_targets_mean": 3832.4,
|
|
"valid_targets_min": 1569
|
|
},
|
|
{
|
|
"epoch": 3.7167449139280127,
|
|
"grad_norm": 0.7154478909832181,
|
|
"learning_rate": 2.134927384629695e-05,
|
|
"loss": 0.3207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18592238426208496,
|
|
"step": 2375,
|
|
"valid_targets_mean": 3951.8,
|
|
"valid_targets_min": 1212
|
|
},
|
|
{
|
|
"epoch": 3.724569640062598,
|
|
"grad_norm": 0.7297464727264829,
|
|
"learning_rate": 2.127138960120101e-05,
|
|
"loss": 0.3226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18737393617630005,
|
|
"step": 2380,
|
|
"valid_targets_mean": 3127.8,
|
|
"valid_targets_min": 1788
|
|
},
|
|
{
|
|
"epoch": 3.732394366197183,
|
|
"grad_norm": 0.5536088464628784,
|
|
"learning_rate": 2.1193485992505715e-05,
|
|
"loss": 0.3275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18994617462158203,
|
|
"step": 2385,
|
|
"valid_targets_mean": 5289.4,
|
|
"valid_targets_min": 1632
|
|
},
|
|
{
|
|
"epoch": 3.7402190923317686,
|
|
"grad_norm": 0.5742531212562263,
|
|
"learning_rate": 2.1115564206703584e-05,
|
|
"loss": 0.3105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16511113941669464,
|
|
"step": 2390,
|
|
"valid_targets_mean": 5543.0,
|
|
"valid_targets_min": 1317
|
|
},
|
|
{
|
|
"epoch": 3.748043818466354,
|
|
"grad_norm": 0.7681537097694591,
|
|
"learning_rate": 2.1037625430564003e-05,
|
|
"loss": 0.3004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20063745975494385,
|
|
"step": 2395,
|
|
"valid_targets_mean": 3213.9,
|
|
"valid_targets_min": 869
|
|
},
|
|
{
|
|
"epoch": 3.755868544600939,
|
|
"grad_norm": 0.5852360238668101,
|
|
"learning_rate": 2.09596708511151e-05,
|
|
"loss": 0.3231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11034137010574341,
|
|
"step": 2400,
|
|
"valid_targets_mean": 2358.9,
|
|
"valid_targets_min": 1114
|
|
},
|
|
{
|
|
"epoch": 3.763693270735524,
|
|
"grad_norm": 0.6679296466857061,
|
|
"learning_rate": 2.0881701655625713e-05,
|
|
"loss": 0.3441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21008796989917755,
|
|
"step": 2405,
|
|
"valid_targets_mean": 4001.0,
|
|
"valid_targets_min": 2403
|
|
},
|
|
{
|
|
"epoch": 3.7715179968701094,
|
|
"grad_norm": 1.5109999395286426,
|
|
"learning_rate": 2.0803719031587282e-05,
|
|
"loss": 0.3228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11029382795095444,
|
|
"step": 2410,
|
|
"valid_targets_mean": 2704.2,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 3.779342723004695,
|
|
"grad_norm": 0.7642789951664134,
|
|
"learning_rate": 2.0725724166695765e-05,
|
|
"loss": 0.3327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1669953167438507,
|
|
"step": 2415,
|
|
"valid_targets_mean": 2107.5,
|
|
"valid_targets_min": 1033
|
|
},
|
|
{
|
|
"epoch": 3.78716744913928,
|
|
"grad_norm": 0.5737149303615792,
|
|
"learning_rate": 2.064771824883354e-05,
|
|
"loss": 0.3033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1760150045156479,
|
|
"step": 2420,
|
|
"valid_targets_mean": 5345.0,
|
|
"valid_targets_min": 1697
|
|
},
|
|
{
|
|
"epoch": 3.7949921752738653,
|
|
"grad_norm": 0.6740470205588289,
|
|
"learning_rate": 2.0569702466051344e-05,
|
|
"loss": 0.3038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14609849452972412,
|
|
"step": 2425,
|
|
"valid_targets_mean": 3053.4,
|
|
"valid_targets_min": 856
|
|
},
|
|
{
|
|
"epoch": 3.802816901408451,
|
|
"grad_norm": 0.7921321569792215,
|
|
"learning_rate": 2.0491678006550152e-05,
|
|
"loss": 0.3094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18210767209529877,
|
|
"step": 2430,
|
|
"valid_targets_mean": 3432.4,
|
|
"valid_targets_min": 920
|
|
},
|
|
{
|
|
"epoch": 3.810641627543036,
|
|
"grad_norm": 0.610597437719172,
|
|
"learning_rate": 2.0413646058663076e-05,
|
|
"loss": 0.3331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11667002737522125,
|
|
"step": 2435,
|
|
"valid_targets_mean": 2716.1,
|
|
"valid_targets_min": 1762
|
|
},
|
|
{
|
|
"epoch": 3.8184663536776213,
|
|
"grad_norm": 0.5667524129024861,
|
|
"learning_rate": 2.0335607810837293e-05,
|
|
"loss": 0.3037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14021626114845276,
|
|
"step": 2440,
|
|
"valid_targets_mean": 4360.9,
|
|
"valid_targets_min": 1929
|
|
},
|
|
{
|
|
"epoch": 3.8262910798122065,
|
|
"grad_norm": 0.6240218937599222,
|
|
"learning_rate": 2.0257564451615933e-05,
|
|
"loss": 0.3145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14283740520477295,
|
|
"step": 2445,
|
|
"valid_targets_mean": 3134.1,
|
|
"valid_targets_min": 865
|
|
},
|
|
{
|
|
"epoch": 3.8341158059467917,
|
|
"grad_norm": 0.5259101177842823,
|
|
"learning_rate": 2.017951716961996e-05,
|
|
"loss": 0.3139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1792066991329193,
|
|
"step": 2450,
|
|
"valid_targets_mean": 5186.8,
|
|
"valid_targets_min": 1456
|
|
},
|
|
{
|
|
"epoch": 3.8419405320813773,
|
|
"grad_norm": 0.6364015966241136,
|
|
"learning_rate": 2.010146715353009e-05,
|
|
"loss": 0.3312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20233263075351715,
|
|
"step": 2455,
|
|
"valid_targets_mean": 4790.8,
|
|
"valid_targets_min": 1750
|
|
},
|
|
{
|
|
"epoch": 3.8497652582159625,
|
|
"grad_norm": 0.5623902516903706,
|
|
"learning_rate": 2.002341559206867e-05,
|
|
"loss": 0.3123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11632168292999268,
|
|
"step": 2460,
|
|
"valid_targets_mean": 3357.5,
|
|
"valid_targets_min": 2042
|
|
},
|
|
{
|
|
"epoch": 3.8575899843505477,
|
|
"grad_norm": 0.5639068127350884,
|
|
"learning_rate": 1.99453636739816e-05,
|
|
"loss": 0.321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16382811963558197,
|
|
"step": 2465,
|
|
"valid_targets_mean": 4698.2,
|
|
"valid_targets_min": 998
|
|
},
|
|
{
|
|
"epoch": 3.865414710485133,
|
|
"grad_norm": 2.8427169866873183,
|
|
"learning_rate": 1.986731258802021e-05,
|
|
"loss": 0.3216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10248608887195587,
|
|
"step": 2470,
|
|
"valid_targets_mean": 1905.2,
|
|
"valid_targets_min": 1124
|
|
},
|
|
{
|
|
"epoch": 3.873239436619718,
|
|
"grad_norm": 0.6100794810555497,
|
|
"learning_rate": 1.978926352292314e-05,
|
|
"loss": 0.3292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18855592608451843,
|
|
"step": 2475,
|
|
"valid_targets_mean": 4656.2,
|
|
"valid_targets_min": 918
|
|
},
|
|
{
|
|
"epoch": 3.8810641627543037,
|
|
"grad_norm": 0.8144924100531931,
|
|
"learning_rate": 1.9711217667398264e-05,
|
|
"loss": 0.3119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1477562040090561,
|
|
"step": 2480,
|
|
"valid_targets_mean": 2736.2,
|
|
"valid_targets_min": 828
|
|
},
|
|
{
|
|
"epoch": 3.888888888888889,
|
|
"grad_norm": 0.6808048004901257,
|
|
"learning_rate": 1.9633176210104572e-05,
|
|
"loss": 0.3272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1646517515182495,
|
|
"step": 2485,
|
|
"valid_targets_mean": 3281.0,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 3.896713615023474,
|
|
"grad_norm": 0.6250507848491269,
|
|
"learning_rate": 1.9555140339634064e-05,
|
|
"loss": 0.2976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18335703015327454,
|
|
"step": 2490,
|
|
"valid_targets_mean": 4765.4,
|
|
"valid_targets_min": 1900
|
|
},
|
|
{
|
|
"epoch": 3.9045383411580596,
|
|
"grad_norm": 0.7161566295768828,
|
|
"learning_rate": 1.9477111244493672e-05,
|
|
"loss": 0.329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1496560424566269,
|
|
"step": 2495,
|
|
"valid_targets_mean": 2785.9,
|
|
"valid_targets_min": 1477
|
|
},
|
|
{
|
|
"epoch": 3.912363067292645,
|
|
"grad_norm": 0.6588268611024749,
|
|
"learning_rate": 1.9399090113087092e-05,
|
|
"loss": 0.303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21329328417778015,
|
|
"step": 2500,
|
|
"valid_targets_mean": 4798.5,
|
|
"valid_targets_min": 1210
|
|
},
|
|
{
|
|
"epoch": 3.92018779342723,
|
|
"grad_norm": 0.8254193842987746,
|
|
"learning_rate": 1.932107813369678e-05,
|
|
"loss": 0.3329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16275982558727264,
|
|
"step": 2505,
|
|
"valid_targets_mean": 2340.8,
|
|
"valid_targets_min": 1156
|
|
},
|
|
{
|
|
"epoch": 3.928012519561815,
|
|
"grad_norm": 0.6233158181240838,
|
|
"learning_rate": 1.9243076494465766e-05,
|
|
"loss": 0.319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1589318811893463,
|
|
"step": 2510,
|
|
"valid_targets_mean": 3737.2,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 3.9358372456964004,
|
|
"grad_norm": 0.6761734412516769,
|
|
"learning_rate": 1.916508638337964e-05,
|
|
"loss": 0.3017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13554273545742035,
|
|
"step": 2515,
|
|
"valid_targets_mean": 3061.5,
|
|
"valid_targets_min": 1743
|
|
},
|
|
{
|
|
"epoch": 3.943661971830986,
|
|
"grad_norm": 0.5438351340640695,
|
|
"learning_rate": 1.9087108988248357e-05,
|
|
"loss": 0.2981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.114707812666893,
|
|
"step": 2520,
|
|
"valid_targets_mean": 4473.0,
|
|
"valid_targets_min": 1076
|
|
},
|
|
{
|
|
"epoch": 3.951486697965571,
|
|
"grad_norm": 0.6528872787928418,
|
|
"learning_rate": 1.9009145496688255e-05,
|
|
"loss": 0.321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1285504698753357,
|
|
"step": 2525,
|
|
"valid_targets_mean": 3288.2,
|
|
"valid_targets_min": 887
|
|
},
|
|
{
|
|
"epoch": 3.9593114241001564,
|
|
"grad_norm": 0.6565991315408365,
|
|
"learning_rate": 1.8931197096103892e-05,
|
|
"loss": 0.3331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17435544729232788,
|
|
"step": 2530,
|
|
"valid_targets_mean": 3958.9,
|
|
"valid_targets_min": 1735
|
|
},
|
|
{
|
|
"epoch": 3.967136150234742,
|
|
"grad_norm": 0.737557568410757,
|
|
"learning_rate": 1.8853264973669997e-05,
|
|
"loss": 0.3129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15909463167190552,
|
|
"step": 2535,
|
|
"valid_targets_mean": 2753.1,
|
|
"valid_targets_min": 904
|
|
},
|
|
{
|
|
"epoch": 3.974960876369327,
|
|
"grad_norm": 0.7040546037369713,
|
|
"learning_rate": 1.877535031631338e-05,
|
|
"loss": 0.3359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15414124727249146,
|
|
"step": 2540,
|
|
"valid_targets_mean": 3229.5,
|
|
"valid_targets_min": 1718
|
|
},
|
|
{
|
|
"epoch": 3.9827856025039123,
|
|
"grad_norm": 0.5998581533178446,
|
|
"learning_rate": 1.8697454310694832e-05,
|
|
"loss": 0.2874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17271047830581665,
|
|
"step": 2545,
|
|
"valid_targets_mean": 4692.5,
|
|
"valid_targets_min": 1028
|
|
},
|
|
{
|
|
"epoch": 3.9906103286384975,
|
|
"grad_norm": 0.7354265701203705,
|
|
"learning_rate": 1.8619578143191096e-05,
|
|
"loss": 0.298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13552381098270416,
|
|
"step": 2550,
|
|
"valid_targets_mean": 2845.0,
|
|
"valid_targets_min": 1246
|
|
},
|
|
{
|
|
"epoch": 3.9984350547730827,
|
|
"grad_norm": 0.5915667853544958,
|
|
"learning_rate": 1.854172299987677e-05,
|
|
"loss": 0.2927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1262306571006775,
|
|
"step": 2555,
|
|
"valid_targets_mean": 3247.1,
|
|
"valid_targets_min": 985
|
|
},
|
|
{
|
|
"epoch": 4.006259780907668,
|
|
"grad_norm": 0.7815620614431854,
|
|
"learning_rate": 1.8463890066506253e-05,
|
|
"loss": 0.2858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15015846490859985,
|
|
"step": 2560,
|
|
"valid_targets_mean": 2450.4,
|
|
"valid_targets_min": 748
|
|
},
|
|
{
|
|
"epoch": 4.014084507042254,
|
|
"grad_norm": 0.7256619134009854,
|
|
"learning_rate": 1.838608052849566e-05,
|
|
"loss": 0.2918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18715006113052368,
|
|
"step": 2565,
|
|
"valid_targets_mean": 3077.6,
|
|
"valid_targets_min": 1096
|
|
},
|
|
{
|
|
"epoch": 4.021909233176839,
|
|
"grad_norm": 0.804197325350692,
|
|
"learning_rate": 1.8308295570904803e-05,
|
|
"loss": 0.305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09203845262527466,
|
|
"step": 2570,
|
|
"valid_targets_mean": 2303.5,
|
|
"valid_targets_min": 901
|
|
},
|
|
{
|
|
"epoch": 4.029733959311424,
|
|
"grad_norm": 0.775899577487386,
|
|
"learning_rate": 1.823053637841913e-05,
|
|
"loss": 0.2864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15230292081832886,
|
|
"step": 2575,
|
|
"valid_targets_mean": 2976.1,
|
|
"valid_targets_min": 1232
|
|
},
|
|
{
|
|
"epoch": 4.037558685446009,
|
|
"grad_norm": 0.706003235795555,
|
|
"learning_rate": 1.8152804135331688e-05,
|
|
"loss": 0.3067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1888207197189331,
|
|
"step": 2580,
|
|
"valid_targets_mean": 3730.4,
|
|
"valid_targets_min": 1247
|
|
},
|
|
{
|
|
"epoch": 4.045383411580595,
|
|
"grad_norm": 0.7398852494151484,
|
|
"learning_rate": 1.8075100025525052e-05,
|
|
"loss": 0.279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1713675856590271,
|
|
"step": 2585,
|
|
"valid_targets_mean": 3315.5,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 4.05320813771518,
|
|
"grad_norm": 0.6192577990961331,
|
|
"learning_rate": 1.7997425232453335e-05,
|
|
"loss": 0.2684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18109264969825745,
|
|
"step": 2590,
|
|
"valid_targets_mean": 4859.0,
|
|
"valid_targets_min": 1505
|
|
},
|
|
{
|
|
"epoch": 4.061032863849765,
|
|
"grad_norm": 0.5723634811805203,
|
|
"learning_rate": 1.7919780939124154e-05,
|
|
"loss": 0.2879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1952357292175293,
|
|
"step": 2595,
|
|
"valid_targets_mean": 5372.5,
|
|
"valid_targets_min": 1257
|
|
},
|
|
{
|
|
"epoch": 4.068857589984351,
|
|
"grad_norm": 0.77327804088328,
|
|
"learning_rate": 1.7842168328080593e-05,
|
|
"loss": 0.2998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16615641117095947,
|
|
"step": 2600,
|
|
"valid_targets_mean": 2586.4,
|
|
"valid_targets_min": 1500
|
|
},
|
|
{
|
|
"epoch": 4.076682316118935,
|
|
"grad_norm": 0.6960571578566941,
|
|
"learning_rate": 1.7764588581383218e-05,
|
|
"loss": 0.2959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14354319870471954,
|
|
"step": 2605,
|
|
"valid_targets_mean": 2759.1,
|
|
"valid_targets_min": 1471
|
|
},
|
|
{
|
|
"epoch": 4.084507042253521,
|
|
"grad_norm": 0.8230553537504406,
|
|
"learning_rate": 1.768704288059205e-05,
|
|
"loss": 0.3103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14885714650154114,
|
|
"step": 2610,
|
|
"valid_targets_mean": 2784.1,
|
|
"valid_targets_min": 1516
|
|
},
|
|
{
|
|
"epoch": 4.092331768388107,
|
|
"grad_norm": 0.733080556221669,
|
|
"learning_rate": 1.7609532406748605e-05,
|
|
"loss": 0.3013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12462314963340759,
|
|
"step": 2615,
|
|
"valid_targets_mean": 2549.9,
|
|
"valid_targets_min": 1625
|
|
},
|
|
{
|
|
"epoch": 4.100156494522691,
|
|
"grad_norm": 0.652005430875824,
|
|
"learning_rate": 1.753205834035785e-05,
|
|
"loss": 0.2924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12314299494028091,
|
|
"step": 2620,
|
|
"valid_targets_mean": 3245.2,
|
|
"valid_targets_min": 1205
|
|
},
|
|
{
|
|
"epoch": 4.107981220657277,
|
|
"grad_norm": 0.7738719093657458,
|
|
"learning_rate": 1.7454621861370286e-05,
|
|
"loss": 0.2909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18441398441791534,
|
|
"step": 2625,
|
|
"valid_targets_mean": 3417.1,
|
|
"valid_targets_min": 1373
|
|
},
|
|
{
|
|
"epoch": 4.115805946791863,
|
|
"grad_norm": 0.7472396848218984,
|
|
"learning_rate": 1.7377224149163945e-05,
|
|
"loss": 0.2999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14144819974899292,
|
|
"step": 2630,
|
|
"valid_targets_mean": 3206.5,
|
|
"valid_targets_min": 945
|
|
},
|
|
{
|
|
"epoch": 4.123630672926447,
|
|
"grad_norm": 0.6066991829364988,
|
|
"learning_rate": 1.7299866382526402e-05,
|
|
"loss": 0.2924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2128000259399414,
|
|
"step": 2635,
|
|
"valid_targets_mean": 5041.9,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 4.131455399061033,
|
|
"grad_norm": 0.621076964548531,
|
|
"learning_rate": 1.7222549739636875e-05,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20066368579864502,
|
|
"step": 2640,
|
|
"valid_targets_mean": 4618.6,
|
|
"valid_targets_min": 1391
|
|
},
|
|
{
|
|
"epoch": 4.139280125195619,
|
|
"grad_norm": 0.5808459609637091,
|
|
"learning_rate": 1.714527539804826e-05,
|
|
"loss": 0.2995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12017588317394257,
|
|
"step": 2645,
|
|
"valid_targets_mean": 4241.8,
|
|
"valid_targets_min": 1117
|
|
},
|
|
{
|
|
"epoch": 4.147104851330203,
|
|
"grad_norm": 0.79602316650433,
|
|
"learning_rate": 1.7068044534669196e-05,
|
|
"loss": 0.3086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13346831500530243,
|
|
"step": 2650,
|
|
"valid_targets_mean": 2431.6,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 4.154929577464789,
|
|
"grad_norm": 0.863420075183862,
|
|
"learning_rate": 1.6990858325746102e-05,
|
|
"loss": 0.2938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13455496728420258,
|
|
"step": 2655,
|
|
"valid_targets_mean": 4023.5,
|
|
"valid_targets_min": 1214
|
|
},
|
|
{
|
|
"epoch": 4.162754303599374,
|
|
"grad_norm": 0.7375705855827372,
|
|
"learning_rate": 1.6913717946845335e-05,
|
|
"loss": 0.3091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23618227243423462,
|
|
"step": 2660,
|
|
"valid_targets_mean": 4119.8,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 4.170579029733959,
|
|
"grad_norm": 0.6830638290785881,
|
|
"learning_rate": 1.6836624572835236e-05,
|
|
"loss": 0.3078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20515203475952148,
|
|
"step": 2665,
|
|
"valid_targets_mean": 4625.5,
|
|
"valid_targets_min": 1680
|
|
},
|
|
{
|
|
"epoch": 4.178403755868545,
|
|
"grad_norm": 0.6053018361544014,
|
|
"learning_rate": 1.6759579377868246e-05,
|
|
"loss": 0.3254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1158984899520874,
|
|
"step": 2670,
|
|
"valid_targets_mean": 3187.1,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 4.18622848200313,
|
|
"grad_norm": 0.6550038757909061,
|
|
"learning_rate": 1.6682583535363046e-05,
|
|
"loss": 0.3057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13109105825424194,
|
|
"step": 2675,
|
|
"valid_targets_mean": 3515.2,
|
|
"valid_targets_min": 1535
|
|
},
|
|
{
|
|
"epoch": 4.194053208137715,
|
|
"grad_norm": 0.7446319585989645,
|
|
"learning_rate": 1.6605638217986622e-05,
|
|
"loss": 0.2729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17897507548332214,
|
|
"step": 2680,
|
|
"valid_targets_mean": 3722.5,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 4.2018779342723,
|
|
"grad_norm": 0.6987538835667901,
|
|
"learning_rate": 1.6528744597636497e-05,
|
|
"loss": 0.2888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16364216804504395,
|
|
"step": 2685,
|
|
"valid_targets_mean": 3185.4,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 4.209702660406886,
|
|
"grad_norm": 0.6694197348221098,
|
|
"learning_rate": 1.6451903845422804e-05,
|
|
"loss": 0.2908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18671992421150208,
|
|
"step": 2690,
|
|
"valid_targets_mean": 4783.9,
|
|
"valid_targets_min": 2004
|
|
},
|
|
{
|
|
"epoch": 4.217527386541471,
|
|
"grad_norm": 0.7889427154471012,
|
|
"learning_rate": 1.6375117131650507e-05,
|
|
"loss": 0.2875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1707482933998108,
|
|
"step": 2695,
|
|
"valid_targets_mean": 3489.4,
|
|
"valid_targets_min": 1153
|
|
},
|
|
{
|
|
"epoch": 4.225352112676056,
|
|
"grad_norm": 0.5657169114515138,
|
|
"learning_rate": 1.629838562580151e-05,
|
|
"loss": 0.2984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1399533450603485,
|
|
"step": 2700,
|
|
"valid_targets_mean": 3991.9,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 4.233176838810642,
|
|
"grad_norm": 0.708749812447136,
|
|
"learning_rate": 1.6221710496516922e-05,
|
|
"loss": 0.2892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16576795279979706,
|
|
"step": 2705,
|
|
"valid_targets_mean": 2960.4,
|
|
"valid_targets_min": 1008
|
|
},
|
|
{
|
|
"epoch": 4.241001564945227,
|
|
"grad_norm": 0.7919370110229206,
|
|
"learning_rate": 1.614509291157921e-05,
|
|
"loss": 0.3012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1864771544933319,
|
|
"step": 2710,
|
|
"valid_targets_mean": 3837.6,
|
|
"valid_targets_min": 1404
|
|
},
|
|
{
|
|
"epoch": 4.248826291079812,
|
|
"grad_norm": 0.6971774631093178,
|
|
"learning_rate": 1.606853403789443e-05,
|
|
"loss": 0.3083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15874437987804413,
|
|
"step": 2715,
|
|
"valid_targets_mean": 2912.4,
|
|
"valid_targets_min": 1669
|
|
},
|
|
{
|
|
"epoch": 4.256651017214398,
|
|
"grad_norm": 0.7666043975725115,
|
|
"learning_rate": 1.5992035041474437e-05,
|
|
"loss": 0.273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13732485473155975,
|
|
"step": 2720,
|
|
"valid_targets_mean": 3001.4,
|
|
"valid_targets_min": 1156
|
|
},
|
|
{
|
|
"epoch": 4.264475743348982,
|
|
"grad_norm": 0.6811607744576619,
|
|
"learning_rate": 1.591559708741915e-05,
|
|
"loss": 0.2918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17523469030857086,
|
|
"step": 2725,
|
|
"valid_targets_mean": 4413.9,
|
|
"valid_targets_min": 1143
|
|
},
|
|
{
|
|
"epoch": 4.272300469483568,
|
|
"grad_norm": 0.5949725208559136,
|
|
"learning_rate": 1.5839221339898787e-05,
|
|
"loss": 0.2985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16199488937854767,
|
|
"step": 2730,
|
|
"valid_targets_mean": 4711.9,
|
|
"valid_targets_min": 1072
|
|
},
|
|
{
|
|
"epoch": 4.280125195618154,
|
|
"grad_norm": 0.6166308505964871,
|
|
"learning_rate": 1.576290896213617e-05,
|
|
"loss": 0.2856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10029016435146332,
|
|
"step": 2735,
|
|
"valid_targets_mean": 3590.0,
|
|
"valid_targets_min": 848
|
|
},
|
|
{
|
|
"epoch": 4.287949921752738,
|
|
"grad_norm": 0.6886327592362123,
|
|
"learning_rate": 1.5686661116388947e-05,
|
|
"loss": 0.3129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16522949934005737,
|
|
"step": 2740,
|
|
"valid_targets_mean": 3831.5,
|
|
"valid_targets_min": 1611
|
|
},
|
|
{
|
|
"epoch": 4.295774647887324,
|
|
"grad_norm": 0.7827421972162619,
|
|
"learning_rate": 1.5610478963931953e-05,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15029554069042206,
|
|
"step": 2745,
|
|
"valid_targets_mean": 2525.6,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 4.30359937402191,
|
|
"grad_norm": 0.6620991736890036,
|
|
"learning_rate": 1.5534363665039482e-05,
|
|
"loss": 0.3149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13980181515216827,
|
|
"step": 2750,
|
|
"valid_targets_mean": 3549.2,
|
|
"valid_targets_min": 1308
|
|
},
|
|
{
|
|
"epoch": 4.311424100156494,
|
|
"grad_norm": 0.752932034510738,
|
|
"learning_rate": 1.5458316378967638e-05,
|
|
"loss": 0.3114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1904088854789734,
|
|
"step": 2755,
|
|
"valid_targets_mean": 3462.2,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 4.31924882629108,
|
|
"grad_norm": 0.6817473949894705,
|
|
"learning_rate": 1.5382338263936663e-05,
|
|
"loss": 0.2715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17454597353935242,
|
|
"step": 2760,
|
|
"valid_targets_mean": 4460.2,
|
|
"valid_targets_min": 1024
|
|
},
|
|
{
|
|
"epoch": 4.327073552425665,
|
|
"grad_norm": 0.6905483335836906,
|
|
"learning_rate": 1.5306430477113336e-05,
|
|
"loss": 0.291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1469484567642212,
|
|
"step": 2765,
|
|
"valid_targets_mean": 2982.4,
|
|
"valid_targets_min": 1148
|
|
},
|
|
{
|
|
"epoch": 4.33489827856025,
|
|
"grad_norm": 0.7091894586930878,
|
|
"learning_rate": 1.5230594174593267e-05,
|
|
"loss": 0.3056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16512230038642883,
|
|
"step": 2770,
|
|
"valid_targets_mean": 3920.8,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 4.342723004694836,
|
|
"grad_norm": 0.730309706109968,
|
|
"learning_rate": 1.515483051138338e-05,
|
|
"loss": 0.3125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10604424774646759,
|
|
"step": 2775,
|
|
"valid_targets_mean": 1970.8,
|
|
"valid_targets_min": 852
|
|
},
|
|
{
|
|
"epoch": 4.350547730829421,
|
|
"grad_norm": 0.6534555108175565,
|
|
"learning_rate": 1.5079140641384275e-05,
|
|
"loss": 0.3121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16894271969795227,
|
|
"step": 2780,
|
|
"valid_targets_mean": 4348.4,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 4.358372456964006,
|
|
"grad_norm": 0.8115035802045736,
|
|
"learning_rate": 1.5003525717372669e-05,
|
|
"loss": 0.2961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18832823634147644,
|
|
"step": 2785,
|
|
"valid_targets_mean": 3487.9,
|
|
"valid_targets_min": 828
|
|
},
|
|
{
|
|
"epoch": 4.366197183098592,
|
|
"grad_norm": 0.6710949804965987,
|
|
"learning_rate": 1.4927986890983801e-05,
|
|
"loss": 0.3117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14379042387008667,
|
|
"step": 2790,
|
|
"valid_targets_mean": 3730.2,
|
|
"valid_targets_min": 1348
|
|
},
|
|
{
|
|
"epoch": 4.374021909233177,
|
|
"grad_norm": 0.7399507894160943,
|
|
"learning_rate": 1.4852525312693958e-05,
|
|
"loss": 0.2823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11917468905448914,
|
|
"step": 2795,
|
|
"valid_targets_mean": 3067.6,
|
|
"valid_targets_min": 1407
|
|
},
|
|
{
|
|
"epoch": 4.381846635367762,
|
|
"grad_norm": 0.645691070882045,
|
|
"learning_rate": 1.4777142131802897e-05,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14328545331954956,
|
|
"step": 2800,
|
|
"valid_targets_mean": 4753.9,
|
|
"valid_targets_min": 1051
|
|
},
|
|
{
|
|
"epoch": 4.389671361502347,
|
|
"grad_norm": 0.6685902355749985,
|
|
"learning_rate": 1.4701838496416379e-05,
|
|
"loss": 0.3008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14348486065864563,
|
|
"step": 2805,
|
|
"valid_targets_mean": 3120.2,
|
|
"valid_targets_min": 1477
|
|
},
|
|
{
|
|
"epoch": 4.397496087636933,
|
|
"grad_norm": 0.6817765326403208,
|
|
"learning_rate": 1.4626615553428659e-05,
|
|
"loss": 0.2929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09124431014060974,
|
|
"step": 2810,
|
|
"valid_targets_mean": 2663.8,
|
|
"valid_targets_min": 1095
|
|
},
|
|
{
|
|
"epoch": 4.405320813771518,
|
|
"grad_norm": 0.8086285316798018,
|
|
"learning_rate": 1.4551474448505008e-05,
|
|
"loss": 0.2952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10501548647880554,
|
|
"step": 2815,
|
|
"valid_targets_mean": 1928.5,
|
|
"valid_targets_min": 853
|
|
},
|
|
{
|
|
"epoch": 4.413145539906103,
|
|
"grad_norm": 0.7169368443914089,
|
|
"learning_rate": 1.4476416326064304e-05,
|
|
"loss": 0.2675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16433200240135193,
|
|
"step": 2820,
|
|
"valid_targets_mean": 3460.2,
|
|
"valid_targets_min": 867
|
|
},
|
|
{
|
|
"epoch": 4.420970266040689,
|
|
"grad_norm": 0.5488506396116497,
|
|
"learning_rate": 1.4401442329261575e-05,
|
|
"loss": 0.3046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.163071870803833,
|
|
"step": 2825,
|
|
"valid_targets_mean": 5542.1,
|
|
"valid_targets_min": 1028
|
|
},
|
|
{
|
|
"epoch": 4.428794992175274,
|
|
"grad_norm": 0.7268846811614931,
|
|
"learning_rate": 1.4326553599970585e-05,
|
|
"loss": 0.2978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14691311120986938,
|
|
"step": 2830,
|
|
"valid_targets_mean": 3621.6,
|
|
"valid_targets_min": 1355
|
|
},
|
|
{
|
|
"epoch": 4.436619718309859,
|
|
"grad_norm": 0.7677702612793671,
|
|
"learning_rate": 1.4251751278766472e-05,
|
|
"loss": 0.2911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13916173577308655,
|
|
"step": 2835,
|
|
"valid_targets_mean": 2401.2,
|
|
"valid_targets_min": 1109
|
|
},
|
|
{
|
|
"epoch": 4.444444444444445,
|
|
"grad_norm": 0.7140118146971985,
|
|
"learning_rate": 1.4177036504908322e-05,
|
|
"loss": 0.2969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10508200526237488,
|
|
"step": 2840,
|
|
"valid_targets_mean": 2271.9,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 4.452269170579029,
|
|
"grad_norm": 0.879813618856701,
|
|
"learning_rate": 1.4102410416321877e-05,
|
|
"loss": 0.3257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14075878262519836,
|
|
"step": 2845,
|
|
"valid_targets_mean": 2631.4,
|
|
"valid_targets_min": 908
|
|
},
|
|
{
|
|
"epoch": 4.460093896713615,
|
|
"grad_norm": 0.6169609646353483,
|
|
"learning_rate": 1.4027874149582177e-05,
|
|
"loss": 0.3195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14224573969841003,
|
|
"step": 2850,
|
|
"valid_targets_mean": 4736.5,
|
|
"valid_targets_min": 1677
|
|
},
|
|
{
|
|
"epoch": 4.467918622848201,
|
|
"grad_norm": 0.6826863344733243,
|
|
"learning_rate": 1.395342883989625e-05,
|
|
"loss": 0.2957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13745032250881195,
|
|
"step": 2855,
|
|
"valid_targets_mean": 3478.2,
|
|
"valid_targets_min": 1558
|
|
},
|
|
{
|
|
"epoch": 4.475743348982785,
|
|
"grad_norm": 0.5347110201560462,
|
|
"learning_rate": 1.387907562108581e-05,
|
|
"loss": 0.3111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1299440562725067,
|
|
"step": 2860,
|
|
"valid_targets_mean": 5759.8,
|
|
"valid_targets_min": 1607
|
|
},
|
|
{
|
|
"epoch": 4.483568075117371,
|
|
"grad_norm": 0.7294414163315438,
|
|
"learning_rate": 1.380481562557002e-05,
|
|
"loss": 0.2763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0863775759935379,
|
|
"step": 2865,
|
|
"valid_targets_mean": 2032.5,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 4.491392801251957,
|
|
"grad_norm": 0.7135569632371241,
|
|
"learning_rate": 1.3730649984348224e-05,
|
|
"loss": 0.293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1083780825138092,
|
|
"step": 2870,
|
|
"valid_targets_mean": 2561.6,
|
|
"valid_targets_min": 1527
|
|
},
|
|
{
|
|
"epoch": 4.499217527386541,
|
|
"grad_norm": 0.6589202147065584,
|
|
"learning_rate": 1.3656579826982718e-05,
|
|
"loss": 0.3145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24648287892341614,
|
|
"step": 2875,
|
|
"valid_targets_mean": 6207.9,
|
|
"valid_targets_min": 1917
|
|
},
|
|
{
|
|
"epoch": 4.507042253521127,
|
|
"grad_norm": 0.761238479428033,
|
|
"learning_rate": 1.3582606281581567e-05,
|
|
"loss": 0.3083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17097613215446472,
|
|
"step": 2880,
|
|
"valid_targets_mean": 3192.8,
|
|
"valid_targets_min": 1655
|
|
},
|
|
{
|
|
"epoch": 4.514866979655712,
|
|
"grad_norm": 0.7153865810782501,
|
|
"learning_rate": 1.3508730474781393e-05,
|
|
"loss": 0.2946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15984931588172913,
|
|
"step": 2885,
|
|
"valid_targets_mean": 4827.8,
|
|
"valid_targets_min": 2348
|
|
},
|
|
{
|
|
"epoch": 4.522691705790297,
|
|
"grad_norm": 0.767311385798822,
|
|
"learning_rate": 1.3434953531730241e-05,
|
|
"loss": 0.312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12655822932720184,
|
|
"step": 2890,
|
|
"valid_targets_mean": 2977.8,
|
|
"valid_targets_min": 779
|
|
},
|
|
{
|
|
"epoch": 4.530516431924883,
|
|
"grad_norm": 0.7674127140321725,
|
|
"learning_rate": 1.3361276576070443e-05,
|
|
"loss": 0.2816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12477187812328339,
|
|
"step": 2895,
|
|
"valid_targets_mean": 2598.0,
|
|
"valid_targets_min": 812
|
|
},
|
|
{
|
|
"epoch": 4.538341158059468,
|
|
"grad_norm": 0.678514380422609,
|
|
"learning_rate": 1.3287700729921489e-05,
|
|
"loss": 0.3159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13608643412590027,
|
|
"step": 2900,
|
|
"valid_targets_mean": 2902.4,
|
|
"valid_targets_min": 1626
|
|
},
|
|
{
|
|
"epoch": 4.546165884194053,
|
|
"grad_norm": 0.6888413222543964,
|
|
"learning_rate": 1.3214227113862941e-05,
|
|
"loss": 0.3092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1350422501564026,
|
|
"step": 2905,
|
|
"valid_targets_mean": 3151.5,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 4.553990610328638,
|
|
"grad_norm": 0.6716770257123718,
|
|
"learning_rate": 1.3140856846917374e-05,
|
|
"loss": 0.3089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11061020940542221,
|
|
"step": 2910,
|
|
"valid_targets_mean": 2878.8,
|
|
"valid_targets_min": 1645
|
|
},
|
|
{
|
|
"epoch": 4.561815336463224,
|
|
"grad_norm": 0.7186173804178531,
|
|
"learning_rate": 1.3067591046533327e-05,
|
|
"loss": 0.3064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14171457290649414,
|
|
"step": 2915,
|
|
"valid_targets_mean": 2923.9,
|
|
"valid_targets_min": 1180
|
|
},
|
|
{
|
|
"epoch": 4.569640062597809,
|
|
"grad_norm": 0.7407578535501071,
|
|
"learning_rate": 1.2994430828568292e-05,
|
|
"loss": 0.2957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1762600839138031,
|
|
"step": 2920,
|
|
"valid_targets_mean": 2918.2,
|
|
"valid_targets_min": 2094
|
|
},
|
|
{
|
|
"epoch": 4.577464788732394,
|
|
"grad_norm": 0.6766237024102171,
|
|
"learning_rate": 1.2921377307271717e-05,
|
|
"loss": 0.2803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11852709203958511,
|
|
"step": 2925,
|
|
"valid_targets_mean": 2938.0,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 4.58528951486698,
|
|
"grad_norm": 0.6144428440928823,
|
|
"learning_rate": 1.2848431595268001e-05,
|
|
"loss": 0.305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10764556378126144,
|
|
"step": 2930,
|
|
"valid_targets_mean": 3044.5,
|
|
"valid_targets_min": 1077
|
|
},
|
|
{
|
|
"epoch": 4.593114241001565,
|
|
"grad_norm": 0.7375478615797338,
|
|
"learning_rate": 1.2775594803539613e-05,
|
|
"loss": 0.2831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18372182548046112,
|
|
"step": 2935,
|
|
"valid_targets_mean": 3332.9,
|
|
"valid_targets_min": 1597
|
|
},
|
|
{
|
|
"epoch": 4.60093896713615,
|
|
"grad_norm": 0.8892102274226293,
|
|
"learning_rate": 1.2702868041410122e-05,
|
|
"loss": 0.2804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14227896928787231,
|
|
"step": 2940,
|
|
"valid_targets_mean": 2688.4,
|
|
"valid_targets_min": 968
|
|
},
|
|
{
|
|
"epoch": 4.608763693270736,
|
|
"grad_norm": 0.7817684747766106,
|
|
"learning_rate": 1.2630252416527332e-05,
|
|
"loss": 0.3139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1082865446805954,
|
|
"step": 2945,
|
|
"valid_targets_mean": 2791.9,
|
|
"valid_targets_min": 1282
|
|
},
|
|
{
|
|
"epoch": 4.616588419405321,
|
|
"grad_norm": 0.7113021794432126,
|
|
"learning_rate": 1.2557749034846367e-05,
|
|
"loss": 0.3028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14704297482967377,
|
|
"step": 2950,
|
|
"valid_targets_mean": 3075.2,
|
|
"valid_targets_min": 936
|
|
},
|
|
{
|
|
"epoch": 4.624413145539906,
|
|
"grad_norm": 0.6298704359449511,
|
|
"learning_rate": 1.2485359000612886e-05,
|
|
"loss": 0.3236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08937743306159973,
|
|
"step": 2955,
|
|
"valid_targets_mean": 2992.5,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 4.632237871674492,
|
|
"grad_norm": 0.7260853689279305,
|
|
"learning_rate": 1.2413083416346226e-05,
|
|
"loss": 0.2991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13618126511573792,
|
|
"step": 2960,
|
|
"valid_targets_mean": 3067.6,
|
|
"valid_targets_min": 1345
|
|
},
|
|
{
|
|
"epoch": 4.640062597809076,
|
|
"grad_norm": 0.8705322895789773,
|
|
"learning_rate": 1.2340923382822617e-05,
|
|
"loss": 0.3095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18001487851142883,
|
|
"step": 2965,
|
|
"valid_targets_mean": 2894.1,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 4.647887323943662,
|
|
"grad_norm": 0.6256417429416149,
|
|
"learning_rate": 1.226887999905844e-05,
|
|
"loss": 0.3014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15340688824653625,
|
|
"step": 2970,
|
|
"valid_targets_mean": 4156.2,
|
|
"valid_targets_min": 1068
|
|
},
|
|
{
|
|
"epoch": 4.655712050078248,
|
|
"grad_norm": 0.6710338527114692,
|
|
"learning_rate": 1.2196954362293433e-05,
|
|
"loss": 0.2917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14488068222999573,
|
|
"step": 2975,
|
|
"valid_targets_mean": 3418.4,
|
|
"valid_targets_min": 1473
|
|
},
|
|
{
|
|
"epoch": 4.663536776212832,
|
|
"grad_norm": 0.7077017527420193,
|
|
"learning_rate": 1.2125147567974049e-05,
|
|
"loss": 0.2855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1289902627468109,
|
|
"step": 2980,
|
|
"valid_targets_mean": 2782.0,
|
|
"valid_targets_min": 1097
|
|
},
|
|
{
|
|
"epoch": 4.671361502347418,
|
|
"grad_norm": 0.6991064860356734,
|
|
"learning_rate": 1.2053460709736724e-05,
|
|
"loss": 0.3022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10459490865468979,
|
|
"step": 2985,
|
|
"valid_targets_mean": 2627.6,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 4.679186228482003,
|
|
"grad_norm": 0.7413676942089216,
|
|
"learning_rate": 1.1981894879391249e-05,
|
|
"loss": 0.3093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17290960252285004,
|
|
"step": 2990,
|
|
"valid_targets_mean": 3406.6,
|
|
"valid_targets_min": 1306
|
|
},
|
|
{
|
|
"epoch": 4.687010954616588,
|
|
"grad_norm": 0.706859640366104,
|
|
"learning_rate": 1.1910451166904107e-05,
|
|
"loss": 0.2818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12370892614126205,
|
|
"step": 2995,
|
|
"valid_targets_mean": 2687.0,
|
|
"valid_targets_min": 1042
|
|
},
|
|
{
|
|
"epoch": 4.694835680751174,
|
|
"grad_norm": 0.637972817023368,
|
|
"learning_rate": 1.1839130660381906e-05,
|
|
"loss": 0.2869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09519410133361816,
|
|
"step": 3000,
|
|
"valid_targets_mean": 2948.9,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 4.702660406885759,
|
|
"grad_norm": 0.6198165041924901,
|
|
"learning_rate": 1.17679344460548e-05,
|
|
"loss": 0.2752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12153550982475281,
|
|
"step": 3005,
|
|
"valid_targets_mean": 4018.8,
|
|
"valid_targets_min": 2401
|
|
},
|
|
{
|
|
"epoch": 4.710485133020344,
|
|
"grad_norm": 0.7206952503689106,
|
|
"learning_rate": 1.169686360825993e-05,
|
|
"loss": 0.3091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1465935856103897,
|
|
"step": 3010,
|
|
"valid_targets_mean": 2534.0,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 4.71830985915493,
|
|
"grad_norm": 0.7471075400581767,
|
|
"learning_rate": 1.1625919229424927e-05,
|
|
"loss": 0.2854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14908291399478912,
|
|
"step": 3015,
|
|
"valid_targets_mean": 3103.8,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 4.726134585289515,
|
|
"grad_norm": 0.6506843010917256,
|
|
"learning_rate": 1.1555102390051416e-05,
|
|
"loss": 0.2989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19343061745166779,
|
|
"step": 3020,
|
|
"valid_targets_mean": 5200.6,
|
|
"valid_targets_min": 1622
|
|
},
|
|
{
|
|
"epoch": 4.7339593114241,
|
|
"grad_norm": 0.6084918610353264,
|
|
"learning_rate": 1.1484414168698547e-05,
|
|
"loss": 0.2868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1388978660106659,
|
|
"step": 3025,
|
|
"valid_targets_mean": 4343.8,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 4.741784037558686,
|
|
"grad_norm": 0.6879294871624883,
|
|
"learning_rate": 1.1413855641966616e-05,
|
|
"loss": 0.294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2059255838394165,
|
|
"step": 3030,
|
|
"valid_targets_mean": 4369.1,
|
|
"valid_targets_min": 832
|
|
},
|
|
{
|
|
"epoch": 4.749608763693271,
|
|
"grad_norm": 0.5990861868236629,
|
|
"learning_rate": 1.1343427884480614e-05,
|
|
"loss": 0.2893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11333713680505753,
|
|
"step": 3035,
|
|
"valid_targets_mean": 3254.9,
|
|
"valid_targets_min": 1134
|
|
},
|
|
{
|
|
"epoch": 4.757433489827856,
|
|
"grad_norm": 0.742870178079539,
|
|
"learning_rate": 1.1273131968873878e-05,
|
|
"loss": 0.3014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16452059149742126,
|
|
"step": 3040,
|
|
"valid_targets_mean": 3306.5,
|
|
"valid_targets_min": 1380
|
|
},
|
|
{
|
|
"epoch": 4.765258215962441,
|
|
"grad_norm": 0.8133557848625689,
|
|
"learning_rate": 1.1202968965771767e-05,
|
|
"loss": 0.2872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22119231522083282,
|
|
"step": 3045,
|
|
"valid_targets_mean": 2629.8,
|
|
"valid_targets_min": 1418
|
|
},
|
|
{
|
|
"epoch": 4.773082942097027,
|
|
"grad_norm": 0.7906877640353549,
|
|
"learning_rate": 1.1132939943775353e-05,
|
|
"loss": 0.3067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15806549787521362,
|
|
"step": 3050,
|
|
"valid_targets_mean": 2288.8,
|
|
"valid_targets_min": 1403
|
|
},
|
|
{
|
|
"epoch": 4.780907668231611,
|
|
"grad_norm": 0.672168030568747,
|
|
"learning_rate": 1.1063045969445123e-05,
|
|
"loss": 0.3114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15582522749900818,
|
|
"step": 3055,
|
|
"valid_targets_mean": 3942.2,
|
|
"valid_targets_min": 1691
|
|
},
|
|
{
|
|
"epoch": 4.788732394366197,
|
|
"grad_norm": 0.619628358384247,
|
|
"learning_rate": 1.0993288107284787e-05,
|
|
"loss": 0.2971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15950481593608856,
|
|
"step": 3060,
|
|
"valid_targets_mean": 4121.6,
|
|
"valid_targets_min": 1080
|
|
},
|
|
{
|
|
"epoch": 4.796557120500783,
|
|
"grad_norm": 0.535301542067281,
|
|
"learning_rate": 1.0923667419724973e-05,
|
|
"loss": 0.2618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1330057680606842,
|
|
"step": 3065,
|
|
"valid_targets_mean": 5703.2,
|
|
"valid_targets_min": 1459
|
|
},
|
|
{
|
|
"epoch": 4.804381846635367,
|
|
"grad_norm": 0.7227143203838373,
|
|
"learning_rate": 1.0854184967107162e-05,
|
|
"loss": 0.2908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18391254544258118,
|
|
"step": 3070,
|
|
"valid_targets_mean": 3479.0,
|
|
"valid_targets_min": 1144
|
|
},
|
|
{
|
|
"epoch": 4.812206572769953,
|
|
"grad_norm": 0.5526103078842306,
|
|
"learning_rate": 1.0784841807667448e-05,
|
|
"loss": 0.2787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11272988468408585,
|
|
"step": 3075,
|
|
"valid_targets_mean": 3316.6,
|
|
"valid_targets_min": 1444
|
|
},
|
|
{
|
|
"epoch": 4.820031298904539,
|
|
"grad_norm": 0.6927041742969711,
|
|
"learning_rate": 1.071563899752046e-05,
|
|
"loss": 0.337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27749431133270264,
|
|
"step": 3080,
|
|
"valid_targets_mean": 4371.0,
|
|
"valid_targets_min": 968
|
|
},
|
|
{
|
|
"epoch": 4.827856025039123,
|
|
"grad_norm": 0.574851630600299,
|
|
"learning_rate": 1.0646577590643261e-05,
|
|
"loss": 0.3212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19200757145881653,
|
|
"step": 3085,
|
|
"valid_targets_mean": 5457.9,
|
|
"valid_targets_min": 1192
|
|
},
|
|
{
|
|
"epoch": 4.835680751173709,
|
|
"grad_norm": 0.7583493270417194,
|
|
"learning_rate": 1.0577658638859336e-05,
|
|
"loss": 0.3044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14205504953861237,
|
|
"step": 3090,
|
|
"valid_targets_mean": 2741.0,
|
|
"valid_targets_min": 1385
|
|
},
|
|
{
|
|
"epoch": 4.843505477308295,
|
|
"grad_norm": 0.6403599120514799,
|
|
"learning_rate": 1.050888319182251e-05,
|
|
"loss": 0.2854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16546416282653809,
|
|
"step": 3095,
|
|
"valid_targets_mean": 4737.8,
|
|
"valid_targets_min": 1035
|
|
},
|
|
{
|
|
"epoch": 4.851330203442879,
|
|
"grad_norm": 0.7335276458002238,
|
|
"learning_rate": 1.0440252297000993e-05,
|
|
"loss": 0.305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09220539033412933,
|
|
"step": 3100,
|
|
"valid_targets_mean": 2139.5,
|
|
"valid_targets_min": 1012
|
|
},
|
|
{
|
|
"epoch": 4.859154929577465,
|
|
"grad_norm": 0.6518974777340326,
|
|
"learning_rate": 1.0371766999661452e-05,
|
|
"loss": 0.2939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08310995250940323,
|
|
"step": 3105,
|
|
"valid_targets_mean": 1784.6,
|
|
"valid_targets_min": 1045
|
|
},
|
|
{
|
|
"epoch": 4.86697965571205,
|
|
"grad_norm": 0.717254106295311,
|
|
"learning_rate": 1.0303428342853049e-05,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10422059893608093,
|
|
"step": 3110,
|
|
"valid_targets_mean": 2659.8,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 4.874804381846635,
|
|
"grad_norm": 0.7941885374864225,
|
|
"learning_rate": 1.0235237367391567e-05,
|
|
"loss": 0.3156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1676805019378662,
|
|
"step": 3115,
|
|
"valid_targets_mean": 3845.8,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 4.882629107981221,
|
|
"grad_norm": 0.6682221976746766,
|
|
"learning_rate": 1.0167195111843561e-05,
|
|
"loss": 0.2856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19089744985103607,
|
|
"step": 3120,
|
|
"valid_targets_mean": 4454.0,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 4.890453834115806,
|
|
"grad_norm": 0.7362434420136498,
|
|
"learning_rate": 1.009930261251058e-05,
|
|
"loss": 0.2958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16696253418922424,
|
|
"step": 3125,
|
|
"valid_targets_mean": 3385.4,
|
|
"valid_targets_min": 1565
|
|
},
|
|
{
|
|
"epoch": 4.898278560250391,
|
|
"grad_norm": 0.7368879450800028,
|
|
"learning_rate": 1.0031560903413283e-05,
|
|
"loss": 0.2927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17647048830986023,
|
|
"step": 3130,
|
|
"valid_targets_mean": 3712.0,
|
|
"valid_targets_min": 1862
|
|
},
|
|
{
|
|
"epoch": 4.906103286384976,
|
|
"grad_norm": 0.7232984477651483,
|
|
"learning_rate": 9.963971016275811e-06,
|
|
"loss": 0.2931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12294730544090271,
|
|
"step": 3135,
|
|
"valid_targets_mean": 2575.6,
|
|
"valid_targets_min": 1072
|
|
},
|
|
{
|
|
"epoch": 4.913928012519562,
|
|
"grad_norm": 0.8320858308335743,
|
|
"learning_rate": 9.896533980509979e-06,
|
|
"loss": 0.3006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17643240094184875,
|
|
"step": 3140,
|
|
"valid_targets_mean": 2961.6,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 4.921752738654147,
|
|
"grad_norm": 0.7081704014248471,
|
|
"learning_rate": 9.829250823199665e-06,
|
|
"loss": 0.2992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20974400639533997,
|
|
"step": 3145,
|
|
"valid_targets_mean": 4594.6,
|
|
"valid_targets_min": 1843
|
|
},
|
|
{
|
|
"epoch": 4.929577464788732,
|
|
"grad_norm": 0.6530270670962252,
|
|
"learning_rate": 9.762122569085116e-06,
|
|
"loss": 0.2812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12794658541679382,
|
|
"step": 3150,
|
|
"valid_targets_mean": 3605.6,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 4.937402190923318,
|
|
"grad_norm": 0.6578076930430099,
|
|
"learning_rate": 9.695150240547367e-06,
|
|
"loss": 0.2986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13816185295581818,
|
|
"step": 3155,
|
|
"valid_targets_mean": 3411.9,
|
|
"valid_targets_min": 818
|
|
},
|
|
{
|
|
"epoch": 4.945226917057903,
|
|
"grad_norm": 0.6391113778427863,
|
|
"learning_rate": 9.628334857592658e-06,
|
|
"loss": 0.323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17456874251365662,
|
|
"step": 3160,
|
|
"valid_targets_mean": 4333.9,
|
|
"valid_targets_min": 1778
|
|
},
|
|
{
|
|
"epoch": 4.953051643192488,
|
|
"grad_norm": 0.6883109333698053,
|
|
"learning_rate": 9.561677437836933e-06,
|
|
"loss": 0.295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16550667583942413,
|
|
"step": 3165,
|
|
"valid_targets_mean": 4559.4,
|
|
"valid_targets_min": 916
|
|
},
|
|
{
|
|
"epoch": 4.960876369327074,
|
|
"grad_norm": 0.7730717075799641,
|
|
"learning_rate": 9.495178996490293e-06,
|
|
"loss": 0.3241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2583296298980713,
|
|
"step": 3170,
|
|
"valid_targets_mean": 3809.1,
|
|
"valid_targets_min": 1568
|
|
},
|
|
{
|
|
"epoch": 4.968701095461659,
|
|
"grad_norm": 0.6242752250559606,
|
|
"learning_rate": 9.428840546341553e-06,
|
|
"loss": 0.2776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11011406034231186,
|
|
"step": 3175,
|
|
"valid_targets_mean": 3279.2,
|
|
"valid_targets_min": 1780
|
|
},
|
|
{
|
|
"epoch": 4.976525821596244,
|
|
"grad_norm": 0.8399537286681305,
|
|
"learning_rate": 9.362663097742823e-06,
|
|
"loss": 0.2961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10469602048397064,
|
|
"step": 3180,
|
|
"valid_targets_mean": 1525.2,
|
|
"valid_targets_min": 772
|
|
},
|
|
{
|
|
"epoch": 4.98435054773083,
|
|
"grad_norm": 0.7495690085221413,
|
|
"learning_rate": 9.296647658594138e-06,
|
|
"loss": 0.2758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1324007213115692,
|
|
"step": 3185,
|
|
"valid_targets_mean": 2242.9,
|
|
"valid_targets_min": 532
|
|
},
|
|
{
|
|
"epoch": 4.992175273865414,
|
|
"grad_norm": 0.7701692683475493,
|
|
"learning_rate": 9.230795234328049e-06,
|
|
"loss": 0.2705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16519708931446075,
|
|
"step": 3190,
|
|
"valid_targets_mean": 4255.9,
|
|
"valid_targets_min": 1420
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.6394268351831731,
|
|
"learning_rate": 9.165106827894391e-06,
|
|
"loss": 0.2896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1614047884941101,
|
|
"step": 3195,
|
|
"valid_targets_mean": 3201.0,
|
|
"valid_targets_min": 888
|
|
},
|
|
{
|
|
"epoch": 5.007824726134586,
|
|
"grad_norm": 0.5982409910592352,
|
|
"learning_rate": 9.099583439744915e-06,
|
|
"loss": 0.2931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25535017251968384,
|
|
"step": 3200,
|
|
"valid_targets_mean": 4724.9,
|
|
"valid_targets_min": 983
|
|
},
|
|
{
|
|
"epoch": 5.01564945226917,
|
|
"grad_norm": 0.7711477531309291,
|
|
"learning_rate": 9.034226067818142e-06,
|
|
"loss": 0.2688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12466821819543839,
|
|
"step": 3205,
|
|
"valid_targets_mean": 2254.1,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 5.023474178403756,
|
|
"grad_norm": 0.8181974362502784,
|
|
"learning_rate": 8.9690357075241e-06,
|
|
"loss": 0.2717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16866326332092285,
|
|
"step": 3210,
|
|
"valid_targets_mean": 2754.1,
|
|
"valid_targets_min": 1462
|
|
},
|
|
{
|
|
"epoch": 5.031298904538341,
|
|
"grad_norm": 0.6225681243018193,
|
|
"learning_rate": 8.904013351729193e-06,
|
|
"loss": 0.2746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1064787432551384,
|
|
"step": 3215,
|
|
"valid_targets_mean": 4107.9,
|
|
"valid_targets_min": 486
|
|
},
|
|
{
|
|
"epoch": 5.039123630672926,
|
|
"grad_norm": 0.7077748484160369,
|
|
"learning_rate": 8.839159990741061e-06,
|
|
"loss": 0.2629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16850677132606506,
|
|
"step": 3220,
|
|
"valid_targets_mean": 4466.9,
|
|
"valid_targets_min": 1612
|
|
},
|
|
{
|
|
"epoch": 5.046948356807512,
|
|
"grad_norm": 0.7536647285541835,
|
|
"learning_rate": 8.774476612293534e-06,
|
|
"loss": 0.2886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16152280569076538,
|
|
"step": 3225,
|
|
"valid_targets_mean": 3494.6,
|
|
"valid_targets_min": 766
|
|
},
|
|
{
|
|
"epoch": 5.054773082942097,
|
|
"grad_norm": 0.7094539730569712,
|
|
"learning_rate": 8.709964201531538e-06,
|
|
"loss": 0.287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1602696180343628,
|
|
"step": 3230,
|
|
"valid_targets_mean": 3874.9,
|
|
"valid_targets_min": 2025
|
|
},
|
|
{
|
|
"epoch": 5.062597809076682,
|
|
"grad_norm": 0.7370010693327408,
|
|
"learning_rate": 8.645623740996117e-06,
|
|
"loss": 0.2697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.113275907933712,
|
|
"step": 3235,
|
|
"valid_targets_mean": 2566.2,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 5.070422535211268,
|
|
"grad_norm": 0.7072494044439211,
|
|
"learning_rate": 8.58145621060949e-06,
|
|
"loss": 0.2579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10115767270326614,
|
|
"step": 3240,
|
|
"valid_targets_mean": 2815.0,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 5.078247261345853,
|
|
"grad_norm": 0.6414291779822258,
|
|
"learning_rate": 8.517462587660084e-06,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1408054530620575,
|
|
"step": 3245,
|
|
"valid_targets_mean": 4966.2,
|
|
"valid_targets_min": 1185
|
|
},
|
|
{
|
|
"epoch": 5.086071987480438,
|
|
"grad_norm": 0.8575162799220872,
|
|
"learning_rate": 8.453643846787673e-06,
|
|
"loss": 0.3013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10422495752573013,
|
|
"step": 3250,
|
|
"valid_targets_mean": 1577.1,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 5.093896713615023,
|
|
"grad_norm": 0.6674274142184843,
|
|
"learning_rate": 8.390000959968529e-06,
|
|
"loss": 0.256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.164650097489357,
|
|
"step": 3255,
|
|
"valid_targets_mean": 4524.9,
|
|
"valid_targets_min": 1562
|
|
},
|
|
{
|
|
"epoch": 5.101721439749609,
|
|
"grad_norm": 0.8048562782802858,
|
|
"learning_rate": 8.326534896500646e-06,
|
|
"loss": 0.3207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11484989523887634,
|
|
"step": 3260,
|
|
"valid_targets_mean": 2732.9,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 5.109546165884194,
|
|
"grad_norm": 0.7814809034951504,
|
|
"learning_rate": 8.263246622988899e-06,
|
|
"loss": 0.2864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14056876301765442,
|
|
"step": 3265,
|
|
"valid_targets_mean": 3302.0,
|
|
"valid_targets_min": 904
|
|
},
|
|
{
|
|
"epoch": 5.117370892018779,
|
|
"grad_norm": 0.5830177984341955,
|
|
"learning_rate": 8.200137103330428e-06,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10658122599124908,
|
|
"step": 3270,
|
|
"valid_targets_mean": 4095.1,
|
|
"valid_targets_min": 1626
|
|
},
|
|
{
|
|
"epoch": 5.125195618153365,
|
|
"grad_norm": 0.7497013627389598,
|
|
"learning_rate": 8.13720729869987e-06,
|
|
"loss": 0.2639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10727694630622864,
|
|
"step": 3275,
|
|
"valid_targets_mean": 2520.5,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 5.13302034428795,
|
|
"grad_norm": 0.504553268910869,
|
|
"learning_rate": 8.07445816753478e-06,
|
|
"loss": 0.2933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11588989198207855,
|
|
"step": 3280,
|
|
"valid_targets_mean": 4858.2,
|
|
"valid_targets_min": 1203
|
|
},
|
|
{
|
|
"epoch": 5.140845070422535,
|
|
"grad_norm": 0.6932458316873814,
|
|
"learning_rate": 8.01189066552099e-06,
|
|
"loss": 0.3029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21443898975849152,
|
|
"step": 3285,
|
|
"valid_targets_mean": 5149.5,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 5.148669796557121,
|
|
"grad_norm": 0.6823249738720764,
|
|
"learning_rate": 7.949505745578076e-06,
|
|
"loss": 0.2834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16875088214874268,
|
|
"step": 3290,
|
|
"valid_targets_mean": 4024.5,
|
|
"valid_targets_min": 1614
|
|
},
|
|
{
|
|
"epoch": 5.156494522691705,
|
|
"grad_norm": 0.6519607660248291,
|
|
"learning_rate": 7.887304357844838e-06,
|
|
"loss": 0.2581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12601420283317566,
|
|
"step": 3295,
|
|
"valid_targets_mean": 5417.8,
|
|
"valid_targets_min": 1707
|
|
},
|
|
{
|
|
"epoch": 5.164319248826291,
|
|
"grad_norm": 0.6487836505582227,
|
|
"learning_rate": 7.825287449664854e-06,
|
|
"loss": 0.2686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13218137621879578,
|
|
"step": 3300,
|
|
"valid_targets_mean": 4428.2,
|
|
"valid_targets_min": 1126
|
|
},
|
|
{
|
|
"epoch": 5.172143974960877,
|
|
"grad_norm": 0.7884837597627394,
|
|
"learning_rate": 7.763455965571998e-06,
|
|
"loss": 0.2808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11905977129936218,
|
|
"step": 3305,
|
|
"valid_targets_mean": 2462.2,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 5.179968701095461,
|
|
"grad_norm": 0.7179071602003844,
|
|
"learning_rate": 7.701810847276104e-06,
|
|
"loss": 0.2818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1332007646560669,
|
|
"step": 3310,
|
|
"valid_targets_mean": 3702.9,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 5.187793427230047,
|
|
"grad_norm": 0.7140305491493428,
|
|
"learning_rate": 7.640353033648598e-06,
|
|
"loss": 0.2649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13985693454742432,
|
|
"step": 3315,
|
|
"valid_targets_mean": 3552.9,
|
|
"valid_targets_min": 2031
|
|
},
|
|
{
|
|
"epoch": 5.195618153364633,
|
|
"grad_norm": 1.0906456378115439,
|
|
"learning_rate": 7.579083460708218e-06,
|
|
"loss": 0.2895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11479847133159637,
|
|
"step": 3320,
|
|
"valid_targets_mean": 2712.2,
|
|
"valid_targets_min": 1983
|
|
},
|
|
{
|
|
"epoch": 5.203442879499217,
|
|
"grad_norm": 0.6242500819030088,
|
|
"learning_rate": 7.518003061606734e-06,
|
|
"loss": 0.2817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22820940613746643,
|
|
"step": 3325,
|
|
"valid_targets_mean": 5416.5,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 5.211267605633803,
|
|
"grad_norm": 0.8696212524248051,
|
|
"learning_rate": 7.457112766614769e-06,
|
|
"loss": 0.2926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12599942088127136,
|
|
"step": 3330,
|
|
"valid_targets_mean": 2991.4,
|
|
"valid_targets_min": 1675
|
|
},
|
|
{
|
|
"epoch": 5.219092331768388,
|
|
"grad_norm": 0.9318789051065317,
|
|
"learning_rate": 7.396413503107571e-06,
|
|
"loss": 0.2618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13760197162628174,
|
|
"step": 3335,
|
|
"valid_targets_mean": 2613.8,
|
|
"valid_targets_min": 1029
|
|
},
|
|
{
|
|
"epoch": 5.226917057902973,
|
|
"grad_norm": 0.6833293446920131,
|
|
"learning_rate": 7.335906195550968e-06,
|
|
"loss": 0.2643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11336815357208252,
|
|
"step": 3340,
|
|
"valid_targets_mean": 2973.2,
|
|
"valid_targets_min": 1181
|
|
},
|
|
{
|
|
"epoch": 5.234741784037559,
|
|
"grad_norm": 0.7267384998432306,
|
|
"learning_rate": 7.275591765487222e-06,
|
|
"loss": 0.2677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10678604245185852,
|
|
"step": 3345,
|
|
"valid_targets_mean": 2942.9,
|
|
"valid_targets_min": 1171
|
|
},
|
|
{
|
|
"epoch": 5.242566510172144,
|
|
"grad_norm": 0.7022837037761799,
|
|
"learning_rate": 7.215471131521043e-06,
|
|
"loss": 0.2714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11043022572994232,
|
|
"step": 3350,
|
|
"valid_targets_mean": 2791.4,
|
|
"valid_targets_min": 1221
|
|
},
|
|
{
|
|
"epoch": 5.250391236306729,
|
|
"grad_norm": 0.8781846949764806,
|
|
"learning_rate": 7.155545209305559e-06,
|
|
"loss": 0.2907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11200830340385437,
|
|
"step": 3355,
|
|
"valid_targets_mean": 2238.2,
|
|
"valid_targets_min": 1520
|
|
},
|
|
{
|
|
"epoch": 5.258215962441315,
|
|
"grad_norm": 0.6789077473020962,
|
|
"learning_rate": 7.095814911528383e-06,
|
|
"loss": 0.2773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1825868785381317,
|
|
"step": 3360,
|
|
"valid_targets_mean": 5099.4,
|
|
"valid_targets_min": 1872
|
|
},
|
|
{
|
|
"epoch": 5.2660406885759,
|
|
"grad_norm": 0.9757940527455842,
|
|
"learning_rate": 7.03628114789773e-06,
|
|
"loss": 0.2939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10668784379959106,
|
|
"step": 3365,
|
|
"valid_targets_mean": 1982.4,
|
|
"valid_targets_min": 901
|
|
},
|
|
{
|
|
"epoch": 5.273865414710485,
|
|
"grad_norm": 0.7363729512581361,
|
|
"learning_rate": 6.976944825128529e-06,
|
|
"loss": 0.2927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11924624443054199,
|
|
"step": 3370,
|
|
"valid_targets_mean": 2663.1,
|
|
"valid_targets_min": 823
|
|
},
|
|
{
|
|
"epoch": 5.28169014084507,
|
|
"grad_norm": 0.6019426109948102,
|
|
"learning_rate": 6.917806846928663e-06,
|
|
"loss": 0.272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13693919777870178,
|
|
"step": 3375,
|
|
"valid_targets_mean": 4641.5,
|
|
"valid_targets_min": 1024
|
|
},
|
|
{
|
|
"epoch": 5.289514866979656,
|
|
"grad_norm": 0.7874311050908286,
|
|
"learning_rate": 6.858868113985146e-06,
|
|
"loss": 0.2958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1444309949874878,
|
|
"step": 3380,
|
|
"valid_targets_mean": 2435.2,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 5.297339593114241,
|
|
"grad_norm": 0.7446842605889915,
|
|
"learning_rate": 6.800129523950447e-06,
|
|
"loss": 0.2768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13470521569252014,
|
|
"step": 3385,
|
|
"valid_targets_mean": 2980.9,
|
|
"valid_targets_min": 1134
|
|
},
|
|
{
|
|
"epoch": 5.305164319248826,
|
|
"grad_norm": 1.0497386976468648,
|
|
"learning_rate": 6.741591971428796e-06,
|
|
"loss": 0.2911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1739349365234375,
|
|
"step": 3390,
|
|
"valid_targets_mean": 2884.2,
|
|
"valid_targets_min": 1117
|
|
},
|
|
{
|
|
"epoch": 5.312989045383412,
|
|
"grad_norm": 0.6397178835638737,
|
|
"learning_rate": 6.6832563479625904e-06,
|
|
"loss": 0.2747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11286614090204239,
|
|
"step": 3395,
|
|
"valid_targets_mean": 3491.8,
|
|
"valid_targets_min": 818
|
|
},
|
|
{
|
|
"epoch": 5.320813771517997,
|
|
"grad_norm": 0.7524984776799502,
|
|
"learning_rate": 6.625123542018772e-06,
|
|
"loss": 0.2846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11447033286094666,
|
|
"step": 3400,
|
|
"valid_targets_mean": 2656.0,
|
|
"valid_targets_min": 1768
|
|
},
|
|
{
|
|
"epoch": 5.328638497652582,
|
|
"grad_norm": 0.6750893252548031,
|
|
"learning_rate": 6.567194438975329e-06,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1466587334871292,
|
|
"step": 3405,
|
|
"valid_targets_mean": 4466.2,
|
|
"valid_targets_min": 1450
|
|
},
|
|
{
|
|
"epoch": 5.336463223787168,
|
|
"grad_norm": 0.6395431323892509,
|
|
"learning_rate": 6.509469921107787e-06,
|
|
"loss": 0.2765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15003879368305206,
|
|
"step": 3410,
|
|
"valid_targets_mean": 5007.2,
|
|
"valid_targets_min": 1624
|
|
},
|
|
{
|
|
"epoch": 5.344287949921752,
|
|
"grad_norm": 0.7385058316794971,
|
|
"learning_rate": 6.451950867575814e-06,
|
|
"loss": 0.2695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12758992612361908,
|
|
"step": 3415,
|
|
"valid_targets_mean": 2961.0,
|
|
"valid_targets_min": 1205
|
|
},
|
|
{
|
|
"epoch": 5.352112676056338,
|
|
"grad_norm": 0.644214291338516,
|
|
"learning_rate": 6.394638154409776e-06,
|
|
"loss": 0.2707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15139836072921753,
|
|
"step": 3420,
|
|
"valid_targets_mean": 5065.5,
|
|
"valid_targets_min": 1534
|
|
},
|
|
{
|
|
"epoch": 5.359937402190924,
|
|
"grad_norm": 0.7060315762709448,
|
|
"learning_rate": 6.337532654497429e-06,
|
|
"loss": 0.2691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1336570531129837,
|
|
"step": 3425,
|
|
"valid_targets_mean": 3545.4,
|
|
"valid_targets_min": 938
|
|
},
|
|
{
|
|
"epoch": 5.367762128325508,
|
|
"grad_norm": 0.6632815975569215,
|
|
"learning_rate": 6.280635237570612e-06,
|
|
"loss": 0.285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1689804047346115,
|
|
"step": 3430,
|
|
"valid_targets_mean": 4958.1,
|
|
"valid_targets_min": 1088
|
|
},
|
|
{
|
|
"epoch": 5.375586854460094,
|
|
"grad_norm": 0.7055878127989395,
|
|
"learning_rate": 6.22394677019202e-06,
|
|
"loss": 0.2744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13865195214748383,
|
|
"step": 3435,
|
|
"valid_targets_mean": 3239.5,
|
|
"valid_targets_min": 532
|
|
},
|
|
{
|
|
"epoch": 5.383411580594679,
|
|
"grad_norm": 0.6167718037103791,
|
|
"learning_rate": 6.16746811574197e-06,
|
|
"loss": 0.2954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26917511224746704,
|
|
"step": 3440,
|
|
"valid_targets_mean": 5220.6,
|
|
"valid_targets_min": 1522
|
|
},
|
|
{
|
|
"epoch": 5.391236306729264,
|
|
"grad_norm": 0.9548678744135256,
|
|
"learning_rate": 6.111200134405304e-06,
|
|
"loss": 0.281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14370374381542206,
|
|
"step": 3445,
|
|
"valid_targets_mean": 3833.9,
|
|
"valid_targets_min": 1664
|
|
},
|
|
{
|
|
"epoch": 5.39906103286385,
|
|
"grad_norm": 0.8105491218270925,
|
|
"learning_rate": 6.055143683158206e-06,
|
|
"loss": 0.2911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14208850264549255,
|
|
"step": 3450,
|
|
"valid_targets_mean": 2867.2,
|
|
"valid_targets_min": 812
|
|
},
|
|
{
|
|
"epoch": 5.406885758998435,
|
|
"grad_norm": 0.6697882437680072,
|
|
"learning_rate": 5.999299615755256e-06,
|
|
"loss": 0.271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0960645005106926,
|
|
"step": 3455,
|
|
"valid_targets_mean": 2590.6,
|
|
"valid_targets_min": 1389
|
|
},
|
|
{
|
|
"epoch": 5.41471048513302,
|
|
"grad_norm": 0.7464186268903296,
|
|
"learning_rate": 5.943668782716332e-06,
|
|
"loss": 0.3064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12398095428943634,
|
|
"step": 3460,
|
|
"valid_targets_mean": 3799.0,
|
|
"valid_targets_min": 1622
|
|
},
|
|
{
|
|
"epoch": 5.422535211267606,
|
|
"grad_norm": 0.7430630323226317,
|
|
"learning_rate": 5.88825203131373e-06,
|
|
"loss": 0.258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15883897244930267,
|
|
"step": 3465,
|
|
"valid_targets_mean": 3860.4,
|
|
"valid_targets_min": 1335
|
|
},
|
|
{
|
|
"epoch": 5.430359937402191,
|
|
"grad_norm": 0.7006641398152967,
|
|
"learning_rate": 5.8330502055591855e-06,
|
|
"loss": 0.2968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17472922801971436,
|
|
"step": 3470,
|
|
"valid_targets_mean": 3835.1,
|
|
"valid_targets_min": 1193
|
|
},
|
|
{
|
|
"epoch": 5.438184663536776,
|
|
"grad_norm": 0.6471001954162151,
|
|
"learning_rate": 5.778064146191098e-06,
|
|
"loss": 0.2654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15245704352855682,
|
|
"step": 3475,
|
|
"valid_targets_mean": 3955.2,
|
|
"valid_targets_min": 1884
|
|
},
|
|
{
|
|
"epoch": 5.446009389671362,
|
|
"grad_norm": 0.7160157370066876,
|
|
"learning_rate": 5.7232946906616605e-06,
|
|
"loss": 0.2802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16182789206504822,
|
|
"step": 3480,
|
|
"valid_targets_mean": 4182.2,
|
|
"valid_targets_min": 1674
|
|
},
|
|
{
|
|
"epoch": 5.453834115805947,
|
|
"grad_norm": 0.6829665411448274,
|
|
"learning_rate": 5.668742673124154e-06,
|
|
"loss": 0.2701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13967886567115784,
|
|
"step": 3485,
|
|
"valid_targets_mean": 4560.4,
|
|
"valid_targets_min": 1581
|
|
},
|
|
{
|
|
"epoch": 5.461658841940532,
|
|
"grad_norm": 0.7125442352222966,
|
|
"learning_rate": 5.614408924420209e-06,
|
|
"loss": 0.2862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1238575279712677,
|
|
"step": 3490,
|
|
"valid_targets_mean": 3035.4,
|
|
"valid_targets_min": 1241
|
|
},
|
|
{
|
|
"epoch": 5.469483568075117,
|
|
"grad_norm": 0.8574733111432604,
|
|
"learning_rate": 5.560294272067166e-06,
|
|
"loss": 0.2748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1575300693511963,
|
|
"step": 3495,
|
|
"valid_targets_mean": 3376.4,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 5.477308294209703,
|
|
"grad_norm": 0.7614238138316003,
|
|
"learning_rate": 5.506399540245466e-06,
|
|
"loss": 0.2751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1494896113872528,
|
|
"step": 3500,
|
|
"valid_targets_mean": 3790.0,
|
|
"valid_targets_min": 1144
|
|
},
|
|
{
|
|
"epoch": 5.485133020344288,
|
|
"grad_norm": 0.7422316947801164,
|
|
"learning_rate": 5.452725549786104e-06,
|
|
"loss": 0.2737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10368004441261292,
|
|
"step": 3505,
|
|
"valid_targets_mean": 1974.8,
|
|
"valid_targets_min": 755
|
|
},
|
|
{
|
|
"epoch": 5.492957746478873,
|
|
"grad_norm": 0.8342487137543747,
|
|
"learning_rate": 5.39927311815814e-06,
|
|
"loss": 0.2772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14374631643295288,
|
|
"step": 3510,
|
|
"valid_targets_mean": 2687.0,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 5.500782472613459,
|
|
"grad_norm": 0.7990091824604785,
|
|
"learning_rate": 5.346043059456216e-06,
|
|
"loss": 0.2876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12135674059391022,
|
|
"step": 3515,
|
|
"valid_targets_mean": 2588.4,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 5.508607198748043,
|
|
"grad_norm": 0.7232796765165699,
|
|
"learning_rate": 5.293036184388185e-06,
|
|
"loss": 0.2516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12992557883262634,
|
|
"step": 3520,
|
|
"valid_targets_mean": 3747.1,
|
|
"valid_targets_min": 1622
|
|
},
|
|
{
|
|
"epoch": 5.516431924882629,
|
|
"grad_norm": 0.8862947647404135,
|
|
"learning_rate": 5.240253300262743e-06,
|
|
"loss": 0.2737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10607026517391205,
|
|
"step": 3525,
|
|
"valid_targets_mean": 2046.6,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 5.524256651017215,
|
|
"grad_norm": 0.6718122504065359,
|
|
"learning_rate": 5.187695210977168e-06,
|
|
"loss": 0.2971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1602775752544403,
|
|
"step": 3530,
|
|
"valid_targets_mean": 3732.5,
|
|
"valid_targets_min": 1186
|
|
},
|
|
{
|
|
"epoch": 5.532081377151799,
|
|
"grad_norm": 0.7412200950258888,
|
|
"learning_rate": 5.13536271700503e-06,
|
|
"loss": 0.2659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14402499794960022,
|
|
"step": 3535,
|
|
"valid_targets_mean": 3380.9,
|
|
"valid_targets_min": 1313
|
|
},
|
|
{
|
|
"epoch": 5.539906103286385,
|
|
"grad_norm": 0.6437606331179158,
|
|
"learning_rate": 5.083256615384035e-06,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16577017307281494,
|
|
"step": 3540,
|
|
"valid_targets_mean": 4652.2,
|
|
"valid_targets_min": 2187
|
|
},
|
|
{
|
|
"epoch": 5.547730829420971,
|
|
"grad_norm": 0.7586442157223253,
|
|
"learning_rate": 5.0313776997038635e-06,
|
|
"loss": 0.2843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2698698043823242,
|
|
"step": 3545,
|
|
"valid_targets_mean": 5517.4,
|
|
"valid_targets_min": 1682
|
|
},
|
|
{
|
|
"epoch": 5.555555555555555,
|
|
"grad_norm": 0.7193986889590914,
|
|
"learning_rate": 4.97972676009411e-06,
|
|
"loss": 0.2761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2056780308485031,
|
|
"step": 3550,
|
|
"valid_targets_mean": 4654.2,
|
|
"valid_targets_min": 1576
|
|
},
|
|
{
|
|
"epoch": 5.563380281690141,
|
|
"grad_norm": 0.6998964465939187,
|
|
"learning_rate": 4.9283045832122225e-06,
|
|
"loss": 0.2713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1367303729057312,
|
|
"step": 3555,
|
|
"valid_targets_mean": 3164.8,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 5.571205007824727,
|
|
"grad_norm": 0.8500526015634067,
|
|
"learning_rate": 4.877111952231533e-06,
|
|
"loss": 0.2904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16513508558273315,
|
|
"step": 3560,
|
|
"valid_targets_mean": 2939.8,
|
|
"valid_targets_min": 1536
|
|
},
|
|
{
|
|
"epoch": 5.579029733959311,
|
|
"grad_norm": 0.7861145849405704,
|
|
"learning_rate": 4.826149646829321e-06,
|
|
"loss": 0.2727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21976152062416077,
|
|
"step": 3565,
|
|
"valid_targets_mean": 4482.0,
|
|
"valid_targets_min": 1241
|
|
},
|
|
{
|
|
"epoch": 5.586854460093897,
|
|
"grad_norm": 0.7420450009330348,
|
|
"learning_rate": 4.775418443174971e-06,
|
|
"loss": 0.2816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13986200094223022,
|
|
"step": 3570,
|
|
"valid_targets_mean": 2874.4,
|
|
"valid_targets_min": 1099
|
|
},
|
|
{
|
|
"epoch": 5.594679186228482,
|
|
"grad_norm": 0.6436174533097619,
|
|
"learning_rate": 4.724919113918099e-06,
|
|
"loss": 0.2871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12609705328941345,
|
|
"step": 3575,
|
|
"valid_targets_mean": 4482.4,
|
|
"valid_targets_min": 2388
|
|
},
|
|
{
|
|
"epoch": 5.602503912363067,
|
|
"grad_norm": 0.6280981784139309,
|
|
"learning_rate": 4.674652428176838e-06,
|
|
"loss": 0.276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11008578538894653,
|
|
"step": 3580,
|
|
"valid_targets_mean": 3725.0,
|
|
"valid_targets_min": 1633
|
|
},
|
|
{
|
|
"epoch": 5.610328638497653,
|
|
"grad_norm": 0.8208479581079361,
|
|
"learning_rate": 4.624619151526069e-06,
|
|
"loss": 0.2875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16391754150390625,
|
|
"step": 3585,
|
|
"valid_targets_mean": 3736.8,
|
|
"valid_targets_min": 1298
|
|
},
|
|
{
|
|
"epoch": 5.618153364632238,
|
|
"grad_norm": 0.8097878868137789,
|
|
"learning_rate": 4.57482004598582e-06,
|
|
"loss": 0.2834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13512974977493286,
|
|
"step": 3590,
|
|
"valid_targets_mean": 2175.5,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 5.625978090766823,
|
|
"grad_norm": 0.790877876104825,
|
|
"learning_rate": 4.52525587000961e-06,
|
|
"loss": 0.2575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11377212405204773,
|
|
"step": 3595,
|
|
"valid_targets_mean": 2551.1,
|
|
"valid_targets_min": 1391
|
|
},
|
|
{
|
|
"epoch": 5.633802816901408,
|
|
"grad_norm": 0.8266200511250863,
|
|
"learning_rate": 4.475927378472944e-06,
|
|
"loss": 0.2815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15062755346298218,
|
|
"step": 3600,
|
|
"valid_targets_mean": 2832.9,
|
|
"valid_targets_min": 1349
|
|
},
|
|
{
|
|
"epoch": 5.641627543035994,
|
|
"grad_norm": 0.6372633350743774,
|
|
"learning_rate": 4.4268353226617535e-06,
|
|
"loss": 0.2649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14932875335216522,
|
|
"step": 3605,
|
|
"valid_targets_mean": 4687.8,
|
|
"valid_targets_min": 1404
|
|
},
|
|
{
|
|
"epoch": 5.649452269170579,
|
|
"grad_norm": 0.828022818571926,
|
|
"learning_rate": 4.377980450261025e-06,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18646924197673798,
|
|
"step": 3610,
|
|
"valid_targets_mean": 5003.0,
|
|
"valid_targets_min": 1177
|
|
},
|
|
{
|
|
"epoch": 5.657276995305164,
|
|
"grad_norm": 0.6997425275787899,
|
|
"learning_rate": 4.3293635053433605e-06,
|
|
"loss": 0.2907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16458925604820251,
|
|
"step": 3615,
|
|
"valid_targets_mean": 3486.8,
|
|
"valid_targets_min": 1706
|
|
},
|
|
{
|
|
"epoch": 5.66510172143975,
|
|
"grad_norm": 1.0965213098713513,
|
|
"learning_rate": 4.280985228357677e-06,
|
|
"loss": 0.2906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13323703408241272,
|
|
"step": 3620,
|
|
"valid_targets_mean": 2288.9,
|
|
"valid_targets_min": 789
|
|
},
|
|
{
|
|
"epoch": 5.672926447574335,
|
|
"grad_norm": 0.6031646419229706,
|
|
"learning_rate": 4.2328463561179014e-06,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16547824442386627,
|
|
"step": 3625,
|
|
"valid_targets_mean": 5265.4,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 5.68075117370892,
|
|
"grad_norm": 0.727749791204885,
|
|
"learning_rate": 4.184947621791775e-06,
|
|
"loss": 0.2886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12137381732463837,
|
|
"step": 3630,
|
|
"valid_targets_mean": 3456.1,
|
|
"valid_targets_min": 1330
|
|
},
|
|
{
|
|
"epoch": 5.688575899843506,
|
|
"grad_norm": 0.8038956099520121,
|
|
"learning_rate": 4.13728975488966e-06,
|
|
"loss": 0.2869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10637620091438293,
|
|
"step": 3635,
|
|
"valid_targets_mean": 2253.9,
|
|
"valid_targets_min": 1228
|
|
},
|
|
{
|
|
"epoch": 5.69640062597809,
|
|
"grad_norm": 0.5912011573206017,
|
|
"learning_rate": 4.089873481253468e-06,
|
|
"loss": 0.2838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12327222526073456,
|
|
"step": 3640,
|
|
"valid_targets_mean": 4514.2,
|
|
"valid_targets_min": 1699
|
|
},
|
|
{
|
|
"epoch": 5.704225352112676,
|
|
"grad_norm": 0.6963890897603738,
|
|
"learning_rate": 4.042699523045561e-06,
|
|
"loss": 0.2798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23420825600624084,
|
|
"step": 3645,
|
|
"valid_targets_mean": 4201.5,
|
|
"valid_targets_min": 1045
|
|
},
|
|
{
|
|
"epoch": 5.712050078247262,
|
|
"grad_norm": 0.8874390422751951,
|
|
"learning_rate": 3.995768598737779e-06,
|
|
"loss": 0.2865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15660159289836884,
|
|
"step": 3650,
|
|
"valid_targets_mean": 2480.2,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 5.719874804381846,
|
|
"grad_norm": 0.7586880308029252,
|
|
"learning_rate": 3.949081423100496e-06,
|
|
"loss": 0.3153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23996514081954956,
|
|
"step": 3655,
|
|
"valid_targets_mean": 3556.1,
|
|
"valid_targets_min": 1444
|
|
},
|
|
{
|
|
"epoch": 5.727699530516432,
|
|
"grad_norm": 0.7198779565488995,
|
|
"learning_rate": 3.902638707191717e-06,
|
|
"loss": 0.2666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14191412925720215,
|
|
"step": 3660,
|
|
"valid_targets_mean": 3379.8,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 5.735524256651017,
|
|
"grad_norm": 0.6768017115429462,
|
|
"learning_rate": 3.85644115834628e-06,
|
|
"loss": 0.2959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11664867401123047,
|
|
"step": 3665,
|
|
"valid_targets_mean": 3004.2,
|
|
"valid_targets_min": 1814
|
|
},
|
|
{
|
|
"epoch": 5.743348982785602,
|
|
"grad_norm": 0.7101255658021659,
|
|
"learning_rate": 3.8104894801650517e-06,
|
|
"loss": 0.295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14925682544708252,
|
|
"step": 3670,
|
|
"valid_targets_mean": 4198.4,
|
|
"valid_targets_min": 1276
|
|
},
|
|
{
|
|
"epoch": 5.751173708920188,
|
|
"grad_norm": 0.6321480087876172,
|
|
"learning_rate": 3.76478437250422e-06,
|
|
"loss": 0.2782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12468777596950531,
|
|
"step": 3675,
|
|
"valid_targets_mean": 4066.4,
|
|
"valid_targets_min": 931
|
|
},
|
|
{
|
|
"epoch": 5.758998435054773,
|
|
"grad_norm": 0.7904927339618169,
|
|
"learning_rate": 3.7193265314646445e-06,
|
|
"loss": 0.3102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17099037766456604,
|
|
"step": 3680,
|
|
"valid_targets_mean": 3204.4,
|
|
"valid_targets_min": 970
|
|
},
|
|
{
|
|
"epoch": 5.766823161189358,
|
|
"grad_norm": 0.7729042719111775,
|
|
"learning_rate": 3.674116649381252e-06,
|
|
"loss": 0.2701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18481242656707764,
|
|
"step": 3685,
|
|
"valid_targets_mean": 3530.9,
|
|
"valid_targets_min": 845
|
|
},
|
|
{
|
|
"epoch": 5.774647887323944,
|
|
"grad_norm": 1.1596987579316396,
|
|
"learning_rate": 3.6291554148124865e-06,
|
|
"loss": 0.2857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13941723108291626,
|
|
"step": 3690,
|
|
"valid_targets_mean": 3264.4,
|
|
"valid_targets_min": 1120
|
|
},
|
|
{
|
|
"epoch": 5.782472613458529,
|
|
"grad_norm": 0.7509587532643214,
|
|
"learning_rate": 3.5844435125298206e-06,
|
|
"loss": 0.2765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10201454162597656,
|
|
"step": 3695,
|
|
"valid_targets_mean": 1968.4,
|
|
"valid_targets_min": 1109
|
|
},
|
|
{
|
|
"epoch": 5.790297339593114,
|
|
"grad_norm": 0.7858736758431161,
|
|
"learning_rate": 3.539981623507327e-06,
|
|
"loss": 0.2707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1699228733778,
|
|
"step": 3700,
|
|
"valid_targets_mean": 3386.5,
|
|
"valid_targets_min": 1613
|
|
},
|
|
{
|
|
"epoch": 5.7981220657277,
|
|
"grad_norm": 0.7183089016509621,
|
|
"learning_rate": 3.495770424911329e-06,
|
|
"loss": 0.2949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15134656429290771,
|
|
"step": 3705,
|
|
"valid_targets_mean": 3634.2,
|
|
"valid_targets_min": 1042
|
|
},
|
|
{
|
|
"epoch": 5.805946791862285,
|
|
"grad_norm": 0.6675904847093513,
|
|
"learning_rate": 3.4518105900900432e-06,
|
|
"loss": 0.3074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09983531385660172,
|
|
"step": 3710,
|
|
"valid_targets_mean": 3532.1,
|
|
"valid_targets_min": 1967
|
|
},
|
|
{
|
|
"epoch": 5.81377151799687,
|
|
"grad_norm": 0.7562300745638453,
|
|
"learning_rate": 3.408102788563381e-06,
|
|
"loss": 0.2861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17266729474067688,
|
|
"step": 3715,
|
|
"valid_targets_mean": 3436.6,
|
|
"valid_targets_min": 1226
|
|
},
|
|
{
|
|
"epoch": 5.821596244131455,
|
|
"grad_norm": 0.8536064558596199,
|
|
"learning_rate": 3.3646476860126787e-06,
|
|
"loss": 0.2515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14134791493415833,
|
|
"step": 3720,
|
|
"valid_targets_mean": 2446.0,
|
|
"valid_targets_min": 814
|
|
},
|
|
{
|
|
"epoch": 5.829420970266041,
|
|
"grad_norm": 0.6617147715227052,
|
|
"learning_rate": 3.3214459442706405e-06,
|
|
"loss": 0.2984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1316310167312622,
|
|
"step": 3725,
|
|
"valid_targets_mean": 3205.4,
|
|
"valid_targets_min": 998
|
|
},
|
|
{
|
|
"epoch": 5.837245696400626,
|
|
"grad_norm": 0.7605265618848737,
|
|
"learning_rate": 3.2784982213111904e-06,
|
|
"loss": 0.2831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1543911248445511,
|
|
"step": 3730,
|
|
"valid_targets_mean": 2823.1,
|
|
"valid_targets_min": 1205
|
|
},
|
|
{
|
|
"epoch": 5.845070422535211,
|
|
"grad_norm": 0.6999570537628235,
|
|
"learning_rate": 3.2358051712395056e-06,
|
|
"loss": 0.2698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1922643482685089,
|
|
"step": 3735,
|
|
"valid_targets_mean": 4746.2,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 5.852895148669797,
|
|
"grad_norm": 0.663208320638357,
|
|
"learning_rate": 3.193367444281994e-06,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1345275640487671,
|
|
"step": 3740,
|
|
"valid_targets_mean": 4504.2,
|
|
"valid_targets_min": 1685
|
|
},
|
|
{
|
|
"epoch": 5.860719874804381,
|
|
"grad_norm": 0.7621754623240105,
|
|
"learning_rate": 3.1511856867764547e-06,
|
|
"loss": 0.3083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14452233910560608,
|
|
"step": 3745,
|
|
"valid_targets_mean": 2685.9,
|
|
"valid_targets_min": 1435
|
|
},
|
|
{
|
|
"epoch": 5.868544600938967,
|
|
"grad_norm": 0.8145347025695566,
|
|
"learning_rate": 3.109260541162189e-06,
|
|
"loss": 0.2771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16513952612876892,
|
|
"step": 3750,
|
|
"valid_targets_mean": 3650.0,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 5.876369327073553,
|
|
"grad_norm": 0.9112659225637342,
|
|
"learning_rate": 3.067592645970241e-06,
|
|
"loss": 0.2808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16788271069526672,
|
|
"step": 3755,
|
|
"valid_targets_mean": 2464.2,
|
|
"valid_targets_min": 922
|
|
},
|
|
{
|
|
"epoch": 5.884194053208137,
|
|
"grad_norm": 0.7049751879998502,
|
|
"learning_rate": 3.026182635813655e-06,
|
|
"loss": 0.2635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1200672909617424,
|
|
"step": 3760,
|
|
"valid_targets_mean": 3197.6,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 5.892018779342723,
|
|
"grad_norm": 0.7972470246200496,
|
|
"learning_rate": 2.9850311413778186e-06,
|
|
"loss": 0.2672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09924145042896271,
|
|
"step": 3765,
|
|
"valid_targets_mean": 2942.2,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 5.899843505477309,
|
|
"grad_norm": 0.8066365186315473,
|
|
"learning_rate": 2.9441387894108596e-06,
|
|
"loss": 0.2701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15464389324188232,
|
|
"step": 3770,
|
|
"valid_targets_mean": 3457.5,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 5.907668231611893,
|
|
"grad_norm": 0.8072931442199835,
|
|
"learning_rate": 2.9035062027141014e-06,
|
|
"loss": 0.3028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1800840198993683,
|
|
"step": 3775,
|
|
"valid_targets_mean": 3300.9,
|
|
"valid_targets_min": 1245
|
|
},
|
|
{
|
|
"epoch": 5.915492957746479,
|
|
"grad_norm": 0.6206038430312263,
|
|
"learning_rate": 2.863134000132566e-06,
|
|
"loss": 0.2845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12523047626018524,
|
|
"step": 3780,
|
|
"valid_targets_mean": 3907.8,
|
|
"valid_targets_min": 1450
|
|
},
|
|
{
|
|
"epoch": 5.923317683881065,
|
|
"grad_norm": 0.6684579720388334,
|
|
"learning_rate": 2.8230227965455604e-06,
|
|
"loss": 0.2668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1471487134695053,
|
|
"step": 3785,
|
|
"valid_targets_mean": 4420.9,
|
|
"valid_targets_min": 932
|
|
},
|
|
{
|
|
"epoch": 5.931142410015649,
|
|
"grad_norm": 0.6774926647827009,
|
|
"learning_rate": 2.7831732028573077e-06,
|
|
"loss": 0.3162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12568917870521545,
|
|
"step": 3790,
|
|
"valid_targets_mean": 3784.0,
|
|
"valid_targets_min": 1442
|
|
},
|
|
{
|
|
"epoch": 5.938967136150235,
|
|
"grad_norm": 0.6366683798945363,
|
|
"learning_rate": 2.7435858259876358e-06,
|
|
"loss": 0.2908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17724724113941193,
|
|
"step": 3795,
|
|
"valid_targets_mean": 5393.6,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 5.94679186228482,
|
|
"grad_norm": 0.7197173337472124,
|
|
"learning_rate": 2.70426126886276e-06,
|
|
"loss": 0.2948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11224987357854843,
|
|
"step": 3800,
|
|
"valid_targets_mean": 2623.2,
|
|
"valid_targets_min": 1214
|
|
},
|
|
{
|
|
"epoch": 5.954616588419405,
|
|
"grad_norm": 0.7622656144712504,
|
|
"learning_rate": 2.665200130406065e-06,
|
|
"loss": 0.2879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08244765549898148,
|
|
"step": 3805,
|
|
"valid_targets_mean": 1943.2,
|
|
"valid_targets_min": 834
|
|
},
|
|
{
|
|
"epoch": 5.962441314553991,
|
|
"grad_norm": 0.6569760737651297,
|
|
"learning_rate": 2.6264030055290057e-06,
|
|
"loss": 0.2698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12440291792154312,
|
|
"step": 3810,
|
|
"valid_targets_mean": 3876.6,
|
|
"valid_targets_min": 1391
|
|
},
|
|
{
|
|
"epoch": 5.970266040688576,
|
|
"grad_norm": 0.7463377373933919,
|
|
"learning_rate": 2.5878704851220306e-06,
|
|
"loss": 0.2944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1377275139093399,
|
|
"step": 3815,
|
|
"valid_targets_mean": 3885.0,
|
|
"valid_targets_min": 943
|
|
},
|
|
{
|
|
"epoch": 5.978090766823161,
|
|
"grad_norm": 0.7778482554054209,
|
|
"learning_rate": 2.5496031560456124e-06,
|
|
"loss": 0.2948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17558176815509796,
|
|
"step": 3820,
|
|
"valid_targets_mean": 3471.4,
|
|
"valid_targets_min": 1126
|
|
},
|
|
{
|
|
"epoch": 5.985915492957746,
|
|
"grad_norm": 0.7205831402211083,
|
|
"learning_rate": 2.5116016011212697e-06,
|
|
"loss": 0.2687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13480868935585022,
|
|
"step": 3825,
|
|
"valid_targets_mean": 3355.0,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 5.993740219092332,
|
|
"grad_norm": 0.6566432605943197,
|
|
"learning_rate": 2.473866399122733e-06,
|
|
"loss": 0.2959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15288713574409485,
|
|
"step": 3830,
|
|
"valid_targets_mean": 4080.6,
|
|
"valid_targets_min": 1457
|
|
},
|
|
{
|
|
"epoch": 6.001564945226917,
|
|
"grad_norm": 0.7161336037854829,
|
|
"learning_rate": 2.4363981247670722e-06,
|
|
"loss": 0.2818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.106085866689682,
|
|
"step": 3835,
|
|
"valid_targets_mean": 2775.2,
|
|
"valid_targets_min": 1123
|
|
},
|
|
{
|
|
"epoch": 6.009389671361502,
|
|
"grad_norm": 0.7183721784411516,
|
|
"learning_rate": 2.399197348706017e-06,
|
|
"loss": 0.2476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10256598144769669,
|
|
"step": 3840,
|
|
"valid_targets_mean": 2388.6,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 6.017214397496088,
|
|
"grad_norm": 0.5204929421432274,
|
|
"learning_rate": 2.3622646375171998e-06,
|
|
"loss": 0.2638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09462379664182663,
|
|
"step": 3845,
|
|
"valid_targets_mean": 4333.2,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 6.025039123630673,
|
|
"grad_norm": 0.7464454493725015,
|
|
"learning_rate": 2.3256005536955797e-06,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18718260526657104,
|
|
"step": 3850,
|
|
"valid_targets_mean": 3426.8,
|
|
"valid_targets_min": 817
|
|
},
|
|
{
|
|
"epoch": 6.032863849765258,
|
|
"grad_norm": 0.9495255396215655,
|
|
"learning_rate": 2.289205655644815e-06,
|
|
"loss": 0.2804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2510857880115509,
|
|
"step": 3855,
|
|
"valid_targets_mean": 3217.2,
|
|
"valid_targets_min": 1577
|
|
},
|
|
{
|
|
"epoch": 6.040688575899844,
|
|
"grad_norm": 0.7448804490958297,
|
|
"learning_rate": 2.253080497668829e-06,
|
|
"loss": 0.2512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1280127465724945,
|
|
"step": 3860,
|
|
"valid_targets_mean": 2653.2,
|
|
"valid_targets_min": 998
|
|
},
|
|
{
|
|
"epoch": 6.048513302034428,
|
|
"grad_norm": 0.5942571843363801,
|
|
"learning_rate": 2.217225629963309e-06,
|
|
"loss": 0.2898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08259809017181396,
|
|
"step": 3865,
|
|
"valid_targets_mean": 2948.4,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 6.056338028169014,
|
|
"grad_norm": 0.65592637834884,
|
|
"learning_rate": 2.181641598607367e-06,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09980684518814087,
|
|
"step": 3870,
|
|
"valid_targets_mean": 2861.1,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 6.0641627543036,
|
|
"grad_norm": 0.6452503965747388,
|
|
"learning_rate": 2.1463289455551894e-06,
|
|
"loss": 0.2667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12210479378700256,
|
|
"step": 3875,
|
|
"valid_targets_mean": 4342.5,
|
|
"valid_targets_min": 1354
|
|
},
|
|
{
|
|
"epoch": 6.071987480438184,
|
|
"grad_norm": 0.6288543987943838,
|
|
"learning_rate": 2.1112882086278107e-06,
|
|
"loss": 0.2412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07660470902919769,
|
|
"step": 3880,
|
|
"valid_targets_mean": 2343.9,
|
|
"valid_targets_min": 984
|
|
},
|
|
{
|
|
"epoch": 6.07981220657277,
|
|
"grad_norm": 0.56020381765012,
|
|
"learning_rate": 2.0765199215049046e-06,
|
|
"loss": 0.2672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21385148167610168,
|
|
"step": 3885,
|
|
"valid_targets_mean": 7599.9,
|
|
"valid_targets_min": 1470
|
|
},
|
|
{
|
|
"epoch": 6.087636932707356,
|
|
"grad_norm": 0.6525799811134789,
|
|
"learning_rate": 2.042024613716671e-06,
|
|
"loss": 0.2931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17322541773319244,
|
|
"step": 3890,
|
|
"valid_targets_mean": 5117.0,
|
|
"valid_targets_min": 1105
|
|
},
|
|
{
|
|
"epoch": 6.09546165884194,
|
|
"grad_norm": 0.6452664753024571,
|
|
"learning_rate": 2.0078028106357506e-06,
|
|
"loss": 0.2881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1113201454281807,
|
|
"step": 3895,
|
|
"valid_targets_mean": 3522.9,
|
|
"valid_targets_min": 1598
|
|
},
|
|
{
|
|
"epoch": 6.103286384976526,
|
|
"grad_norm": 0.8072632883369436,
|
|
"learning_rate": 1.9738550334692475e-06,
|
|
"loss": 0.2894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1225123181939125,
|
|
"step": 3900,
|
|
"valid_targets_mean": 3005.4,
|
|
"valid_targets_min": 921
|
|
},
|
|
{
|
|
"epoch": 6.111111111111111,
|
|
"grad_norm": 0.6254501162012329,
|
|
"learning_rate": 1.9401817992507622e-06,
|
|
"loss": 0.2845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23574446141719818,
|
|
"step": 3905,
|
|
"valid_targets_mean": 6202.5,
|
|
"valid_targets_min": 1386
|
|
},
|
|
{
|
|
"epoch": 6.118935837245696,
|
|
"grad_norm": 0.6677504966814077,
|
|
"learning_rate": 1.9067836208325573e-06,
|
|
"loss": 0.2782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22661812603473663,
|
|
"step": 3910,
|
|
"valid_targets_mean": 5703.5,
|
|
"valid_targets_min": 1248
|
|
},
|
|
{
|
|
"epoch": 6.126760563380282,
|
|
"grad_norm": 0.759000110546835,
|
|
"learning_rate": 1.8736610068777006e-06,
|
|
"loss": 0.2885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1312720775604248,
|
|
"step": 3915,
|
|
"valid_targets_mean": 3120.4,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 6.134585289514867,
|
|
"grad_norm": 0.6811335290575548,
|
|
"learning_rate": 1.8408144618523539e-06,
|
|
"loss": 0.2744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16406355798244476,
|
|
"step": 3920,
|
|
"valid_targets_mean": 4877.6,
|
|
"valid_targets_min": 2256
|
|
},
|
|
{
|
|
"epoch": 6.142410015649452,
|
|
"grad_norm": 0.8157301346596237,
|
|
"learning_rate": 1.808244486018067e-06,
|
|
"loss": 0.2876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10478000342845917,
|
|
"step": 3925,
|
|
"valid_targets_mean": 2194.2,
|
|
"valid_targets_min": 1038
|
|
},
|
|
{
|
|
"epoch": 6.150234741784038,
|
|
"grad_norm": 0.541343841455076,
|
|
"learning_rate": 1.7759515754241753e-06,
|
|
"loss": 0.2425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07303312420845032,
|
|
"step": 3930,
|
|
"valid_targets_mean": 2663.9,
|
|
"valid_targets_min": 745
|
|
},
|
|
{
|
|
"epoch": 6.158059467918623,
|
|
"grad_norm": 0.7338005666978206,
|
|
"learning_rate": 1.7439362219002354e-06,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1603875756263733,
|
|
"step": 3935,
|
|
"valid_targets_mean": 3235.8,
|
|
"valid_targets_min": 1741
|
|
},
|
|
{
|
|
"epoch": 6.165884194053208,
|
|
"grad_norm": 0.7695333622295769,
|
|
"learning_rate": 1.7121989130485372e-06,
|
|
"loss": 0.2933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16836965084075928,
|
|
"step": 3940,
|
|
"valid_targets_mean": 3756.2,
|
|
"valid_targets_min": 1427
|
|
},
|
|
{
|
|
"epoch": 6.173708920187793,
|
|
"grad_norm": 0.915955135433738,
|
|
"learning_rate": 1.6807401322366711e-06,
|
|
"loss": 0.2662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1365930587053299,
|
|
"step": 3945,
|
|
"valid_targets_mean": 2395.5,
|
|
"valid_targets_min": 896
|
|
},
|
|
{
|
|
"epoch": 6.181533646322379,
|
|
"grad_norm": 0.7211328246927876,
|
|
"learning_rate": 1.6495603585901787e-06,
|
|
"loss": 0.2594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11844424158334732,
|
|
"step": 3950,
|
|
"valid_targets_mean": 3647.2,
|
|
"valid_targets_min": 1940
|
|
},
|
|
{
|
|
"epoch": 6.189358372456964,
|
|
"grad_norm": 0.6682004785059701,
|
|
"learning_rate": 1.618660066985247e-06,
|
|
"loss": 0.2628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11863971501588821,
|
|
"step": 3955,
|
|
"valid_targets_mean": 3889.5,
|
|
"valid_targets_min": 1376
|
|
},
|
|
{
|
|
"epoch": 6.197183098591549,
|
|
"grad_norm": 1.2834994407539608,
|
|
"learning_rate": 1.5880397280414728e-06,
|
|
"loss": 0.2808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10814309865236282,
|
|
"step": 3960,
|
|
"valid_targets_mean": 2714.4,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 6.205007824726135,
|
|
"grad_norm": 0.6004646132100018,
|
|
"learning_rate": 1.5576998081147144e-06,
|
|
"loss": 0.2659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14242233335971832,
|
|
"step": 3965,
|
|
"valid_targets_mean": 4594.2,
|
|
"valid_targets_min": 1369
|
|
},
|
|
{
|
|
"epoch": 6.21283255086072,
|
|
"grad_norm": 0.7419655743321131,
|
|
"learning_rate": 1.5276407692899508e-06,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15867002308368683,
|
|
"step": 3970,
|
|
"valid_targets_mean": 3514.0,
|
|
"valid_targets_min": 1209
|
|
},
|
|
{
|
|
"epoch": 6.220657276995305,
|
|
"grad_norm": 0.8089821079902558,
|
|
"learning_rate": 1.4978630693742923e-06,
|
|
"loss": 0.2622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20072036981582642,
|
|
"step": 3975,
|
|
"valid_targets_mean": 3786.9,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 6.228482003129891,
|
|
"grad_norm": 0.6657277093595313,
|
|
"learning_rate": 1.468367161889963e-06,
|
|
"loss": 0.2639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11271883547306061,
|
|
"step": 3980,
|
|
"valid_targets_mean": 4146.9,
|
|
"valid_targets_min": 1137
|
|
},
|
|
{
|
|
"epoch": 6.236306729264475,
|
|
"grad_norm": 0.8664944813351919,
|
|
"learning_rate": 1.4391534960674336e-06,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1531766951084137,
|
|
"step": 3985,
|
|
"valid_targets_mean": 2800.4,
|
|
"valid_targets_min": 1037
|
|
},
|
|
{
|
|
"epoch": 6.244131455399061,
|
|
"grad_norm": 0.7150226539608152,
|
|
"learning_rate": 1.4102225168385374e-06,
|
|
"loss": 0.2787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10749469697475433,
|
|
"step": 3990,
|
|
"valid_targets_mean": 3026.8,
|
|
"valid_targets_min": 1494
|
|
},
|
|
{
|
|
"epoch": 6.251956181533647,
|
|
"grad_norm": 0.7848516213704165,
|
|
"learning_rate": 1.3815746648297347e-06,
|
|
"loss": 0.2705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12888410687446594,
|
|
"step": 3995,
|
|
"valid_targets_mean": 3176.1,
|
|
"valid_targets_min": 943
|
|
},
|
|
{
|
|
"epoch": 6.259780907668231,
|
|
"grad_norm": 0.7973346496068444,
|
|
"learning_rate": 1.3532103763553716e-06,
|
|
"loss": 0.2924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14321815967559814,
|
|
"step": 4000,
|
|
"valid_targets_mean": 3106.2,
|
|
"valid_targets_min": 1331
|
|
},
|
|
{
|
|
"epoch": 6.267605633802817,
|
|
"grad_norm": 0.7039257306065657,
|
|
"learning_rate": 1.3251300834110592e-06,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08347520232200623,
|
|
"step": 4005,
|
|
"valid_targets_mean": 2785.1,
|
|
"valid_targets_min": 1203
|
|
},
|
|
{
|
|
"epoch": 6.275430359937403,
|
|
"grad_norm": 0.7183107331604626,
|
|
"learning_rate": 1.2973342136670719e-06,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09658422321081161,
|
|
"step": 4010,
|
|
"valid_targets_mean": 2611.9,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 6.283255086071987,
|
|
"grad_norm": 0.8200297497677161,
|
|
"learning_rate": 1.269823190461843e-06,
|
|
"loss": 0.2686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10874016582965851,
|
|
"step": 4015,
|
|
"valid_targets_mean": 2179.0,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 6.291079812206573,
|
|
"grad_norm": 0.7908754570326211,
|
|
"learning_rate": 1.242597432795518e-06,
|
|
"loss": 0.2788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14318808913230896,
|
|
"step": 4020,
|
|
"valid_targets_mean": 3068.4,
|
|
"valid_targets_min": 1722
|
|
},
|
|
{
|
|
"epoch": 6.298904538341158,
|
|
"grad_norm": 0.6909049127073966,
|
|
"learning_rate": 1.215657355323585e-06,
|
|
"loss": 0.2616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16646988689899445,
|
|
"step": 4025,
|
|
"valid_targets_mean": 4583.4,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 6.306729264475743,
|
|
"grad_norm": 0.7016025726694664,
|
|
"learning_rate": 1.189003368350532e-06,
|
|
"loss": 0.2781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17254388332366943,
|
|
"step": 4030,
|
|
"valid_targets_mean": 4647.5,
|
|
"valid_targets_min": 879
|
|
},
|
|
{
|
|
"epoch": 6.314553990610329,
|
|
"grad_norm": 0.6476375797298648,
|
|
"learning_rate": 1.1626358778236192e-06,
|
|
"loss": 0.2527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14307010173797607,
|
|
"step": 4035,
|
|
"valid_targets_mean": 5704.0,
|
|
"valid_targets_min": 2185
|
|
},
|
|
{
|
|
"epoch": 6.322378716744914,
|
|
"grad_norm": 0.7949346088553313,
|
|
"learning_rate": 1.1365552853266904e-06,
|
|
"loss": 0.2587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1325366497039795,
|
|
"step": 4040,
|
|
"valid_targets_mean": 3331.8,
|
|
"valid_targets_min": 1399
|
|
},
|
|
{
|
|
"epoch": 6.330203442879499,
|
|
"grad_norm": 0.6359546001747867,
|
|
"learning_rate": 1.1107619880740584e-06,
|
|
"loss": 0.2892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11835367977619171,
|
|
"step": 4045,
|
|
"valid_targets_mean": 3780.1,
|
|
"valid_targets_min": 1633
|
|
},
|
|
{
|
|
"epoch": 6.338028169014084,
|
|
"grad_norm": 0.840198497907741,
|
|
"learning_rate": 1.085256378904449e-06,
|
|
"loss": 0.2852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1329481154680252,
|
|
"step": 4050,
|
|
"valid_targets_mean": 3105.4,
|
|
"valid_targets_min": 1533
|
|
},
|
|
{
|
|
"epoch": 6.34585289514867,
|
|
"grad_norm": 0.7677442930069731,
|
|
"learning_rate": 1.0600388462750287e-06,
|
|
"loss": 0.2567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14786627888679504,
|
|
"step": 4055,
|
|
"valid_targets_mean": 3650.2,
|
|
"valid_targets_min": 1490
|
|
},
|
|
{
|
|
"epoch": 6.353677621283255,
|
|
"grad_norm": 0.6061723563741847,
|
|
"learning_rate": 1.0351097742554716e-06,
|
|
"loss": 0.2744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09615639597177505,
|
|
"step": 4060,
|
|
"valid_targets_mean": 3938.4,
|
|
"valid_targets_min": 1033
|
|
},
|
|
{
|
|
"epoch": 6.36150234741784,
|
|
"grad_norm": 0.7501260497088945,
|
|
"learning_rate": 1.0104695425221367e-06,
|
|
"loss": 0.2669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1450536996126175,
|
|
"step": 4065,
|
|
"valid_targets_mean": 4408.0,
|
|
"valid_targets_min": 1358
|
|
},
|
|
{
|
|
"epoch": 6.369327073552426,
|
|
"grad_norm": 0.7659280762497281,
|
|
"learning_rate": 9.861185263522578e-07,
|
|
"loss": 0.2888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.161237895488739,
|
|
"step": 4070,
|
|
"valid_targets_mean": 3806.9,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 6.377151799687011,
|
|
"grad_norm": 0.7868517493007064,
|
|
"learning_rate": 9.620570966182363e-07,
|
|
"loss": 0.268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1475399136543274,
|
|
"step": 4075,
|
|
"valid_targets_mean": 3408.0,
|
|
"valid_targets_min": 1037
|
|
},
|
|
{
|
|
"epoch": 6.384976525821596,
|
|
"grad_norm": 0.7182817253359787,
|
|
"learning_rate": 9.382856197820045e-07,
|
|
"loss": 0.2931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0810198113322258,
|
|
"step": 4080,
|
|
"valid_targets_mean": 2319.1,
|
|
"valid_targets_min": 1146
|
|
},
|
|
{
|
|
"epoch": 6.392801251956182,
|
|
"grad_norm": 0.8030965932208856,
|
|
"learning_rate": 9.148044578894311e-07,
|
|
"loss": 0.2626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14512693881988525,
|
|
"step": 4085,
|
|
"valid_targets_mean": 3951.6,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 6.400625978090767,
|
|
"grad_norm": 0.6185035373884759,
|
|
"learning_rate": 8.91613968564815e-07,
|
|
"loss": 0.2542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15244951844215393,
|
|
"step": 4090,
|
|
"valid_targets_mean": 5206.5,
|
|
"valid_targets_min": 1012
|
|
},
|
|
{
|
|
"epoch": 6.408450704225352,
|
|
"grad_norm": 0.7860093128184092,
|
|
"learning_rate": 8.687145050054279e-07,
|
|
"loss": 0.2883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1365126371383667,
|
|
"step": 4095,
|
|
"valid_targets_mean": 2764.6,
|
|
"valid_targets_min": 1619
|
|
},
|
|
{
|
|
"epoch": 6.416275430359938,
|
|
"grad_norm": 0.7459883486942328,
|
|
"learning_rate": 8.461064159761534e-07,
|
|
"loss": 0.2594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10199165344238281,
|
|
"step": 4100,
|
|
"valid_targets_mean": 2748.1,
|
|
"valid_targets_min": 763
|
|
},
|
|
{
|
|
"epoch": 6.424100156494522,
|
|
"grad_norm": 0.6257209229899477,
|
|
"learning_rate": 8.237900458041492e-07,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13864660263061523,
|
|
"step": 4105,
|
|
"valid_targets_mean": 4637.8,
|
|
"valid_targets_min": 1784
|
|
},
|
|
{
|
|
"epoch": 6.431924882629108,
|
|
"grad_norm": 0.7190207981477312,
|
|
"learning_rate": 8.017657343736341e-07,
|
|
"loss": 0.2746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10557948797941208,
|
|
"step": 4110,
|
|
"valid_targets_mean": 2488.8,
|
|
"valid_targets_min": 992
|
|
},
|
|
{
|
|
"epoch": 6.439749608763694,
|
|
"grad_norm": 0.8472644819380017,
|
|
"learning_rate": 7.800338171206823e-07,
|
|
"loss": 0.2666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15867549180984497,
|
|
"step": 4115,
|
|
"valid_targets_mean": 3042.9,
|
|
"valid_targets_min": 1323
|
|
},
|
|
{
|
|
"epoch": 6.447574334898278,
|
|
"grad_norm": 0.7251744012779257,
|
|
"learning_rate": 7.585946250281373e-07,
|
|
"loss": 0.2763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08985558152198792,
|
|
"step": 4120,
|
|
"valid_targets_mean": 3030.6,
|
|
"valid_targets_min": 1891
|
|
},
|
|
{
|
|
"epoch": 6.455399061032864,
|
|
"grad_norm": 0.6715833442511523,
|
|
"learning_rate": 7.374484846205465e-07,
|
|
"loss": 0.2574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12589898705482483,
|
|
"step": 4125,
|
|
"valid_targets_mean": 3852.6,
|
|
"valid_targets_min": 1498
|
|
},
|
|
{
|
|
"epoch": 6.463223787167449,
|
|
"grad_norm": 0.7490761334074147,
|
|
"learning_rate": 7.165957179592231e-07,
|
|
"loss": 0.2705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10826936364173889,
|
|
"step": 4130,
|
|
"valid_targets_mean": 2800.4,
|
|
"valid_targets_min": 1004
|
|
},
|
|
{
|
|
"epoch": 6.471048513302034,
|
|
"grad_norm": 0.7623275229319371,
|
|
"learning_rate": 6.960366426373033e-07,
|
|
"loss": 0.29,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14789995551109314,
|
|
"step": 4135,
|
|
"valid_targets_mean": 2616.1,
|
|
"valid_targets_min": 1237
|
|
},
|
|
{
|
|
"epoch": 6.47887323943662,
|
|
"grad_norm": 0.6751965265645371,
|
|
"learning_rate": 6.757715717749347e-07,
|
|
"loss": 0.2836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17765754461288452,
|
|
"step": 4140,
|
|
"valid_targets_mean": 3833.0,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 6.486697965571205,
|
|
"grad_norm": 0.8408303870693119,
|
|
"learning_rate": 6.558008140145023e-07,
|
|
"loss": 0.2834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1698603630065918,
|
|
"step": 4145,
|
|
"valid_targets_mean": 3388.6,
|
|
"valid_targets_min": 1312
|
|
},
|
|
{
|
|
"epoch": 6.49452269170579,
|
|
"grad_norm": 0.7540530338867465,
|
|
"learning_rate": 6.361246735159143e-07,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13106746971607208,
|
|
"step": 4150,
|
|
"valid_targets_mean": 3068.9,
|
|
"valid_targets_min": 1615
|
|
},
|
|
{
|
|
"epoch": 6.502347417840376,
|
|
"grad_norm": 1.035137802957875,
|
|
"learning_rate": 6.167434499519886e-07,
|
|
"loss": 0.2571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0975155308842659,
|
|
"step": 4155,
|
|
"valid_targets_mean": 1968.6,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 6.510172143974961,
|
|
"grad_norm": 1.1680581125805447,
|
|
"learning_rate": 5.976574385038802e-07,
|
|
"loss": 0.2923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1225200891494751,
|
|
"step": 4160,
|
|
"valid_targets_mean": 3937.8,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 6.517996870109546,
|
|
"grad_norm": 0.7696235224604011,
|
|
"learning_rate": 5.788669298565808e-07,
|
|
"loss": 0.2698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11893462389707565,
|
|
"step": 4165,
|
|
"valid_targets_mean": 2493.6,
|
|
"valid_targets_min": 927
|
|
},
|
|
{
|
|
"epoch": 6.525821596244132,
|
|
"grad_norm": 0.7283460723141727,
|
|
"learning_rate": 5.603722101944997e-07,
|
|
"loss": 0.2479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12703226506710052,
|
|
"step": 4170,
|
|
"valid_targets_mean": 3183.0,
|
|
"valid_targets_min": 944
|
|
},
|
|
{
|
|
"epoch": 6.533646322378717,
|
|
"grad_norm": 0.7356095014042086,
|
|
"learning_rate": 5.421735611971013e-07,
|
|
"loss": 0.2715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13303539156913757,
|
|
"step": 4175,
|
|
"valid_targets_mean": 3526.6,
|
|
"valid_targets_min": 1473
|
|
},
|
|
{
|
|
"epoch": 6.541471048513302,
|
|
"grad_norm": 0.7824546348605419,
|
|
"learning_rate": 5.242712600346167e-07,
|
|
"loss": 0.2643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15646567940711975,
|
|
"step": 4180,
|
|
"valid_targets_mean": 3993.5,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 6.549295774647887,
|
|
"grad_norm": 0.9767806091554654,
|
|
"learning_rate": 5.066655793638209e-07,
|
|
"loss": 0.2704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.135238915681839,
|
|
"step": 4185,
|
|
"valid_targets_mean": 3312.8,
|
|
"valid_targets_min": 1458
|
|
},
|
|
{
|
|
"epoch": 6.557120500782473,
|
|
"grad_norm": 0.8344041651401242,
|
|
"learning_rate": 4.893567873238781e-07,
|
|
"loss": 0.2842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14660945534706116,
|
|
"step": 4190,
|
|
"valid_targets_mean": 3297.1,
|
|
"valid_targets_min": 1376
|
|
},
|
|
{
|
|
"epoch": 6.564945226917058,
|
|
"grad_norm": 0.7132805500723771,
|
|
"learning_rate": 4.7234514753225824e-07,
|
|
"loss": 0.2756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10995329171419144,
|
|
"step": 4195,
|
|
"valid_targets_mean": 3816.5,
|
|
"valid_targets_min": 1735
|
|
},
|
|
{
|
|
"epoch": 6.572769953051643,
|
|
"grad_norm": 0.772012980710538,
|
|
"learning_rate": 4.55630919080734e-07,
|
|
"loss": 0.262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12186846137046814,
|
|
"step": 4200,
|
|
"valid_targets_mean": 2943.8,
|
|
"valid_targets_min": 888
|
|
},
|
|
{
|
|
"epoch": 6.580594679186229,
|
|
"grad_norm": 0.923061252577419,
|
|
"learning_rate": 4.3921435653141444e-07,
|
|
"loss": 0.2913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12110704928636551,
|
|
"step": 4205,
|
|
"valid_targets_mean": 1796.0,
|
|
"valid_targets_min": 1217
|
|
},
|
|
{
|
|
"epoch": 6.588419405320813,
|
|
"grad_norm": 0.8468616455748514,
|
|
"learning_rate": 4.2309570991288406e-07,
|
|
"loss": 0.2711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14820413291454315,
|
|
"step": 4210,
|
|
"valid_targets_mean": 2795.8,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 6.596244131455399,
|
|
"grad_norm": 0.7035521017568263,
|
|
"learning_rate": 4.0727522471638803e-07,
|
|
"loss": 0.2416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.135016068816185,
|
|
"step": 4215,
|
|
"valid_targets_mean": 4140.5,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 6.604068857589985,
|
|
"grad_norm": 0.6910511355395021,
|
|
"learning_rate": 3.9175314189209056e-07,
|
|
"loss": 0.2629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1850554645061493,
|
|
"step": 4220,
|
|
"valid_targets_mean": 4701.8,
|
|
"valid_targets_min": 898
|
|
},
|
|
{
|
|
"epoch": 6.611893583724569,
|
|
"grad_norm": 0.8495842442297227,
|
|
"learning_rate": 3.765296978454136e-07,
|
|
"loss": 0.2833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12765103578567505,
|
|
"step": 4225,
|
|
"valid_targets_mean": 2577.1,
|
|
"valid_targets_min": 864
|
|
},
|
|
{
|
|
"epoch": 6.619718309859155,
|
|
"grad_norm": 0.886761184108452,
|
|
"learning_rate": 3.6160512443343064e-07,
|
|
"loss": 0.2706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1158132255077362,
|
|
"step": 4230,
|
|
"valid_targets_mean": 1919.4,
|
|
"valid_targets_min": 1265
|
|
},
|
|
{
|
|
"epoch": 6.627543035993741,
|
|
"grad_norm": 0.647754917536058,
|
|
"learning_rate": 3.469796489613386e-07,
|
|
"loss": 0.2852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12688162922859192,
|
|
"step": 4235,
|
|
"valid_targets_mean": 4270.0,
|
|
"valid_targets_min": 1860
|
|
},
|
|
{
|
|
"epoch": 6.635367762128325,
|
|
"grad_norm": 0.9383869479287744,
|
|
"learning_rate": 3.3265349417898497e-07,
|
|
"loss": 0.2929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.221079483628273,
|
|
"step": 4240,
|
|
"valid_targets_mean": 4232.6,
|
|
"valid_targets_min": 1539
|
|
},
|
|
{
|
|
"epoch": 6.643192488262911,
|
|
"grad_norm": 0.6925984095529689,
|
|
"learning_rate": 3.186268782774926e-07,
|
|
"loss": 0.2582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14068932831287384,
|
|
"step": 4245,
|
|
"valid_targets_mean": 4038.9,
|
|
"valid_targets_min": 1478
|
|
},
|
|
{
|
|
"epoch": 6.651017214397496,
|
|
"grad_norm": 0.7353345893321505,
|
|
"learning_rate": 3.0490001488592715e-07,
|
|
"loss": 0.2642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12669852375984192,
|
|
"step": 4250,
|
|
"valid_targets_mean": 3296.5,
|
|
"valid_targets_min": 1614
|
|
},
|
|
{
|
|
"epoch": 6.658841940532081,
|
|
"grad_norm": 0.7021573351114582,
|
|
"learning_rate": 2.9147311306804593e-07,
|
|
"loss": 0.2943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19971805810928345,
|
|
"step": 4255,
|
|
"valid_targets_mean": 4394.1,
|
|
"valid_targets_min": 1082
|
|
},
|
|
{
|
|
"epoch": 6.666666666666667,
|
|
"grad_norm": 0.820541349943779,
|
|
"learning_rate": 2.7834637731910086e-07,
|
|
"loss": 0.2716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10248483717441559,
|
|
"step": 4260,
|
|
"valid_targets_mean": 2710.0,
|
|
"valid_targets_min": 1598
|
|
},
|
|
{
|
|
"epoch": 6.674491392801252,
|
|
"grad_norm": 0.7346475197376867,
|
|
"learning_rate": 2.6552000756274956e-07,
|
|
"loss": 0.2735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12598830461502075,
|
|
"step": 4265,
|
|
"valid_targets_mean": 3078.4,
|
|
"valid_targets_min": 1824
|
|
},
|
|
{
|
|
"epoch": 6.682316118935837,
|
|
"grad_norm": 0.7072022545816049,
|
|
"learning_rate": 2.5299419914798897e-07,
|
|
"loss": 0.2652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16383785009384155,
|
|
"step": 4270,
|
|
"valid_targets_mean": 3146.6,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 6.690140845070422,
|
|
"grad_norm": 0.8239894462277089,
|
|
"learning_rate": 2.407691428461911e-07,
|
|
"loss": 0.2568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14332835376262665,
|
|
"step": 4275,
|
|
"valid_targets_mean": 2541.6,
|
|
"valid_targets_min": 968
|
|
},
|
|
{
|
|
"epoch": 6.697965571205008,
|
|
"grad_norm": 0.6223529219099725,
|
|
"learning_rate": 2.288450248481877e-07,
|
|
"loss": 0.2843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15916739404201508,
|
|
"step": 4280,
|
|
"valid_targets_mean": 4396.0,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 6.705790297339593,
|
|
"grad_norm": 0.712413025248185,
|
|
"learning_rate": 2.1722202676144998e-07,
|
|
"loss": 0.2909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1842014044523239,
|
|
"step": 4285,
|
|
"valid_targets_mean": 4505.6,
|
|
"valid_targets_min": 874
|
|
},
|
|
{
|
|
"epoch": 6.713615023474178,
|
|
"grad_norm": 0.6906203834399978,
|
|
"learning_rate": 2.0590032560730221e-07,
|
|
"loss": 0.2843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11809401959180832,
|
|
"step": 4290,
|
|
"valid_targets_mean": 3538.2,
|
|
"valid_targets_min": 891
|
|
},
|
|
{
|
|
"epoch": 6.721439749608764,
|
|
"grad_norm": 0.7941311846145527,
|
|
"learning_rate": 1.9488009381824603e-07,
|
|
"loss": 0.2615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1529211401939392,
|
|
"step": 4295,
|
|
"valid_targets_mean": 3125.1,
|
|
"valid_targets_min": 1450
|
|
},
|
|
{
|
|
"epoch": 6.729264475743349,
|
|
"grad_norm": 0.7237006366354668,
|
|
"learning_rate": 1.8416149923532244e-07,
|
|
"loss": 0.2842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1426929235458374,
|
|
"step": 4300,
|
|
"valid_targets_mean": 4171.5,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 6.737089201877934,
|
|
"grad_norm": 0.7833846553474055,
|
|
"learning_rate": 1.737447051055563e-07,
|
|
"loss": 0.2413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13495653867721558,
|
|
"step": 4305,
|
|
"valid_targets_mean": 2735.0,
|
|
"valid_targets_min": 832
|
|
},
|
|
{
|
|
"epoch": 6.74491392801252,
|
|
"grad_norm": 0.7416323860181655,
|
|
"learning_rate": 1.636298700794714e-07,
|
|
"loss": 0.273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11681170016527176,
|
|
"step": 4310,
|
|
"valid_targets_mean": 3240.6,
|
|
"valid_targets_min": 711
|
|
},
|
|
{
|
|
"epoch": 6.752738654147105,
|
|
"grad_norm": 0.7498331246049966,
|
|
"learning_rate": 1.538171482086792e-07,
|
|
"loss": 0.2688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12384027242660522,
|
|
"step": 4315,
|
|
"valid_targets_mean": 2600.5,
|
|
"valid_targets_min": 711
|
|
},
|
|
{
|
|
"epoch": 6.76056338028169,
|
|
"grad_norm": 0.7037576481423821,
|
|
"learning_rate": 1.4430668894352295e-07,
|
|
"loss": 0.2421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11588933318853378,
|
|
"step": 4320,
|
|
"valid_targets_mean": 3573.1,
|
|
"valid_targets_min": 2174
|
|
},
|
|
{
|
|
"epoch": 6.768388106416276,
|
|
"grad_norm": 0.9181723570127537,
|
|
"learning_rate": 1.3509863713081052e-07,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1210251897573471,
|
|
"step": 4325,
|
|
"valid_targets_mean": 2181.5,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 6.77621283255086,
|
|
"grad_norm": 0.5936698719680746,
|
|
"learning_rate": 1.2619313301159843e-07,
|
|
"loss": 0.2692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19435220956802368,
|
|
"step": 4330,
|
|
"valid_targets_mean": 6256.9,
|
|
"valid_targets_min": 1701
|
|
},
|
|
{
|
|
"epoch": 6.784037558685446,
|
|
"grad_norm": 0.7345658158281559,
|
|
"learning_rate": 1.1759031221907135e-07,
|
|
"loss": 0.3018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12143087387084961,
|
|
"step": 4335,
|
|
"valid_targets_mean": 3185.9,
|
|
"valid_targets_min": 1764
|
|
},
|
|
{
|
|
"epoch": 6.791862284820032,
|
|
"grad_norm": 0.7855209583493306,
|
|
"learning_rate": 1.0929030577645938e-07,
|
|
"loss": 0.2931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1410100907087326,
|
|
"step": 4340,
|
|
"valid_targets_mean": 4765.6,
|
|
"valid_targets_min": 1209
|
|
},
|
|
{
|
|
"epoch": 6.799687010954616,
|
|
"grad_norm": 0.8922503026042126,
|
|
"learning_rate": 1.012932400950506e-07,
|
|
"loss": 0.2646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16632190346717834,
|
|
"step": 4345,
|
|
"valid_targets_mean": 4385.8,
|
|
"valid_targets_min": 532
|
|
},
|
|
{
|
|
"epoch": 6.807511737089202,
|
|
"grad_norm": 0.6869834906605482,
|
|
"learning_rate": 9.359923697227047e-08,
|
|
"loss": 0.2934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1647244393825531,
|
|
"step": 4350,
|
|
"valid_targets_mean": 4976.2,
|
|
"valid_targets_min": 1128
|
|
},
|
|
{
|
|
"epoch": 6.815336463223787,
|
|
"grad_norm": 0.7393330466436312,
|
|
"learning_rate": 8.62084135898189e-08,
|
|
"loss": 0.2498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1692160964012146,
|
|
"step": 4355,
|
|
"valid_targets_mean": 3792.1,
|
|
"valid_targets_min": 1480
|
|
},
|
|
{
|
|
"epoch": 6.823161189358372,
|
|
"grad_norm": 0.6681245743929103,
|
|
"learning_rate": 7.912088251188277e-08,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14722514152526855,
|
|
"step": 4360,
|
|
"valid_targets_mean": 3531.9,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 6.830985915492958,
|
|
"grad_norm": 0.6735319845657954,
|
|
"learning_rate": 7.233675168343501e-08,
|
|
"loss": 0.2676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1108517050743103,
|
|
"step": 4365,
|
|
"valid_targets_mean": 3466.6,
|
|
"valid_targets_min": 1515
|
|
},
|
|
{
|
|
"epoch": 6.838810641627543,
|
|
"grad_norm": 0.7312487558030715,
|
|
"learning_rate": 6.585612442858269e-08,
|
|
"loss": 0.2449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12462092190980911,
|
|
"step": 4370,
|
|
"valid_targets_mean": 3539.8,
|
|
"valid_targets_min": 986
|
|
},
|
|
{
|
|
"epoch": 6.846635367762128,
|
|
"grad_norm": 0.5523649573200828,
|
|
"learning_rate": 5.967909944898375e-08,
|
|
"loss": 0.2893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2146455943584442,
|
|
"step": 4375,
|
|
"valid_targets_mean": 6985.8,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 6.854460093896714,
|
|
"grad_norm": 0.7733641289444678,
|
|
"learning_rate": 5.3805770822363826e-08,
|
|
"loss": 0.2635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11317355930805206,
|
|
"step": 4380,
|
|
"valid_targets_mean": 2682.1,
|
|
"valid_targets_min": 1391
|
|
},
|
|
{
|
|
"epoch": 6.862284820031299,
|
|
"grad_norm": 0.7519970914466085,
|
|
"learning_rate": 4.823622800106842e-08,
|
|
"loss": 0.2601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12192918360233307,
|
|
"step": 4385,
|
|
"valid_targets_mean": 3010.4,
|
|
"valid_targets_min": 1125
|
|
},
|
|
{
|
|
"epoch": 6.870109546165884,
|
|
"grad_norm": 0.5963610296308547,
|
|
"learning_rate": 4.2970555810706307e-08,
|
|
"loss": 0.2749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1348205804824829,
|
|
"step": 4390,
|
|
"valid_targets_mean": 5337.8,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 6.87793427230047,
|
|
"grad_norm": 0.6682338612081955,
|
|
"learning_rate": 3.8008834448852724e-08,
|
|
"loss": 0.2688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10944341123104095,
|
|
"step": 4395,
|
|
"valid_targets_mean": 2936.0,
|
|
"valid_targets_min": 1189
|
|
},
|
|
{
|
|
"epoch": 6.885758998435055,
|
|
"grad_norm": 0.6454618396341081,
|
|
"learning_rate": 3.335113948383706e-08,
|
|
"loss": 0.2476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12947960197925568,
|
|
"step": 4400,
|
|
"valid_targets_mean": 3972.8,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 6.89358372456964,
|
|
"grad_norm": 0.7259517374984258,
|
|
"learning_rate": 2.89975418535815e-08,
|
|
"loss": 0.2864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13472193479537964,
|
|
"step": 4405,
|
|
"valid_targets_mean": 3017.6,
|
|
"valid_targets_min": 1619
|
|
},
|
|
{
|
|
"epoch": 6.901408450704225,
|
|
"grad_norm": 0.8377271278514408,
|
|
"learning_rate": 2.4948107864528615e-08,
|
|
"loss": 0.2713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12895002961158752,
|
|
"step": 4410,
|
|
"valid_targets_mean": 2259.1,
|
|
"valid_targets_min": 1194
|
|
},
|
|
{
|
|
"epoch": 6.909233176838811,
|
|
"grad_norm": 0.7598473759444859,
|
|
"learning_rate": 2.120289919062879e-08,
|
|
"loss": 0.2959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19364115595817566,
|
|
"step": 4415,
|
|
"valid_targets_mean": 4954.6,
|
|
"valid_targets_min": 1522
|
|
},
|
|
{
|
|
"epoch": 6.917057902973396,
|
|
"grad_norm": 0.7721514379644846,
|
|
"learning_rate": 1.776197287239656e-08,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13971076905727386,
|
|
"step": 4420,
|
|
"valid_targets_mean": 3080.0,
|
|
"valid_targets_min": 910
|
|
},
|
|
{
|
|
"epoch": 6.924882629107981,
|
|
"grad_norm": 0.9166891427522832,
|
|
"learning_rate": 1.462538131604907e-08,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16373465955257416,
|
|
"step": 4425,
|
|
"valid_targets_mean": 2393.1,
|
|
"valid_targets_min": 978
|
|
},
|
|
{
|
|
"epoch": 6.932707355242567,
|
|
"grad_norm": 0.6443667687456318,
|
|
"learning_rate": 1.179317229270449e-08,
|
|
"loss": 0.2498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12509199976921082,
|
|
"step": 4430,
|
|
"valid_targets_mean": 3458.5,
|
|
"valid_targets_min": 1348
|
|
},
|
|
{
|
|
"epoch": 6.940532081377151,
|
|
"grad_norm": 0.7630598227060473,
|
|
"learning_rate": 9.265388937655939e-09,
|
|
"loss": 0.2857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10497848689556122,
|
|
"step": 4435,
|
|
"valid_targets_mean": 2144.8,
|
|
"valid_targets_min": 1564
|
|
},
|
|
{
|
|
"epoch": 6.948356807511737,
|
|
"grad_norm": 0.6756763259718751,
|
|
"learning_rate": 7.042069749707559e-09,
|
|
"loss": 0.284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1290498673915863,
|
|
"step": 4440,
|
|
"valid_targets_mean": 3604.8,
|
|
"valid_targets_min": 1031
|
|
},
|
|
{
|
|
"epoch": 6.956181533646323,
|
|
"grad_norm": 0.7580151780127758,
|
|
"learning_rate": 5.123248590599428e-09,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12465497851371765,
|
|
"step": 4445,
|
|
"valid_targets_mean": 3437.0,
|
|
"valid_targets_min": 1006
|
|
},
|
|
{
|
|
"epoch": 6.964006259780907,
|
|
"grad_norm": 0.7787467465822621,
|
|
"learning_rate": 3.5089546844879753e-09,
|
|
"loss": 0.2642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1534889042377472,
|
|
"step": 4450,
|
|
"valid_targets_mean": 3875.2,
|
|
"valid_targets_min": 847
|
|
},
|
|
{
|
|
"epoch": 6.971830985915493,
|
|
"grad_norm": 0.8682728939093965,
|
|
"learning_rate": 2.1992126174885663e-09,
|
|
"loss": 0.2804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15222865343093872,
|
|
"step": 4455,
|
|
"valid_targets_mean": 3019.2,
|
|
"valid_targets_min": 1487
|
|
},
|
|
{
|
|
"epoch": 6.979655712050079,
|
|
"grad_norm": 0.7309532947460168,
|
|
"learning_rate": 1.1940423373246746e-09,
|
|
"loss": 0.2613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13520514965057373,
|
|
"step": 4460,
|
|
"valid_targets_mean": 3915.4,
|
|
"valid_targets_min": 1204
|
|
},
|
|
{
|
|
"epoch": 6.987480438184663,
|
|
"grad_norm": 0.6633042660986664,
|
|
"learning_rate": 4.934591530036947e-10,
|
|
"loss": 0.264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08514449000358582,
|
|
"step": 4465,
|
|
"valid_targets_mean": 2938.8,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 6.995305164319249,
|
|
"grad_norm": 0.9028486790743747,
|
|
"learning_rate": 9.747373459267906e-11,
|
|
"loss": 0.2821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10833489149808884,
|
|
"step": 4470,
|
|
"valid_targets_mean": 2311.9,
|
|
"valid_targets_min": 1012
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11891636997461319,
|
|
"step": 4473,
|
|
"total_flos": 1.1243312580845896e+18,
|
|
"train_loss": 0.34437558135542856,
|
|
"train_runtime": 40056.4465,
|
|
"train_samples_per_second": 1.785,
|
|
"train_steps_per_second": 0.112,
|
|
"valid_targets_mean": 3517.0,
|
|
"valid_targets_min": 1483
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4473,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1.1243312580845896e+18,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|