8980 lines
248 KiB
JSON
8980 lines
248 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4060,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.008620689655172414,
|
|
"grad_norm": 13.232932097220655,
|
|
"learning_rate": 3.9408866995073894e-07,
|
|
"loss": 0.8415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8860415816307068,
|
|
"step": 5,
|
|
"valid_targets_mean": 2136.3,
|
|
"valid_targets_min": 481
|
|
},
|
|
{
|
|
"epoch": 0.017241379310344827,
|
|
"grad_norm": 7.697570930444164,
|
|
"learning_rate": 8.866995073891626e-07,
|
|
"loss": 0.8298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8184617757797241,
|
|
"step": 10,
|
|
"valid_targets_mean": 5075.8,
|
|
"valid_targets_min": 1010
|
|
},
|
|
{
|
|
"epoch": 0.02586206896551724,
|
|
"grad_norm": 11.504287176795119,
|
|
"learning_rate": 1.3793103448275862e-06,
|
|
"loss": 0.8474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8526071310043335,
|
|
"step": 15,
|
|
"valid_targets_mean": 2459.5,
|
|
"valid_targets_min": 519
|
|
},
|
|
{
|
|
"epoch": 0.034482758620689655,
|
|
"grad_norm": 11.157594988010677,
|
|
"learning_rate": 1.8719211822660098e-06,
|
|
"loss": 0.8062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8563398718833923,
|
|
"step": 20,
|
|
"valid_targets_mean": 2083.9,
|
|
"valid_targets_min": 475
|
|
},
|
|
{
|
|
"epoch": 0.04310344827586207,
|
|
"grad_norm": 7.454562769727624,
|
|
"learning_rate": 2.3645320197044334e-06,
|
|
"loss": 0.7396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6408491134643555,
|
|
"step": 25,
|
|
"valid_targets_mean": 2346.5,
|
|
"valid_targets_min": 565
|
|
},
|
|
{
|
|
"epoch": 0.05172413793103448,
|
|
"grad_norm": 4.462332245510196,
|
|
"learning_rate": 2.8571428571428573e-06,
|
|
"loss": 0.7238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5656313896179199,
|
|
"step": 30,
|
|
"valid_targets_mean": 2104.9,
|
|
"valid_targets_min": 510
|
|
},
|
|
{
|
|
"epoch": 0.0603448275862069,
|
|
"grad_norm": 3.8825932284325297,
|
|
"learning_rate": 3.349753694581281e-06,
|
|
"loss": 0.7084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7533521056175232,
|
|
"step": 35,
|
|
"valid_targets_mean": 1671.2,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 0.06896551724137931,
|
|
"grad_norm": 1.722144166921226,
|
|
"learning_rate": 3.842364532019705e-06,
|
|
"loss": 0.6874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6071873903274536,
|
|
"step": 40,
|
|
"valid_targets_mean": 3528.5,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 0.07758620689655173,
|
|
"grad_norm": 2.321454376114494,
|
|
"learning_rate": 4.334975369458129e-06,
|
|
"loss": 0.6636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6896767616271973,
|
|
"step": 45,
|
|
"valid_targets_mean": 1569.0,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 0.08620689655172414,
|
|
"grad_norm": 1.2086382136138112,
|
|
"learning_rate": 4.8275862068965525e-06,
|
|
"loss": 0.6038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5344650745391846,
|
|
"step": 50,
|
|
"valid_targets_mean": 2312.8,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 0.09482758620689655,
|
|
"grad_norm": 0.9529660756316026,
|
|
"learning_rate": 5.320197044334976e-06,
|
|
"loss": 0.6209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4809268116950989,
|
|
"step": 55,
|
|
"valid_targets_mean": 2783.3,
|
|
"valid_targets_min": 508
|
|
},
|
|
{
|
|
"epoch": 0.10344827586206896,
|
|
"grad_norm": 0.780182127462651,
|
|
"learning_rate": 5.812807881773399e-06,
|
|
"loss": 0.5396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4358481764793396,
|
|
"step": 60,
|
|
"valid_targets_mean": 3265.1,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 0.11206896551724138,
|
|
"grad_norm": 0.9472444738751354,
|
|
"learning_rate": 6.305418719211823e-06,
|
|
"loss": 0.5647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6075949668884277,
|
|
"step": 65,
|
|
"valid_targets_mean": 2665.1,
|
|
"valid_targets_min": 431
|
|
},
|
|
{
|
|
"epoch": 0.1206896551724138,
|
|
"grad_norm": 0.830653122252629,
|
|
"learning_rate": 6.798029556650246e-06,
|
|
"loss": 0.5177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5087354779243469,
|
|
"step": 70,
|
|
"valid_targets_mean": 2723.9,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 0.12931034482758622,
|
|
"grad_norm": 1.0815877554445863,
|
|
"learning_rate": 7.290640394088671e-06,
|
|
"loss": 0.5517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.686690092086792,
|
|
"step": 75,
|
|
"valid_targets_mean": 1865.0,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 0.13793103448275862,
|
|
"grad_norm": 1.132738933241941,
|
|
"learning_rate": 7.783251231527095e-06,
|
|
"loss": 0.6183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5349428653717041,
|
|
"step": 80,
|
|
"valid_targets_mean": 2359.5,
|
|
"valid_targets_min": 887
|
|
},
|
|
{
|
|
"epoch": 0.14655172413793102,
|
|
"grad_norm": 0.6591541818226556,
|
|
"learning_rate": 8.275862068965518e-06,
|
|
"loss": 0.4737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.382580041885376,
|
|
"step": 85,
|
|
"valid_targets_mean": 3268.1,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 0.15517241379310345,
|
|
"grad_norm": 0.9248007332339961,
|
|
"learning_rate": 8.768472906403942e-06,
|
|
"loss": 0.5053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.527409553527832,
|
|
"step": 90,
|
|
"valid_targets_mean": 2054.4,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 0.16379310344827586,
|
|
"grad_norm": 0.9086933978595335,
|
|
"learning_rate": 9.261083743842364e-06,
|
|
"loss": 0.5679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6158876419067383,
|
|
"step": 95,
|
|
"valid_targets_mean": 2684.1,
|
|
"valid_targets_min": 427
|
|
},
|
|
{
|
|
"epoch": 0.1724137931034483,
|
|
"grad_norm": 0.8976459156554277,
|
|
"learning_rate": 9.75369458128079e-06,
|
|
"loss": 0.5457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6348117589950562,
|
|
"step": 100,
|
|
"valid_targets_mean": 2928.3,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 0.1810344827586207,
|
|
"grad_norm": 0.902855097961049,
|
|
"learning_rate": 1.0246305418719214e-05,
|
|
"loss": 0.5007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5758171677589417,
|
|
"step": 105,
|
|
"valid_targets_mean": 2074.6,
|
|
"valid_targets_min": 481
|
|
},
|
|
{
|
|
"epoch": 0.1896551724137931,
|
|
"grad_norm": 0.5842031863511825,
|
|
"learning_rate": 1.0738916256157637e-05,
|
|
"loss": 0.4347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38487473130226135,
|
|
"step": 110,
|
|
"valid_targets_mean": 3755.7,
|
|
"valid_targets_min": 586
|
|
},
|
|
{
|
|
"epoch": 0.19827586206896552,
|
|
"grad_norm": 0.7740217123427929,
|
|
"learning_rate": 1.123152709359606e-05,
|
|
"loss": 0.502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43103402853012085,
|
|
"step": 115,
|
|
"valid_targets_mean": 2872.9,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 0.20689655172413793,
|
|
"grad_norm": 0.8902368577963162,
|
|
"learning_rate": 1.1724137931034483e-05,
|
|
"loss": 0.552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.548175573348999,
|
|
"step": 120,
|
|
"valid_targets_mean": 2121.9,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 0.21551724137931033,
|
|
"grad_norm": 0.8264039936367132,
|
|
"learning_rate": 1.2216748768472909e-05,
|
|
"loss": 0.4823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4839908480644226,
|
|
"step": 125,
|
|
"valid_targets_mean": 2181.6,
|
|
"valid_targets_min": 603
|
|
},
|
|
{
|
|
"epoch": 0.22413793103448276,
|
|
"grad_norm": 0.8450133097474695,
|
|
"learning_rate": 1.2709359605911331e-05,
|
|
"loss": 0.443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45593246817588806,
|
|
"step": 130,
|
|
"valid_targets_mean": 2023.6,
|
|
"valid_targets_min": 576
|
|
},
|
|
{
|
|
"epoch": 0.23275862068965517,
|
|
"grad_norm": 0.5788964343898347,
|
|
"learning_rate": 1.3201970443349755e-05,
|
|
"loss": 0.5249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3811132609844208,
|
|
"step": 135,
|
|
"valid_targets_mean": 3966.9,
|
|
"valid_targets_min": 1118
|
|
},
|
|
{
|
|
"epoch": 0.2413793103448276,
|
|
"grad_norm": 0.7632885346479155,
|
|
"learning_rate": 1.369458128078818e-05,
|
|
"loss": 0.5116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.56120765209198,
|
|
"step": 140,
|
|
"valid_targets_mean": 3156.3,
|
|
"valid_targets_min": 481
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"grad_norm": 0.8620275887963595,
|
|
"learning_rate": 1.4187192118226602e-05,
|
|
"loss": 0.5083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.53715980052948,
|
|
"step": 145,
|
|
"valid_targets_mean": 2210.4,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 0.25862068965517243,
|
|
"grad_norm": 0.8511906311114458,
|
|
"learning_rate": 1.4679802955665026e-05,
|
|
"loss": 0.4255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4668218493461609,
|
|
"step": 150,
|
|
"valid_targets_mean": 2173.1,
|
|
"valid_targets_min": 870
|
|
},
|
|
{
|
|
"epoch": 0.2672413793103448,
|
|
"grad_norm": 0.7555714152272107,
|
|
"learning_rate": 1.5172413793103448e-05,
|
|
"loss": 0.4816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37490010261535645,
|
|
"step": 155,
|
|
"valid_targets_mean": 2356.9,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 0.27586206896551724,
|
|
"grad_norm": 0.8402578283696395,
|
|
"learning_rate": 1.5665024630541875e-05,
|
|
"loss": 0.5105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5459040403366089,
|
|
"step": 160,
|
|
"valid_targets_mean": 2340.6,
|
|
"valid_targets_min": 429
|
|
},
|
|
{
|
|
"epoch": 0.28448275862068967,
|
|
"grad_norm": 0.6775840481703621,
|
|
"learning_rate": 1.6157635467980298e-05,
|
|
"loss": 0.4612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36925819516181946,
|
|
"step": 165,
|
|
"valid_targets_mean": 2578.4,
|
|
"valid_targets_min": 485
|
|
},
|
|
{
|
|
"epoch": 0.29310344827586204,
|
|
"grad_norm": 0.780109213871394,
|
|
"learning_rate": 1.665024630541872e-05,
|
|
"loss": 0.475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5196021795272827,
|
|
"step": 170,
|
|
"valid_targets_mean": 2996.8,
|
|
"valid_targets_min": 1004
|
|
},
|
|
{
|
|
"epoch": 0.3017241379310345,
|
|
"grad_norm": 0.8548697531410809,
|
|
"learning_rate": 1.7142857142857142e-05,
|
|
"loss": 0.4598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4733397662639618,
|
|
"step": 175,
|
|
"valid_targets_mean": 2293.2,
|
|
"valid_targets_min": 569
|
|
},
|
|
{
|
|
"epoch": 0.3103448275862069,
|
|
"grad_norm": 0.8546736192908461,
|
|
"learning_rate": 1.7635467980295567e-05,
|
|
"loss": 0.4672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43413564562797546,
|
|
"step": 180,
|
|
"valid_targets_mean": 1991.5,
|
|
"valid_targets_min": 864
|
|
},
|
|
{
|
|
"epoch": 0.31896551724137934,
|
|
"grad_norm": 0.7252441236863357,
|
|
"learning_rate": 1.8128078817733993e-05,
|
|
"loss": 0.4961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5404946208000183,
|
|
"step": 185,
|
|
"valid_targets_mean": 3216.4,
|
|
"valid_targets_min": 1294
|
|
},
|
|
{
|
|
"epoch": 0.3275862068965517,
|
|
"grad_norm": 0.7989378071687083,
|
|
"learning_rate": 1.8620689655172415e-05,
|
|
"loss": 0.4817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5027145743370056,
|
|
"step": 190,
|
|
"valid_targets_mean": 2232.9,
|
|
"valid_targets_min": 395
|
|
},
|
|
{
|
|
"epoch": 0.33620689655172414,
|
|
"grad_norm": 0.7178395258693016,
|
|
"learning_rate": 1.911330049261084e-05,
|
|
"loss": 0.468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5232639312744141,
|
|
"step": 195,
|
|
"valid_targets_mean": 3374.6,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 0.3448275862068966,
|
|
"grad_norm": 0.6329553171336134,
|
|
"learning_rate": 1.9605911330049263e-05,
|
|
"loss": 0.4526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4167773425579071,
|
|
"step": 200,
|
|
"valid_targets_mean": 3209.2,
|
|
"valid_targets_min": 485
|
|
},
|
|
{
|
|
"epoch": 0.35344827586206895,
|
|
"grad_norm": 0.6728546038871683,
|
|
"learning_rate": 2.0098522167487688e-05,
|
|
"loss": 0.4307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3922424018383026,
|
|
"step": 205,
|
|
"valid_targets_mean": 2740.6,
|
|
"valid_targets_min": 969
|
|
},
|
|
{
|
|
"epoch": 0.3620689655172414,
|
|
"grad_norm": 1.04961738262584,
|
|
"learning_rate": 2.059113300492611e-05,
|
|
"loss": 0.5102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4533177614212036,
|
|
"step": 210,
|
|
"valid_targets_mean": 2375.1,
|
|
"valid_targets_min": 536
|
|
},
|
|
{
|
|
"epoch": 0.3706896551724138,
|
|
"grad_norm": 0.7434878133431768,
|
|
"learning_rate": 2.1083743842364536e-05,
|
|
"loss": 0.4855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5148615837097168,
|
|
"step": 215,
|
|
"valid_targets_mean": 3240.9,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 0.3793103448275862,
|
|
"grad_norm": 0.7872528477220174,
|
|
"learning_rate": 2.1576354679802954e-05,
|
|
"loss": 0.4621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4126091003417969,
|
|
"step": 220,
|
|
"valid_targets_mean": 2226.2,
|
|
"valid_targets_min": 825
|
|
},
|
|
{
|
|
"epoch": 0.3879310344827586,
|
|
"grad_norm": 0.7540671720188216,
|
|
"learning_rate": 2.206896551724138e-05,
|
|
"loss": 0.4648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4633001387119293,
|
|
"step": 225,
|
|
"valid_targets_mean": 2634.1,
|
|
"valid_targets_min": 660
|
|
},
|
|
{
|
|
"epoch": 0.39655172413793105,
|
|
"grad_norm": 0.7420533564001498,
|
|
"learning_rate": 2.2561576354679805e-05,
|
|
"loss": 0.4487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4774375259876251,
|
|
"step": 230,
|
|
"valid_targets_mean": 2631.4,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 0.4051724137931034,
|
|
"grad_norm": 0.6157590867590423,
|
|
"learning_rate": 2.3054187192118228e-05,
|
|
"loss": 0.4621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43844375014305115,
|
|
"step": 235,
|
|
"valid_targets_mean": 4082.3,
|
|
"valid_targets_min": 573
|
|
},
|
|
{
|
|
"epoch": 0.41379310344827586,
|
|
"grad_norm": 0.8565991200200499,
|
|
"learning_rate": 2.3546798029556653e-05,
|
|
"loss": 0.5309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6025887727737427,
|
|
"step": 240,
|
|
"valid_targets_mean": 2522.6,
|
|
"valid_targets_min": 586
|
|
},
|
|
{
|
|
"epoch": 0.4224137931034483,
|
|
"grad_norm": 1.2583799529045883,
|
|
"learning_rate": 2.403940886699508e-05,
|
|
"loss": 0.4855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5208061933517456,
|
|
"step": 245,
|
|
"valid_targets_mean": 2395.9,
|
|
"valid_targets_min": 437
|
|
},
|
|
{
|
|
"epoch": 0.43103448275862066,
|
|
"grad_norm": 0.8146647432675135,
|
|
"learning_rate": 2.4532019704433497e-05,
|
|
"loss": 0.4903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45655834674835205,
|
|
"step": 250,
|
|
"valid_targets_mean": 2176.9,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 0.4396551724137931,
|
|
"grad_norm": 0.8072309949308645,
|
|
"learning_rate": 2.5024630541871923e-05,
|
|
"loss": 0.4304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47415584325790405,
|
|
"step": 255,
|
|
"valid_targets_mean": 2326.9,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 0.4482758620689655,
|
|
"grad_norm": 0.8227419849582898,
|
|
"learning_rate": 2.551724137931035e-05,
|
|
"loss": 0.4459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39482447504997253,
|
|
"step": 260,
|
|
"valid_targets_mean": 1956.3,
|
|
"valid_targets_min": 528
|
|
},
|
|
{
|
|
"epoch": 0.45689655172413796,
|
|
"grad_norm": 0.930242183186987,
|
|
"learning_rate": 2.600985221674877e-05,
|
|
"loss": 0.4728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44298598170280457,
|
|
"step": 265,
|
|
"valid_targets_mean": 1632.1,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 0.46551724137931033,
|
|
"grad_norm": 0.6101603625456252,
|
|
"learning_rate": 2.6502463054187196e-05,
|
|
"loss": 0.4629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47349387407302856,
|
|
"step": 270,
|
|
"valid_targets_mean": 4623.9,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 0.47413793103448276,
|
|
"grad_norm": 0.9537571622572907,
|
|
"learning_rate": 2.6995073891625615e-05,
|
|
"loss": 0.5005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47361212968826294,
|
|
"step": 275,
|
|
"valid_targets_mean": 2541.9,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 0.4827586206896552,
|
|
"grad_norm": 0.7408116261053846,
|
|
"learning_rate": 2.748768472906404e-05,
|
|
"loss": 0.4559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5130395889282227,
|
|
"step": 280,
|
|
"valid_targets_mean": 2848.1,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 0.49137931034482757,
|
|
"grad_norm": 0.6817201938253903,
|
|
"learning_rate": 2.7980295566502466e-05,
|
|
"loss": 0.494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45302706956863403,
|
|
"step": 285,
|
|
"valid_targets_mean": 3216.0,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"grad_norm": 0.8714105802109559,
|
|
"learning_rate": 2.8472906403940888e-05,
|
|
"loss": 0.4814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5157756805419922,
|
|
"step": 290,
|
|
"valid_targets_mean": 2243.6,
|
|
"valid_targets_min": 561
|
|
},
|
|
{
|
|
"epoch": 0.5086206896551724,
|
|
"grad_norm": 0.7282708487682565,
|
|
"learning_rate": 2.8965517241379313e-05,
|
|
"loss": 0.4614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39844846725463867,
|
|
"step": 295,
|
|
"valid_targets_mean": 3180.0,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 0.5172413793103449,
|
|
"grad_norm": 0.8975983503967481,
|
|
"learning_rate": 2.945812807881774e-05,
|
|
"loss": 0.4388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.531843900680542,
|
|
"step": 300,
|
|
"valid_targets_mean": 1914.1,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 0.5258620689655172,
|
|
"grad_norm": 0.8163712348212677,
|
|
"learning_rate": 2.9950738916256158e-05,
|
|
"loss": 0.5058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5456841588020325,
|
|
"step": 305,
|
|
"valid_targets_mean": 2314.6,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 0.5344827586206896,
|
|
"grad_norm": 0.7895716575686172,
|
|
"learning_rate": 3.0443349753694583e-05,
|
|
"loss": 0.4494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4308232069015503,
|
|
"step": 310,
|
|
"valid_targets_mean": 2846.6,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 0.5431034482758621,
|
|
"grad_norm": 0.6475104958991397,
|
|
"learning_rate": 3.093596059113301e-05,
|
|
"loss": 0.4426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39463210105895996,
|
|
"step": 315,
|
|
"valid_targets_mean": 3386.5,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 0.5517241379310345,
|
|
"grad_norm": 0.9928118152840254,
|
|
"learning_rate": 3.142857142857143e-05,
|
|
"loss": 0.478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4573419690132141,
|
|
"step": 320,
|
|
"valid_targets_mean": 1744.3,
|
|
"valid_targets_min": 375
|
|
},
|
|
{
|
|
"epoch": 0.5603448275862069,
|
|
"grad_norm": 0.8950697219459826,
|
|
"learning_rate": 3.1921182266009856e-05,
|
|
"loss": 0.4686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41824567317962646,
|
|
"step": 325,
|
|
"valid_targets_mean": 1810.9,
|
|
"valid_targets_min": 873
|
|
},
|
|
{
|
|
"epoch": 0.5689655172413793,
|
|
"grad_norm": 0.8043290863445247,
|
|
"learning_rate": 3.2413793103448275e-05,
|
|
"loss": 0.4703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5139874815940857,
|
|
"step": 330,
|
|
"valid_targets_mean": 2277.0,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 0.5775862068965517,
|
|
"grad_norm": 0.9413544056683327,
|
|
"learning_rate": 3.29064039408867e-05,
|
|
"loss": 0.4245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40947747230529785,
|
|
"step": 335,
|
|
"valid_targets_mean": 1762.7,
|
|
"valid_targets_min": 571
|
|
},
|
|
{
|
|
"epoch": 0.5862068965517241,
|
|
"grad_norm": 0.6861532735672352,
|
|
"learning_rate": 3.3399014778325126e-05,
|
|
"loss": 0.463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39968305826187134,
|
|
"step": 340,
|
|
"valid_targets_mean": 3021.8,
|
|
"valid_targets_min": 863
|
|
},
|
|
{
|
|
"epoch": 0.5948275862068966,
|
|
"grad_norm": 0.8640437276593032,
|
|
"learning_rate": 3.389162561576355e-05,
|
|
"loss": 0.4665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43528294563293457,
|
|
"step": 345,
|
|
"valid_targets_mean": 2015.9,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 0.603448275862069,
|
|
"grad_norm": 0.732764848171335,
|
|
"learning_rate": 3.438423645320197e-05,
|
|
"loss": 0.4797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5329480171203613,
|
|
"step": 350,
|
|
"valid_targets_mean": 2950.9,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 0.6120689655172413,
|
|
"grad_norm": 0.8024994379243645,
|
|
"learning_rate": 3.4876847290640396e-05,
|
|
"loss": 0.4418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4507256746292114,
|
|
"step": 355,
|
|
"valid_targets_mean": 2193.6,
|
|
"valid_targets_min": 442
|
|
},
|
|
{
|
|
"epoch": 0.6206896551724138,
|
|
"grad_norm": 0.8262666854372246,
|
|
"learning_rate": 3.536945812807882e-05,
|
|
"loss": 0.4798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48532766103744507,
|
|
"step": 360,
|
|
"valid_targets_mean": 2739.9,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 0.6293103448275862,
|
|
"grad_norm": 0.6332844053249423,
|
|
"learning_rate": 3.586206896551725e-05,
|
|
"loss": 0.4045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3117547035217285,
|
|
"step": 365,
|
|
"valid_targets_mean": 2887.0,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 0.6379310344827587,
|
|
"grad_norm": 0.7460508213663771,
|
|
"learning_rate": 3.6354679802955665e-05,
|
|
"loss": 0.4704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5197142958641052,
|
|
"step": 370,
|
|
"valid_targets_mean": 2832.7,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 0.646551724137931,
|
|
"grad_norm": 0.5464572201060651,
|
|
"learning_rate": 3.684729064039409e-05,
|
|
"loss": 0.43,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35164475440979004,
|
|
"step": 375,
|
|
"valid_targets_mean": 4484.4,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 0.6551724137931034,
|
|
"grad_norm": 0.8991528330150939,
|
|
"learning_rate": 3.7339901477832516e-05,
|
|
"loss": 0.4636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3793317675590515,
|
|
"step": 380,
|
|
"valid_targets_mean": 1875.5,
|
|
"valid_targets_min": 787
|
|
},
|
|
{
|
|
"epoch": 0.6637931034482759,
|
|
"grad_norm": 0.6949477428765862,
|
|
"learning_rate": 3.7832512315270935e-05,
|
|
"loss": 0.4305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4816117286682129,
|
|
"step": 385,
|
|
"valid_targets_mean": 4579.9,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 0.6724137931034483,
|
|
"grad_norm": 0.6593748377546642,
|
|
"learning_rate": 3.832512315270936e-05,
|
|
"loss": 0.4543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4020620584487915,
|
|
"step": 390,
|
|
"valid_targets_mean": 2879.8,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 0.6810344827586207,
|
|
"grad_norm": 1.0434173212328082,
|
|
"learning_rate": 3.8817733990147786e-05,
|
|
"loss": 0.4919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5102270245552063,
|
|
"step": 395,
|
|
"valid_targets_mean": 1963.6,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 0.6896551724137931,
|
|
"grad_norm": 0.891541913502292,
|
|
"learning_rate": 3.931034482758621e-05,
|
|
"loss": 0.4897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5392195582389832,
|
|
"step": 400,
|
|
"valid_targets_mean": 2574.2,
|
|
"valid_targets_min": 757
|
|
},
|
|
{
|
|
"epoch": 0.6982758620689655,
|
|
"grad_norm": 1.1541729925888546,
|
|
"learning_rate": 3.980295566502464e-05,
|
|
"loss": 0.4299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42217105627059937,
|
|
"step": 405,
|
|
"valid_targets_mean": 2676.6,
|
|
"valid_targets_min": 479
|
|
},
|
|
{
|
|
"epoch": 0.7068965517241379,
|
|
"grad_norm": 0.7698263830350504,
|
|
"learning_rate": 3.999993347192948e-05,
|
|
"loss": 0.4158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4036504328250885,
|
|
"step": 410,
|
|
"valid_targets_mean": 2536.8,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 0.7155172413793104,
|
|
"grad_norm": 0.7305436039635469,
|
|
"learning_rate": 3.9999526913101334e-05,
|
|
"loss": 0.4429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45616382360458374,
|
|
"step": 415,
|
|
"valid_targets_mean": 2623.6,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 0.7241379310344828,
|
|
"grad_norm": 0.5924152937193563,
|
|
"learning_rate": 3.999875076298832e-05,
|
|
"loss": 0.4298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3307499587535858,
|
|
"step": 420,
|
|
"valid_targets_mean": 3216.8,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 0.7327586206896551,
|
|
"grad_norm": 0.7974007037020275,
|
|
"learning_rate": 3.9997605035933704e-05,
|
|
"loss": 0.4076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3921046555042267,
|
|
"step": 425,
|
|
"valid_targets_mean": 2243.4,
|
|
"valid_targets_min": 584
|
|
},
|
|
{
|
|
"epoch": 0.7413793103448276,
|
|
"grad_norm": 0.7809983920045804,
|
|
"learning_rate": 3.99960897531105e-05,
|
|
"loss": 0.4023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4133235216140747,
|
|
"step": 430,
|
|
"valid_targets_mean": 2451.4,
|
|
"valid_targets_min": 683
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"grad_norm": 0.8564857581354621,
|
|
"learning_rate": 3.999420494252116e-05,
|
|
"loss": 0.4612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.50236976146698,
|
|
"step": 435,
|
|
"valid_targets_mean": 2297.1,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 0.7586206896551724,
|
|
"grad_norm": 0.8282054432302532,
|
|
"learning_rate": 3.9991950638996976e-05,
|
|
"loss": 0.4775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4601110816001892,
|
|
"step": 440,
|
|
"valid_targets_mean": 1987.6,
|
|
"valid_targets_min": 510
|
|
},
|
|
{
|
|
"epoch": 0.7672413793103449,
|
|
"grad_norm": 2.196261301737067,
|
|
"learning_rate": 3.998932688419748e-05,
|
|
"loss": 0.4424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40777525305747986,
|
|
"step": 445,
|
|
"valid_targets_mean": 2384.4,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 0.7758620689655172,
|
|
"grad_norm": 0.7767034549725097,
|
|
"learning_rate": 3.9986333726609674e-05,
|
|
"loss": 0.4409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40235668420791626,
|
|
"step": 450,
|
|
"valid_targets_mean": 2375.9,
|
|
"valid_targets_min": 389
|
|
},
|
|
{
|
|
"epoch": 0.7844827586206896,
|
|
"grad_norm": 0.8868383113758573,
|
|
"learning_rate": 3.99829712215471e-05,
|
|
"loss": 0.4696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4738074839115143,
|
|
"step": 455,
|
|
"valid_targets_mean": 2075.4,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 0.7931034482758621,
|
|
"grad_norm": 0.8653398113931736,
|
|
"learning_rate": 3.997923943114886e-05,
|
|
"loss": 0.4595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48855382204055786,
|
|
"step": 460,
|
|
"valid_targets_mean": 3139.3,
|
|
"valid_targets_min": 956
|
|
},
|
|
{
|
|
"epoch": 0.8017241379310345,
|
|
"grad_norm": 0.683892302463927,
|
|
"learning_rate": 3.997513842437845e-05,
|
|
"loss": 0.4311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36282363533973694,
|
|
"step": 465,
|
|
"valid_targets_mean": 2642.1,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 0.8103448275862069,
|
|
"grad_norm": 0.9752721796080772,
|
|
"learning_rate": 3.997066827702248e-05,
|
|
"loss": 0.4858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4777313768863678,
|
|
"step": 470,
|
|
"valid_targets_mean": 1706.8,
|
|
"valid_targets_min": 696
|
|
},
|
|
{
|
|
"epoch": 0.8189655172413793,
|
|
"grad_norm": 0.6314778562370668,
|
|
"learning_rate": 3.996582907168928e-05,
|
|
"loss": 0.5172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4231725335121155,
|
|
"step": 475,
|
|
"valid_targets_mean": 3065.4,
|
|
"valid_targets_min": 769
|
|
},
|
|
{
|
|
"epoch": 0.8275862068965517,
|
|
"grad_norm": 0.8832569013931396,
|
|
"learning_rate": 3.996062089780737e-05,
|
|
"loss": 0.4354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3854944705963135,
|
|
"step": 480,
|
|
"valid_targets_mean": 2018.0,
|
|
"valid_targets_min": 904
|
|
},
|
|
{
|
|
"epoch": 0.8362068965517241,
|
|
"grad_norm": 0.6652631917767244,
|
|
"learning_rate": 3.99550438516238e-05,
|
|
"loss": 0.5466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4254167377948761,
|
|
"step": 485,
|
|
"valid_targets_mean": 3028.4,
|
|
"valid_targets_min": 513
|
|
},
|
|
{
|
|
"epoch": 0.8448275862068966,
|
|
"grad_norm": 0.7597451955001047,
|
|
"learning_rate": 3.994909803620241e-05,
|
|
"loss": 0.456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44718462228775024,
|
|
"step": 490,
|
|
"valid_targets_mean": 2557.1,
|
|
"valid_targets_min": 454
|
|
},
|
|
{
|
|
"epoch": 0.853448275862069,
|
|
"grad_norm": 0.6578252774007807,
|
|
"learning_rate": 3.994278356142187e-05,
|
|
"loss": 0.4461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44781360030174255,
|
|
"step": 495,
|
|
"valid_targets_mean": 2988.4,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 0.8620689655172413,
|
|
"grad_norm": 0.7262448958158562,
|
|
"learning_rate": 3.993610054397368e-05,
|
|
"loss": 0.4587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3711667060852051,
|
|
"step": 500,
|
|
"valid_targets_mean": 2528.9,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 0.8706896551724138,
|
|
"grad_norm": 0.7147220841254082,
|
|
"learning_rate": 3.992904910736001e-05,
|
|
"loss": 0.4604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.397768497467041,
|
|
"step": 505,
|
|
"valid_targets_mean": 2382.1,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 0.8793103448275862,
|
|
"grad_norm": 0.6122583548543834,
|
|
"learning_rate": 3.9921629381891425e-05,
|
|
"loss": 0.39,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4616737365722656,
|
|
"step": 510,
|
|
"valid_targets_mean": 3468.6,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 0.8879310344827587,
|
|
"grad_norm": 0.6885272133475613,
|
|
"learning_rate": 3.991384150468445e-05,
|
|
"loss": 0.4546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.431570440530777,
|
|
"step": 515,
|
|
"valid_targets_mean": 2822.1,
|
|
"valid_targets_min": 414
|
|
},
|
|
{
|
|
"epoch": 0.896551724137931,
|
|
"grad_norm": 0.6550575188514011,
|
|
"learning_rate": 3.9905685619659074e-05,
|
|
"loss": 0.444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3826547861099243,
|
|
"step": 520,
|
|
"valid_targets_mean": 3025.6,
|
|
"valid_targets_min": 527
|
|
},
|
|
{
|
|
"epoch": 0.9051724137931034,
|
|
"grad_norm": 0.7376316328489212,
|
|
"learning_rate": 3.9897161877536076e-05,
|
|
"loss": 0.4599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44813811779022217,
|
|
"step": 525,
|
|
"valid_targets_mean": 2054.6,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 0.9137931034482759,
|
|
"grad_norm": 0.6800801555525916,
|
|
"learning_rate": 3.9888270435834196e-05,
|
|
"loss": 0.4432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3814975917339325,
|
|
"step": 530,
|
|
"valid_targets_mean": 2481.4,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 0.9224137931034483,
|
|
"grad_norm": 0.678026309874535,
|
|
"learning_rate": 3.987901145886731e-05,
|
|
"loss": 0.4055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.454716295003891,
|
|
"step": 535,
|
|
"valid_targets_mean": 3485.5,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 0.9310344827586207,
|
|
"grad_norm": 0.733184976085652,
|
|
"learning_rate": 3.9869385117741314e-05,
|
|
"loss": 0.4626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42944812774658203,
|
|
"step": 540,
|
|
"valid_targets_mean": 2215.9,
|
|
"valid_targets_min": 412
|
|
},
|
|
{
|
|
"epoch": 0.9396551724137931,
|
|
"grad_norm": 0.7245253036454354,
|
|
"learning_rate": 3.985939159035101e-05,
|
|
"loss": 0.3871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40529000759124756,
|
|
"step": 545,
|
|
"valid_targets_mean": 2216.6,
|
|
"valid_targets_min": 441
|
|
},
|
|
{
|
|
"epoch": 0.9482758620689655,
|
|
"grad_norm": 0.8082542554339605,
|
|
"learning_rate": 3.98490310613768e-05,
|
|
"loss": 0.4763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5691283345222473,
|
|
"step": 550,
|
|
"valid_targets_mean": 2477.1,
|
|
"valid_targets_min": 609
|
|
},
|
|
{
|
|
"epoch": 0.9568965517241379,
|
|
"grad_norm": 0.6170643339917548,
|
|
"learning_rate": 3.983830372228127e-05,
|
|
"loss": 0.4762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4863627552986145,
|
|
"step": 555,
|
|
"valid_targets_mean": 3696.4,
|
|
"valid_targets_min": 845
|
|
},
|
|
{
|
|
"epoch": 0.9655172413793104,
|
|
"grad_norm": 0.828710236174964,
|
|
"learning_rate": 3.982720977130567e-05,
|
|
"loss": 0.4584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5036779642105103,
|
|
"step": 560,
|
|
"valid_targets_mean": 2395.6,
|
|
"valid_targets_min": 843
|
|
},
|
|
{
|
|
"epoch": 0.9741379310344828,
|
|
"grad_norm": 0.8786478661180452,
|
|
"learning_rate": 3.9815749413466204e-05,
|
|
"loss": 0.4184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46909040212631226,
|
|
"step": 565,
|
|
"valid_targets_mean": 1995.8,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 0.9827586206896551,
|
|
"grad_norm": 0.7947261832670387,
|
|
"learning_rate": 3.980392286055033e-05,
|
|
"loss": 0.4518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4978959262371063,
|
|
"step": 570,
|
|
"valid_targets_mean": 2086.5,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 0.9913793103448276,
|
|
"grad_norm": 0.7098014379889231,
|
|
"learning_rate": 3.979173033111275e-05,
|
|
"loss": 0.3977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4036133289337158,
|
|
"step": 575,
|
|
"valid_targets_mean": 2342.9,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 0.6775755166421004,
|
|
"learning_rate": 3.977917205047142e-05,
|
|
"loss": 0.422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4571645259857178,
|
|
"step": 580,
|
|
"valid_targets_mean": 2889.3,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 1.0086206896551724,
|
|
"grad_norm": 0.6102387669296445,
|
|
"learning_rate": 3.976624825070339e-05,
|
|
"loss": 0.3625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29137107729911804,
|
|
"step": 585,
|
|
"valid_targets_mean": 2821.2,
|
|
"valid_targets_min": 624
|
|
},
|
|
{
|
|
"epoch": 1.0172413793103448,
|
|
"grad_norm": 0.6197666440180915,
|
|
"learning_rate": 3.97529591706405e-05,
|
|
"loss": 0.3493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37029892206192017,
|
|
"step": 590,
|
|
"valid_targets_mean": 2887.8,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 1.0258620689655173,
|
|
"grad_norm": 0.8840160427547796,
|
|
"learning_rate": 3.973930505586496e-05,
|
|
"loss": 0.4321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4517512321472168,
|
|
"step": 595,
|
|
"valid_targets_mean": 1778.4,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 1.0344827586206897,
|
|
"grad_norm": 0.6650314148453361,
|
|
"learning_rate": 3.972528615870483e-05,
|
|
"loss": 0.3917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3971402645111084,
|
|
"step": 600,
|
|
"valid_targets_mean": 2811.0,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 1.043103448275862,
|
|
"grad_norm": 0.8885280375871449,
|
|
"learning_rate": 3.9710902738229354e-05,
|
|
"loss": 0.3824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4812260866165161,
|
|
"step": 605,
|
|
"valid_targets_mean": 1807.2,
|
|
"valid_targets_min": 838
|
|
},
|
|
{
|
|
"epoch": 1.0517241379310345,
|
|
"grad_norm": 0.8235174714216085,
|
|
"learning_rate": 3.9696155060244166e-05,
|
|
"loss": 0.4383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.527877688407898,
|
|
"step": 610,
|
|
"valid_targets_mean": 2185.9,
|
|
"valid_targets_min": 907
|
|
},
|
|
{
|
|
"epoch": 1.0603448275862069,
|
|
"grad_norm": 0.6243759540097059,
|
|
"learning_rate": 3.968104339728636e-05,
|
|
"loss": 0.3815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3239450454711914,
|
|
"step": 615,
|
|
"valid_targets_mean": 2600.6,
|
|
"valid_targets_min": 511
|
|
},
|
|
{
|
|
"epoch": 1.0689655172413792,
|
|
"grad_norm": 0.8320317076108008,
|
|
"learning_rate": 3.966556802861951e-05,
|
|
"loss": 0.4487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4784361720085144,
|
|
"step": 620,
|
|
"valid_targets_mean": 2049.8,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 1.0775862068965518,
|
|
"grad_norm": 0.7676936416325113,
|
|
"learning_rate": 3.964972924022843e-05,
|
|
"loss": 0.4245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45696157217025757,
|
|
"step": 625,
|
|
"valid_targets_mean": 2586.2,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 1.0862068965517242,
|
|
"grad_norm": 0.7315583973749349,
|
|
"learning_rate": 3.963352732481396e-05,
|
|
"loss": 0.4001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49207547307014465,
|
|
"step": 630,
|
|
"valid_targets_mean": 2997.6,
|
|
"valid_targets_min": 511
|
|
},
|
|
{
|
|
"epoch": 1.0948275862068966,
|
|
"grad_norm": 0.6524701307936455,
|
|
"learning_rate": 3.961696258178752e-05,
|
|
"loss": 0.4073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3828135132789612,
|
|
"step": 635,
|
|
"valid_targets_mean": 3398.6,
|
|
"valid_targets_min": 425
|
|
},
|
|
{
|
|
"epoch": 1.103448275862069,
|
|
"grad_norm": 0.6371602051142804,
|
|
"learning_rate": 3.960003531726559e-05,
|
|
"loss": 0.3559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3406589925289154,
|
|
"step": 640,
|
|
"valid_targets_mean": 3585.0,
|
|
"valid_targets_min": 405
|
|
},
|
|
{
|
|
"epoch": 1.1120689655172413,
|
|
"grad_norm": 0.5890368242311975,
|
|
"learning_rate": 3.958274584406403e-05,
|
|
"loss": 0.4336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33770039677619934,
|
|
"step": 645,
|
|
"valid_targets_mean": 3216.1,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 1.1206896551724137,
|
|
"grad_norm": 0.7050319619089337,
|
|
"learning_rate": 3.956509448169233e-05,
|
|
"loss": 0.4048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3342927098274231,
|
|
"step": 650,
|
|
"valid_targets_mean": 2537.4,
|
|
"valid_targets_min": 445
|
|
},
|
|
{
|
|
"epoch": 1.1293103448275863,
|
|
"grad_norm": 0.7624278995803477,
|
|
"learning_rate": 3.9547081556347693e-05,
|
|
"loss": 0.3934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42083901166915894,
|
|
"step": 655,
|
|
"valid_targets_mean": 2359.4,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 1.1379310344827587,
|
|
"grad_norm": 0.8322591973207496,
|
|
"learning_rate": 3.952870740090901e-05,
|
|
"loss": 0.413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29296737909317017,
|
|
"step": 660,
|
|
"valid_targets_mean": 1606.6,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 1.146551724137931,
|
|
"grad_norm": 0.9566943537898349,
|
|
"learning_rate": 3.950997235493069e-05,
|
|
"loss": 0.3897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3879561722278595,
|
|
"step": 665,
|
|
"valid_targets_mean": 1809.0,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 1.1551724137931034,
|
|
"grad_norm": 0.8626141996861321,
|
|
"learning_rate": 3.9490876764636414e-05,
|
|
"loss": 0.4095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3392297327518463,
|
|
"step": 670,
|
|
"valid_targets_mean": 2330.3,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 1.1637931034482758,
|
|
"grad_norm": 0.798327566418468,
|
|
"learning_rate": 3.947142098291272e-05,
|
|
"loss": 0.3831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3780513405799866,
|
|
"step": 675,
|
|
"valid_targets_mean": 1909.8,
|
|
"valid_targets_min": 848
|
|
},
|
|
{
|
|
"epoch": 1.1724137931034484,
|
|
"grad_norm": 0.7799978912543006,
|
|
"learning_rate": 3.945160536930247e-05,
|
|
"loss": 0.3948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38355159759521484,
|
|
"step": 680,
|
|
"valid_targets_mean": 1956.6,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 1.1810344827586208,
|
|
"grad_norm": 0.7170303781637705,
|
|
"learning_rate": 3.9431430289998235e-05,
|
|
"loss": 0.4309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41659241914749146,
|
|
"step": 685,
|
|
"valid_targets_mean": 2568.3,
|
|
"valid_targets_min": 907
|
|
},
|
|
{
|
|
"epoch": 1.1896551724137931,
|
|
"grad_norm": 0.75541871927773,
|
|
"learning_rate": 3.941089611783551e-05,
|
|
"loss": 0.4384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5742608904838562,
|
|
"step": 690,
|
|
"valid_targets_mean": 3480.9,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 1.1982758620689655,
|
|
"grad_norm": 0.7273236482011581,
|
|
"learning_rate": 3.939000323228583e-05,
|
|
"loss": 0.3825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3808523416519165,
|
|
"step": 695,
|
|
"valid_targets_mean": 2336.6,
|
|
"valid_targets_min": 828
|
|
},
|
|
{
|
|
"epoch": 1.206896551724138,
|
|
"grad_norm": 0.7744304859786367,
|
|
"learning_rate": 3.9368752019449744e-05,
|
|
"loss": 0.4469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46578434109687805,
|
|
"step": 700,
|
|
"valid_targets_mean": 2492.6,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 1.2155172413793103,
|
|
"grad_norm": 0.6368586747092351,
|
|
"learning_rate": 3.934714287204969e-05,
|
|
"loss": 0.3956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3132917881011963,
|
|
"step": 705,
|
|
"valid_targets_mean": 2797.6,
|
|
"valid_targets_min": 451
|
|
},
|
|
{
|
|
"epoch": 1.2241379310344827,
|
|
"grad_norm": 0.7193926603705026,
|
|
"learning_rate": 3.932517618942275e-05,
|
|
"loss": 0.4363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49011844396591187,
|
|
"step": 710,
|
|
"valid_targets_mean": 2756.4,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 1.2327586206896552,
|
|
"grad_norm": 0.8222090867667902,
|
|
"learning_rate": 3.930285237751324e-05,
|
|
"loss": 0.3825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3633798360824585,
|
|
"step": 715,
|
|
"valid_targets_mean": 2080.2,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 1.2413793103448276,
|
|
"grad_norm": 0.6879632039050765,
|
|
"learning_rate": 3.928017184886525e-05,
|
|
"loss": 0.4158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3634766936302185,
|
|
"step": 720,
|
|
"valid_targets_mean": 2422.1,
|
|
"valid_targets_min": 442
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"grad_norm": 0.669091661082806,
|
|
"learning_rate": 3.925713502261496e-05,
|
|
"loss": 0.3828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4206310510635376,
|
|
"step": 725,
|
|
"valid_targets_mean": 2847.8,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 1.2586206896551724,
|
|
"grad_norm": 0.7599239264639392,
|
|
"learning_rate": 3.9233742324482965e-05,
|
|
"loss": 0.3671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45150965452194214,
|
|
"step": 730,
|
|
"valid_targets_mean": 3899.8,
|
|
"valid_targets_min": 470
|
|
},
|
|
{
|
|
"epoch": 1.2672413793103448,
|
|
"grad_norm": 0.8458728794370647,
|
|
"learning_rate": 3.920999418676636e-05,
|
|
"loss": 0.4012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3931143879890442,
|
|
"step": 735,
|
|
"valid_targets_mean": 1709.2,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 1.2758620689655173,
|
|
"grad_norm": 0.7894601441756182,
|
|
"learning_rate": 3.918589104833075e-05,
|
|
"loss": 0.4271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5081537961959839,
|
|
"step": 740,
|
|
"valid_targets_mean": 2482.3,
|
|
"valid_targets_min": 481
|
|
},
|
|
{
|
|
"epoch": 1.2844827586206897,
|
|
"grad_norm": 0.8608351924274511,
|
|
"learning_rate": 3.916143335460218e-05,
|
|
"loss": 0.4317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.375831663608551,
|
|
"step": 745,
|
|
"valid_targets_mean": 1595.8,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 1.293103448275862,
|
|
"grad_norm": 0.6124934646738293,
|
|
"learning_rate": 3.913662155755885e-05,
|
|
"loss": 0.4397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48569154739379883,
|
|
"step": 750,
|
|
"valid_targets_mean": 4658.1,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 1.3017241379310345,
|
|
"grad_norm": 0.6136863641105471,
|
|
"learning_rate": 3.911145611572282e-05,
|
|
"loss": 0.4318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40924838185310364,
|
|
"step": 755,
|
|
"valid_targets_mean": 3167.8,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 1.3103448275862069,
|
|
"grad_norm": 0.7193681878175996,
|
|
"learning_rate": 3.908593749415148e-05,
|
|
"loss": 0.4629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48019519448280334,
|
|
"step": 760,
|
|
"valid_targets_mean": 2553.4,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 1.3189655172413794,
|
|
"grad_norm": 0.6821922155446412,
|
|
"learning_rate": 3.9060066164428986e-05,
|
|
"loss": 0.3818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4456404745578766,
|
|
"step": 765,
|
|
"valid_targets_mean": 3264.4,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 1.3275862068965516,
|
|
"grad_norm": 0.6012395192250871,
|
|
"learning_rate": 3.903384260465756e-05,
|
|
"loss": 0.3751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3192451596260071,
|
|
"step": 770,
|
|
"valid_targets_mean": 2788.5,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 1.3362068965517242,
|
|
"grad_norm": 0.7545058489849539,
|
|
"learning_rate": 3.900726729944861e-05,
|
|
"loss": 0.431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47883346676826477,
|
|
"step": 775,
|
|
"valid_targets_mean": 2500.5,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 1.3448275862068966,
|
|
"grad_norm": 0.6398010325865217,
|
|
"learning_rate": 3.898034073991382e-05,
|
|
"loss": 0.4135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4672417640686035,
|
|
"step": 780,
|
|
"valid_targets_mean": 3337.8,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 1.353448275862069,
|
|
"grad_norm": 0.7940809550754068,
|
|
"learning_rate": 3.8953063423656055e-05,
|
|
"loss": 0.424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42937517166137695,
|
|
"step": 785,
|
|
"valid_targets_mean": 2104.6,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 1.3620689655172413,
|
|
"grad_norm": 0.711072268408022,
|
|
"learning_rate": 3.892543585476014e-05,
|
|
"loss": 0.4081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4857082962989807,
|
|
"step": 790,
|
|
"valid_targets_mean": 2814.8,
|
|
"valid_targets_min": 512
|
|
},
|
|
{
|
|
"epoch": 1.3706896551724137,
|
|
"grad_norm": 0.7932387173216984,
|
|
"learning_rate": 3.88974585437836e-05,
|
|
"loss": 0.4337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5068721175193787,
|
|
"step": 795,
|
|
"valid_targets_mean": 3371.6,
|
|
"valid_targets_min": 519
|
|
},
|
|
{
|
|
"epoch": 1.3793103448275863,
|
|
"grad_norm": 0.7379868428248753,
|
|
"learning_rate": 3.886913200774717e-05,
|
|
"loss": 0.3736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39806318283081055,
|
|
"step": 800,
|
|
"valid_targets_mean": 2662.9,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 1.3879310344827587,
|
|
"grad_norm": 0.9372146576345584,
|
|
"learning_rate": 3.884045677012528e-05,
|
|
"loss": 0.4324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43995919823646545,
|
|
"step": 805,
|
|
"valid_targets_mean": 1663.8,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 1.396551724137931,
|
|
"grad_norm": 0.757584914847117,
|
|
"learning_rate": 3.8811433360836364e-05,
|
|
"loss": 0.4064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39269834756851196,
|
|
"step": 810,
|
|
"valid_targets_mean": 2236.2,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 1.4051724137931034,
|
|
"grad_norm": 0.5602666343134688,
|
|
"learning_rate": 3.878206231623306e-05,
|
|
"loss": 0.4291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34488746523857117,
|
|
"step": 815,
|
|
"valid_targets_mean": 3282.4,
|
|
"valid_targets_min": 485
|
|
},
|
|
{
|
|
"epoch": 1.4137931034482758,
|
|
"grad_norm": 0.8360209814458835,
|
|
"learning_rate": 3.8752344179092315e-05,
|
|
"loss": 0.4534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6250336170196533,
|
|
"step": 820,
|
|
"valid_targets_mean": 3202.9,
|
|
"valid_targets_min": 807
|
|
},
|
|
{
|
|
"epoch": 1.4224137931034484,
|
|
"grad_norm": 0.6716355795895128,
|
|
"learning_rate": 3.8722279498605344e-05,
|
|
"loss": 0.3673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34475111961364746,
|
|
"step": 825,
|
|
"valid_targets_mean": 2427.8,
|
|
"valid_targets_min": 412
|
|
},
|
|
{
|
|
"epoch": 1.4310344827586206,
|
|
"grad_norm": 0.7510883988029478,
|
|
"learning_rate": 3.869186883036748e-05,
|
|
"loss": 0.4027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42797213792800903,
|
|
"step": 830,
|
|
"valid_targets_mean": 2374.5,
|
|
"valid_targets_min": 554
|
|
},
|
|
{
|
|
"epoch": 1.4396551724137931,
|
|
"grad_norm": 0.7944589287393519,
|
|
"learning_rate": 3.8661112736367924e-05,
|
|
"loss": 0.4036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38339388370513916,
|
|
"step": 835,
|
|
"valid_targets_mean": 2018.9,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 1.4482758620689655,
|
|
"grad_norm": 0.6465050270956495,
|
|
"learning_rate": 3.863001178497933e-05,
|
|
"loss": 0.3987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5302772521972656,
|
|
"step": 840,
|
|
"valid_targets_mean": 4361.5,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 1.456896551724138,
|
|
"grad_norm": 0.6893879526286418,
|
|
"learning_rate": 3.8598566550947316e-05,
|
|
"loss": 0.3881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4017932415008545,
|
|
"step": 845,
|
|
"valid_targets_mean": 2625.7,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 1.4655172413793103,
|
|
"grad_norm": 0.6220088023463586,
|
|
"learning_rate": 3.856677761537986e-05,
|
|
"loss": 0.4021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36116647720336914,
|
|
"step": 850,
|
|
"valid_targets_mean": 3468.6,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 1.4741379310344827,
|
|
"grad_norm": 0.7694318201664595,
|
|
"learning_rate": 3.853464556573652e-05,
|
|
"loss": 0.3916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.380726158618927,
|
|
"step": 855,
|
|
"valid_targets_mean": 2097.6,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 1.4827586206896552,
|
|
"grad_norm": 0.7201246052401616,
|
|
"learning_rate": 3.850217099581764e-05,
|
|
"loss": 0.396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46923041343688965,
|
|
"step": 860,
|
|
"valid_targets_mean": 2454.4,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 1.4913793103448276,
|
|
"grad_norm": 0.7112727683269261,
|
|
"learning_rate": 3.8469354505753305e-05,
|
|
"loss": 0.3623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3220486044883728,
|
|
"step": 865,
|
|
"valid_targets_mean": 2655.5,
|
|
"valid_targets_min": 510
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"grad_norm": 0.6213614398189481,
|
|
"learning_rate": 3.843619670199229e-05,
|
|
"loss": 0.3986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33441856503486633,
|
|
"step": 870,
|
|
"valid_targets_mean": 2729.1,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 1.5086206896551724,
|
|
"grad_norm": 0.7534771914043414,
|
|
"learning_rate": 3.8402698197290865e-05,
|
|
"loss": 0.4389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46794092655181885,
|
|
"step": 875,
|
|
"valid_targets_mean": 3115.1,
|
|
"valid_targets_min": 937
|
|
},
|
|
{
|
|
"epoch": 1.5172413793103448,
|
|
"grad_norm": 0.6724604582524113,
|
|
"learning_rate": 3.8368859610701443e-05,
|
|
"loss": 0.4321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5096204280853271,
|
|
"step": 880,
|
|
"valid_targets_mean": 3365.1,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 1.5258620689655173,
|
|
"grad_norm": 0.537324077917551,
|
|
"learning_rate": 3.833468156756114e-05,
|
|
"loss": 0.3699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39847302436828613,
|
|
"step": 885,
|
|
"valid_targets_mean": 4028.4,
|
|
"valid_targets_min": 352
|
|
},
|
|
{
|
|
"epoch": 1.5344827586206895,
|
|
"grad_norm": 0.7701273286666982,
|
|
"learning_rate": 3.8300164699480246e-05,
|
|
"loss": 0.4074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3641321361064911,
|
|
"step": 890,
|
|
"valid_targets_mean": 1731.1,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 1.543103448275862,
|
|
"grad_norm": 0.6768973576564309,
|
|
"learning_rate": 3.8265309644330535e-05,
|
|
"loss": 0.3796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40518438816070557,
|
|
"step": 895,
|
|
"valid_targets_mean": 3067.6,
|
|
"valid_targets_min": 653
|
|
},
|
|
{
|
|
"epoch": 1.5517241379310345,
|
|
"grad_norm": 0.8375219158955782,
|
|
"learning_rate": 3.823011704623347e-05,
|
|
"loss": 0.4127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.400987446308136,
|
|
"step": 900,
|
|
"valid_targets_mean": 1611.2,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 1.5603448275862069,
|
|
"grad_norm": 0.8211855004487335,
|
|
"learning_rate": 3.81945875555483e-05,
|
|
"loss": 0.4045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39641138911247253,
|
|
"step": 905,
|
|
"valid_targets_mean": 1993.6,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 1.5689655172413794,
|
|
"grad_norm": 0.8218745944898366,
|
|
"learning_rate": 3.8158721828860094e-05,
|
|
"loss": 0.3494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34812313318252563,
|
|
"step": 910,
|
|
"valid_targets_mean": 1692.9,
|
|
"valid_targets_min": 609
|
|
},
|
|
{
|
|
"epoch": 1.5775862068965516,
|
|
"grad_norm": 0.7720513032419637,
|
|
"learning_rate": 3.81225205289675e-05,
|
|
"loss": 0.4902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4618193507194519,
|
|
"step": 915,
|
|
"valid_targets_mean": 1989.4,
|
|
"valid_targets_min": 660
|
|
},
|
|
{
|
|
"epoch": 1.5862068965517242,
|
|
"grad_norm": 0.7623625823563448,
|
|
"learning_rate": 3.808598432487061e-05,
|
|
"loss": 0.4345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36552631855010986,
|
|
"step": 920,
|
|
"valid_targets_mean": 1844.8,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 1.5948275862068966,
|
|
"grad_norm": 0.6438978080095269,
|
|
"learning_rate": 3.8049113891758506e-05,
|
|
"loss": 0.4502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39778804779052734,
|
|
"step": 925,
|
|
"valid_targets_mean": 2994.9,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 1.603448275862069,
|
|
"grad_norm": 0.7649241814152833,
|
|
"learning_rate": 3.8011909910996856e-05,
|
|
"loss": 0.3747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43716567754745483,
|
|
"step": 930,
|
|
"valid_targets_mean": 2140.6,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 1.6120689655172413,
|
|
"grad_norm": 0.580587049117197,
|
|
"learning_rate": 3.797437307011527e-05,
|
|
"loss": 0.378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35338088870048523,
|
|
"step": 935,
|
|
"valid_targets_mean": 3349.8,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 1.6206896551724137,
|
|
"grad_norm": 0.7312067736555172,
|
|
"learning_rate": 3.793650406279463e-05,
|
|
"loss": 0.4217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40459704399108887,
|
|
"step": 940,
|
|
"valid_targets_mean": 1908.5,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 1.6293103448275863,
|
|
"grad_norm": 0.8452807897938808,
|
|
"learning_rate": 3.789830358885423e-05,
|
|
"loss": 0.3822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42247670888900757,
|
|
"step": 945,
|
|
"valid_targets_mean": 1825.6,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 1.6379310344827587,
|
|
"grad_norm": 0.6507520907626049,
|
|
"learning_rate": 3.7859772354238885e-05,
|
|
"loss": 0.3921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28464314341545105,
|
|
"step": 950,
|
|
"valid_targets_mean": 2569.2,
|
|
"valid_targets_min": 569
|
|
},
|
|
{
|
|
"epoch": 1.646551724137931,
|
|
"grad_norm": 0.7658684085183802,
|
|
"learning_rate": 3.782091107100587e-05,
|
|
"loss": 0.4034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4443947970867157,
|
|
"step": 955,
|
|
"valid_targets_mean": 2170.6,
|
|
"valid_targets_min": 814
|
|
},
|
|
{
|
|
"epoch": 1.6551724137931034,
|
|
"grad_norm": 1.2494001805866435,
|
|
"learning_rate": 3.7781720457311746e-05,
|
|
"loss": 0.4166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5146511793136597,
|
|
"step": 960,
|
|
"valid_targets_mean": 2532.3,
|
|
"valid_targets_min": 530
|
|
},
|
|
{
|
|
"epoch": 1.6637931034482758,
|
|
"grad_norm": 0.6526502409039252,
|
|
"learning_rate": 3.7742201237399105e-05,
|
|
"loss": 0.4514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34717223048210144,
|
|
"step": 965,
|
|
"valid_targets_mean": 3003.6,
|
|
"valid_targets_min": 931
|
|
},
|
|
{
|
|
"epoch": 1.6724137931034484,
|
|
"grad_norm": 0.633402843925448,
|
|
"learning_rate": 3.77023541415832e-05,
|
|
"loss": 0.3931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34919875860214233,
|
|
"step": 970,
|
|
"valid_targets_mean": 2739.6,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 1.6810344827586206,
|
|
"grad_norm": 0.8387142971624745,
|
|
"learning_rate": 3.7662179906238405e-05,
|
|
"loss": 0.4068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40993672609329224,
|
|
"step": 975,
|
|
"valid_targets_mean": 2246.9,
|
|
"valid_targets_min": 708
|
|
},
|
|
{
|
|
"epoch": 1.6896551724137931,
|
|
"grad_norm": 0.5085763777226048,
|
|
"learning_rate": 3.762167927378464e-05,
|
|
"loss": 0.3599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33271750807762146,
|
|
"step": 980,
|
|
"valid_targets_mean": 4219.9,
|
|
"valid_targets_min": 515
|
|
},
|
|
{
|
|
"epoch": 1.6982758620689655,
|
|
"grad_norm": 0.8224745860109742,
|
|
"learning_rate": 3.7580852992673656e-05,
|
|
"loss": 0.392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3762546479701996,
|
|
"step": 985,
|
|
"valid_targets_mean": 1791.2,
|
|
"valid_targets_min": 399
|
|
},
|
|
{
|
|
"epoch": 1.706896551724138,
|
|
"grad_norm": 0.5920143777699354,
|
|
"learning_rate": 3.7539701817375185e-05,
|
|
"loss": 0.3932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3287311792373657,
|
|
"step": 990,
|
|
"valid_targets_mean": 3017.8,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 1.7155172413793105,
|
|
"grad_norm": 0.5939374862414969,
|
|
"learning_rate": 3.7498226508362996e-05,
|
|
"loss": 0.4305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27790141105651855,
|
|
"step": 995,
|
|
"valid_targets_mean": 2668.7,
|
|
"valid_targets_min": 630
|
|
},
|
|
{
|
|
"epoch": 1.7241379310344827,
|
|
"grad_norm": 0.6941009800354047,
|
|
"learning_rate": 3.7456427832100864e-05,
|
|
"loss": 0.3872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.491094708442688,
|
|
"step": 1000,
|
|
"valid_targets_mean": 4933.6,
|
|
"valid_targets_min": 427
|
|
},
|
|
{
|
|
"epoch": 1.7327586206896552,
|
|
"grad_norm": 0.6943581356651782,
|
|
"learning_rate": 3.7414306561028385e-05,
|
|
"loss": 0.4164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4229108393192291,
|
|
"step": 1005,
|
|
"valid_targets_mean": 2461.8,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 1.7413793103448276,
|
|
"grad_norm": 0.8064131925426529,
|
|
"learning_rate": 3.73718634735467e-05,
|
|
"loss": 0.4169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45140957832336426,
|
|
"step": 1010,
|
|
"valid_targets_mean": 2074.4,
|
|
"valid_targets_min": 393
|
|
},
|
|
{
|
|
"epoch": 1.75,
|
|
"grad_norm": 0.6813330765539505,
|
|
"learning_rate": 3.732909935400412e-05,
|
|
"loss": 0.3606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3701205253601074,
|
|
"step": 1015,
|
|
"valid_targets_mean": 3159.4,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 1.7586206896551724,
|
|
"grad_norm": 0.6288867610404782,
|
|
"learning_rate": 3.7286014992681645e-05,
|
|
"loss": 0.3841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3469887375831604,
|
|
"step": 1020,
|
|
"valid_targets_mean": 2883.7,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 1.7672413793103448,
|
|
"grad_norm": 0.7143527167015993,
|
|
"learning_rate": 3.7242611185778325e-05,
|
|
"loss": 0.3862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39058348536491394,
|
|
"step": 1025,
|
|
"valid_targets_mean": 3864.4,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 1.7758620689655173,
|
|
"grad_norm": 0.5703494416813504,
|
|
"learning_rate": 3.7198888735396574e-05,
|
|
"loss": 0.4318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.342609703540802,
|
|
"step": 1030,
|
|
"valid_targets_mean": 3531.6,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 1.7844827586206895,
|
|
"grad_norm": 0.6952050006138021,
|
|
"learning_rate": 3.7154848449527334e-05,
|
|
"loss": 0.3794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4868992567062378,
|
|
"step": 1035,
|
|
"valid_targets_mean": 2866.4,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 1.793103448275862,
|
|
"grad_norm": 0.6282722343659998,
|
|
"learning_rate": 3.7110491142035145e-05,
|
|
"loss": 0.3694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3678615093231201,
|
|
"step": 1040,
|
|
"valid_targets_mean": 3136.4,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 1.8017241379310345,
|
|
"grad_norm": 0.5812176719759966,
|
|
"learning_rate": 3.7065817632643115e-05,
|
|
"loss": 0.3729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3459870517253876,
|
|
"step": 1045,
|
|
"valid_targets_mean": 3125.9,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 1.8103448275862069,
|
|
"grad_norm": 0.7052883776391125,
|
|
"learning_rate": 3.702082874691776e-05,
|
|
"loss": 0.3779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3673224449157715,
|
|
"step": 1050,
|
|
"valid_targets_mean": 2189.1,
|
|
"valid_targets_min": 879
|
|
},
|
|
{
|
|
"epoch": 1.8189655172413794,
|
|
"grad_norm": 0.6973382773007308,
|
|
"learning_rate": 3.6975525316253744e-05,
|
|
"loss": 0.4054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34974607825279236,
|
|
"step": 1055,
|
|
"valid_targets_mean": 2217.2,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 1.8275862068965516,
|
|
"grad_norm": 0.7266969496321424,
|
|
"learning_rate": 3.692990817785853e-05,
|
|
"loss": 0.3681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42130789160728455,
|
|
"step": 1060,
|
|
"valid_targets_mean": 2347.6,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 1.8362068965517242,
|
|
"grad_norm": 0.8676321505533798,
|
|
"learning_rate": 3.68839781747369e-05,
|
|
"loss": 0.3863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45284566283226013,
|
|
"step": 1065,
|
|
"valid_targets_mean": 1771.2,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 1.8448275862068966,
|
|
"grad_norm": 0.6589301963607709,
|
|
"learning_rate": 3.683773615567538e-05,
|
|
"loss": 0.3954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3539057970046997,
|
|
"step": 1070,
|
|
"valid_targets_mean": 2677.9,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 1.853448275862069,
|
|
"grad_norm": 0.6167457014344452,
|
|
"learning_rate": 3.679118297522654e-05,
|
|
"loss": 0.4454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4817582666873932,
|
|
"step": 1075,
|
|
"valid_targets_mean": 4339.6,
|
|
"valid_targets_min": 377
|
|
},
|
|
{
|
|
"epoch": 1.8620689655172413,
|
|
"grad_norm": 0.6055737080045898,
|
|
"learning_rate": 3.674431949369321e-05,
|
|
"loss": 0.3673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.393863320350647,
|
|
"step": 1080,
|
|
"valid_targets_mean": 3241.9,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 1.8706896551724137,
|
|
"grad_norm": 0.7354615943580206,
|
|
"learning_rate": 3.6697146577112614e-05,
|
|
"loss": 0.3936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38788315653800964,
|
|
"step": 1085,
|
|
"valid_targets_mean": 2015.6,
|
|
"valid_targets_min": 431
|
|
},
|
|
{
|
|
"epoch": 1.8793103448275863,
|
|
"grad_norm": 0.5760016126037003,
|
|
"learning_rate": 3.6649665097240304e-05,
|
|
"loss": 0.3942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40716418623924255,
|
|
"step": 1090,
|
|
"valid_targets_mean": 4327.3,
|
|
"valid_targets_min": 551
|
|
},
|
|
{
|
|
"epoch": 1.8879310344827587,
|
|
"grad_norm": 0.8615793077201944,
|
|
"learning_rate": 3.660187593153408e-05,
|
|
"loss": 0.3816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43805018067359924,
|
|
"step": 1095,
|
|
"valid_targets_mean": 1688.0,
|
|
"valid_targets_min": 656
|
|
},
|
|
{
|
|
"epoch": 1.896551724137931,
|
|
"grad_norm": 0.7457791465766714,
|
|
"learning_rate": 3.655377996313782e-05,
|
|
"loss": 0.4069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4772360324859619,
|
|
"step": 1100,
|
|
"valid_targets_mean": 2212.9,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 1.9051724137931034,
|
|
"grad_norm": 0.8291844822006753,
|
|
"learning_rate": 3.6505378080865054e-05,
|
|
"loss": 0.4113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39961403608322144,
|
|
"step": 1105,
|
|
"valid_targets_mean": 1644.9,
|
|
"valid_targets_min": 479
|
|
},
|
|
{
|
|
"epoch": 1.9137931034482758,
|
|
"grad_norm": 0.6199256600700211,
|
|
"learning_rate": 3.645667117918265e-05,
|
|
"loss": 0.4192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3381462097167969,
|
|
"step": 1110,
|
|
"valid_targets_mean": 2744.1,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 1.9224137931034484,
|
|
"grad_norm": 0.5746653636562877,
|
|
"learning_rate": 3.640766015819423e-05,
|
|
"loss": 0.4441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.423374205827713,
|
|
"step": 1115,
|
|
"valid_targets_mean": 3882.1,
|
|
"valid_targets_min": 818
|
|
},
|
|
{
|
|
"epoch": 1.9310344827586206,
|
|
"grad_norm": 0.6329881260544258,
|
|
"learning_rate": 3.6358345923623506e-05,
|
|
"loss": 0.3768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3031822443008423,
|
|
"step": 1120,
|
|
"valid_targets_mean": 2636.6,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 1.9396551724137931,
|
|
"grad_norm": 0.6746260613469616,
|
|
"learning_rate": 3.630872938679761e-05,
|
|
"loss": 0.3891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3765380382537842,
|
|
"step": 1125,
|
|
"valid_targets_mean": 2744.8,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 1.9482758620689655,
|
|
"grad_norm": 0.740987702919496,
|
|
"learning_rate": 3.6258811464630215e-05,
|
|
"loss": 0.4098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42373108863830566,
|
|
"step": 1130,
|
|
"valid_targets_mean": 2425.2,
|
|
"valid_targets_min": 432
|
|
},
|
|
{
|
|
"epoch": 1.956896551724138,
|
|
"grad_norm": 1.1924553665440873,
|
|
"learning_rate": 3.620859307960458e-05,
|
|
"loss": 0.4763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3285548686981201,
|
|
"step": 1135,
|
|
"valid_targets_mean": 2777.8,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 1.9655172413793105,
|
|
"grad_norm": 0.8258503182816348,
|
|
"learning_rate": 3.615807515975654e-05,
|
|
"loss": 0.3811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43213579058647156,
|
|
"step": 1140,
|
|
"valid_targets_mean": 2270.6,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 1.9741379310344827,
|
|
"grad_norm": 0.6833220821483821,
|
|
"learning_rate": 3.6107258638657324e-05,
|
|
"loss": 0.3751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3122459948062897,
|
|
"step": 1145,
|
|
"valid_targets_mean": 4119.2,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 1.9827586206896552,
|
|
"grad_norm": 0.5830920412565893,
|
|
"learning_rate": 3.60561444553963e-05,
|
|
"loss": 0.4329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3681791424751282,
|
|
"step": 1150,
|
|
"valid_targets_mean": 3160.9,
|
|
"valid_targets_min": 961
|
|
},
|
|
{
|
|
"epoch": 1.9913793103448276,
|
|
"grad_norm": 0.6162725013998724,
|
|
"learning_rate": 3.600473355456366e-05,
|
|
"loss": 0.3904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34768128395080566,
|
|
"step": 1155,
|
|
"valid_targets_mean": 3020.3,
|
|
"valid_targets_min": 637
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 0.7220829440099563,
|
|
"learning_rate": 3.595302688623291e-05,
|
|
"loss": 0.365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4216357171535492,
|
|
"step": 1160,
|
|
"valid_targets_mean": 2232.6,
|
|
"valid_targets_min": 503
|
|
},
|
|
{
|
|
"epoch": 2.0086206896551726,
|
|
"grad_norm": 0.5388192966250313,
|
|
"learning_rate": 3.590102540594337e-05,
|
|
"loss": 0.3466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30984219908714294,
|
|
"step": 1165,
|
|
"valid_targets_mean": 3784.2,
|
|
"valid_targets_min": 435
|
|
},
|
|
{
|
|
"epoch": 2.0172413793103448,
|
|
"grad_norm": 0.8860377930426859,
|
|
"learning_rate": 3.584873007468244e-05,
|
|
"loss": 0.3232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3425954580307007,
|
|
"step": 1170,
|
|
"valid_targets_mean": 1921.7,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 2.0258620689655173,
|
|
"grad_norm": 0.8903193161432715,
|
|
"learning_rate": 3.5796141858867935e-05,
|
|
"loss": 0.3589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3836989998817444,
|
|
"step": 1175,
|
|
"valid_targets_mean": 1732.9,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 2.0344827586206895,
|
|
"grad_norm": 0.6655554260518024,
|
|
"learning_rate": 3.5743261730330144e-05,
|
|
"loss": 0.3388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3734143376350403,
|
|
"step": 1180,
|
|
"valid_targets_mean": 3244.6,
|
|
"valid_targets_min": 1242
|
|
},
|
|
{
|
|
"epoch": 2.043103448275862,
|
|
"grad_norm": 0.6589382550025945,
|
|
"learning_rate": 3.569009066629392e-05,
|
|
"loss": 0.3406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3362818956375122,
|
|
"step": 1185,
|
|
"valid_targets_mean": 3155.6,
|
|
"valid_targets_min": 839
|
|
},
|
|
{
|
|
"epoch": 2.0517241379310347,
|
|
"grad_norm": 0.6918050697024621,
|
|
"learning_rate": 3.56366296493606e-05,
|
|
"loss": 0.332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3342352509498596,
|
|
"step": 1190,
|
|
"valid_targets_mean": 2828.1,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 2.060344827586207,
|
|
"grad_norm": 0.5940371066958855,
|
|
"learning_rate": 3.558287966748985e-05,
|
|
"loss": 0.3324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2669927477836609,
|
|
"step": 1195,
|
|
"valid_targets_mean": 3061.4,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 2.0689655172413794,
|
|
"grad_norm": 0.7643289898339554,
|
|
"learning_rate": 3.552884171398141e-05,
|
|
"loss": 0.3567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4045037031173706,
|
|
"step": 1200,
|
|
"valid_targets_mean": 2579.3,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 2.0775862068965516,
|
|
"grad_norm": 0.7255326164964023,
|
|
"learning_rate": 3.547451678745673e-05,
|
|
"loss": 0.3679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39175868034362793,
|
|
"step": 1205,
|
|
"valid_targets_mean": 2542.5,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 2.086206896551724,
|
|
"grad_norm": 0.6060718767054583,
|
|
"learning_rate": 3.541990589184053e-05,
|
|
"loss": 0.3874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4336078464984894,
|
|
"step": 1210,
|
|
"valid_targets_mean": 3638.5,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 2.0948275862068964,
|
|
"grad_norm": 0.6766990332271949,
|
|
"learning_rate": 3.5365010036342245e-05,
|
|
"loss": 0.3094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32076266407966614,
|
|
"step": 1215,
|
|
"valid_targets_mean": 2834.5,
|
|
"valid_targets_min": 514
|
|
},
|
|
{
|
|
"epoch": 2.103448275862069,
|
|
"grad_norm": 0.5734779038893596,
|
|
"learning_rate": 3.530983023543734e-05,
|
|
"loss": 0.344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31001657247543335,
|
|
"step": 1220,
|
|
"valid_targets_mean": 3574.9,
|
|
"valid_targets_min": 901
|
|
},
|
|
{
|
|
"epoch": 2.1120689655172415,
|
|
"grad_norm": 0.7199219118404978,
|
|
"learning_rate": 3.525436750884863e-05,
|
|
"loss": 0.3505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31847256422042847,
|
|
"step": 1225,
|
|
"valid_targets_mean": 2214.8,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 2.1206896551724137,
|
|
"grad_norm": 0.6319108730231816,
|
|
"learning_rate": 3.5198622881527374e-05,
|
|
"loss": 0.3163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28605881333351135,
|
|
"step": 1230,
|
|
"valid_targets_mean": 3245.0,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 2.1293103448275863,
|
|
"grad_norm": 0.7995321706961999,
|
|
"learning_rate": 3.514259738363436e-05,
|
|
"loss": 0.3408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3353400230407715,
|
|
"step": 1235,
|
|
"valid_targets_mean": 2027.1,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 2.1379310344827585,
|
|
"grad_norm": 0.9179748361646247,
|
|
"learning_rate": 3.5086292050520855e-05,
|
|
"loss": 0.3613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39520493149757385,
|
|
"step": 1240,
|
|
"valid_targets_mean": 1937.8,
|
|
"valid_targets_min": 395
|
|
},
|
|
{
|
|
"epoch": 2.146551724137931,
|
|
"grad_norm": 0.746855498744147,
|
|
"learning_rate": 3.502970792270951e-05,
|
|
"loss": 0.3169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3741573393344879,
|
|
"step": 1245,
|
|
"valid_targets_mean": 2679.6,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 2.1551724137931036,
|
|
"grad_norm": 0.5754114961452039,
|
|
"learning_rate": 3.497284604587508e-05,
|
|
"loss": 0.356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3210458755493164,
|
|
"step": 1250,
|
|
"valid_targets_mean": 3502.5,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 2.163793103448276,
|
|
"grad_norm": 0.7426374111463406,
|
|
"learning_rate": 3.491570747082512e-05,
|
|
"loss": 0.2985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2950843274593353,
|
|
"step": 1255,
|
|
"valid_targets_mean": 2185.5,
|
|
"valid_targets_min": 748
|
|
},
|
|
{
|
|
"epoch": 2.1724137931034484,
|
|
"grad_norm": 1.8569462813349802,
|
|
"learning_rate": 3.485829325348059e-05,
|
|
"loss": 0.3621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4585845470428467,
|
|
"step": 1260,
|
|
"valid_targets_mean": 2958.1,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 2.1810344827586206,
|
|
"grad_norm": 0.6461462169152513,
|
|
"learning_rate": 3.4800604454856284e-05,
|
|
"loss": 0.3506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35911306738853455,
|
|
"step": 1265,
|
|
"valid_targets_mean": 2807.1,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 2.189655172413793,
|
|
"grad_norm": 0.8080624401623676,
|
|
"learning_rate": 3.47426421410413e-05,
|
|
"loss": 0.3751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31265851855278015,
|
|
"step": 1270,
|
|
"valid_targets_mean": 2058.2,
|
|
"valid_targets_min": 848
|
|
},
|
|
{
|
|
"epoch": 2.1982758620689653,
|
|
"grad_norm": 0.6467103643654887,
|
|
"learning_rate": 3.468440738317926e-05,
|
|
"loss": 0.3612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3470854163169861,
|
|
"step": 1275,
|
|
"valid_targets_mean": 3573.7,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 2.206896551724138,
|
|
"grad_norm": 0.7060195928575449,
|
|
"learning_rate": 3.4625901257448596e-05,
|
|
"loss": 0.3481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3791043758392334,
|
|
"step": 1280,
|
|
"valid_targets_mean": 2588.9,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 2.2155172413793105,
|
|
"grad_norm": 0.8796348183577576,
|
|
"learning_rate": 3.4567124845042564e-05,
|
|
"loss": 0.2697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27518510818481445,
|
|
"step": 1285,
|
|
"valid_targets_mean": 1862.4,
|
|
"valid_targets_min": 424
|
|
},
|
|
{
|
|
"epoch": 2.2241379310344827,
|
|
"grad_norm": 0.9628728050045314,
|
|
"learning_rate": 3.4508079232149354e-05,
|
|
"loss": 0.3545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4080553352832794,
|
|
"step": 1290,
|
|
"valid_targets_mean": 1559.8,
|
|
"valid_targets_min": 551
|
|
},
|
|
{
|
|
"epoch": 2.2327586206896552,
|
|
"grad_norm": 0.7472287967147371,
|
|
"learning_rate": 3.444876550993198e-05,
|
|
"loss": 0.3449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33648034930229187,
|
|
"step": 1295,
|
|
"valid_targets_mean": 2121.4,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 2.2413793103448274,
|
|
"grad_norm": 0.5763763906754586,
|
|
"learning_rate": 3.4389184774508105e-05,
|
|
"loss": 0.3693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3669682741165161,
|
|
"step": 1300,
|
|
"valid_targets_mean": 3822.4,
|
|
"valid_targets_min": 908
|
|
},
|
|
{
|
|
"epoch": 2.25,
|
|
"grad_norm": 0.8021437893371579,
|
|
"learning_rate": 3.43293381269298e-05,
|
|
"loss": 0.3623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3201998174190521,
|
|
"step": 1305,
|
|
"valid_targets_mean": 2141.8,
|
|
"valid_targets_min": 569
|
|
},
|
|
{
|
|
"epoch": 2.2586206896551726,
|
|
"grad_norm": 0.7305450212853413,
|
|
"learning_rate": 3.4269226673163204e-05,
|
|
"loss": 0.381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47764334082603455,
|
|
"step": 1310,
|
|
"valid_targets_mean": 3005.6,
|
|
"valid_targets_min": 579
|
|
},
|
|
{
|
|
"epoch": 2.2672413793103448,
|
|
"grad_norm": 0.8083273631520955,
|
|
"learning_rate": 3.420885152406805e-05,
|
|
"loss": 0.3438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3901101052761078,
|
|
"step": 1315,
|
|
"valid_targets_mean": 2275.4,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 2.2758620689655173,
|
|
"grad_norm": 0.8109990424140212,
|
|
"learning_rate": 3.4148213795377194e-05,
|
|
"loss": 0.3724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37412458658218384,
|
|
"step": 1320,
|
|
"valid_targets_mean": 2063.2,
|
|
"valid_targets_min": 488
|
|
},
|
|
{
|
|
"epoch": 2.2844827586206895,
|
|
"grad_norm": 0.7476899330396594,
|
|
"learning_rate": 3.408731460767593e-05,
|
|
"loss": 0.3478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29301315546035767,
|
|
"step": 1325,
|
|
"valid_targets_mean": 2032.8,
|
|
"valid_targets_min": 477
|
|
},
|
|
{
|
|
"epoch": 2.293103448275862,
|
|
"grad_norm": 0.7384812273892642,
|
|
"learning_rate": 3.402615508638134e-05,
|
|
"loss": 0.3668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4986970126628876,
|
|
"step": 1330,
|
|
"valid_targets_mean": 2887.1,
|
|
"valid_targets_min": 1171
|
|
},
|
|
{
|
|
"epoch": 2.3017241379310347,
|
|
"grad_norm": 0.7052675589889301,
|
|
"learning_rate": 3.396473636172146e-05,
|
|
"loss": 0.3765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3002275824546814,
|
|
"step": 1335,
|
|
"valid_targets_mean": 2545.4,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 2.310344827586207,
|
|
"grad_norm": 0.5638329722524688,
|
|
"learning_rate": 3.3903059568714406e-05,
|
|
"loss": 0.3458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29387524724006653,
|
|
"step": 1340,
|
|
"valid_targets_mean": 3717.7,
|
|
"valid_targets_min": 569
|
|
},
|
|
{
|
|
"epoch": 2.3189655172413794,
|
|
"grad_norm": 0.7437822158482291,
|
|
"learning_rate": 3.384112584714739e-05,
|
|
"loss": 0.3588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3086732029914856,
|
|
"step": 1345,
|
|
"valid_targets_mean": 2345.1,
|
|
"valid_targets_min": 493
|
|
},
|
|
{
|
|
"epoch": 2.3275862068965516,
|
|
"grad_norm": 0.7665275506599184,
|
|
"learning_rate": 3.377893634155568e-05,
|
|
"loss": 0.3104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3466002345085144,
|
|
"step": 1350,
|
|
"valid_targets_mean": 2198.2,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 2.336206896551724,
|
|
"grad_norm": 0.6885948848323417,
|
|
"learning_rate": 3.371649220120143e-05,
|
|
"loss": 0.329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33025288581848145,
|
|
"step": 1355,
|
|
"valid_targets_mean": 2540.4,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 2.344827586206897,
|
|
"grad_norm": 0.6702991821826371,
|
|
"learning_rate": 3.365379458005243e-05,
|
|
"loss": 0.3431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3554913103580475,
|
|
"step": 1360,
|
|
"valid_targets_mean": 3490.3,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 2.353448275862069,
|
|
"grad_norm": 0.7820230821530418,
|
|
"learning_rate": 3.35908446367608e-05,
|
|
"loss": 0.3438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3224671185016632,
|
|
"step": 1365,
|
|
"valid_targets_mean": 2158.5,
|
|
"valid_targets_min": 905
|
|
},
|
|
{
|
|
"epoch": 2.3620689655172415,
|
|
"grad_norm": 0.5499398168119826,
|
|
"learning_rate": 3.35276435346416e-05,
|
|
"loss": 0.362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3069545030593872,
|
|
"step": 1370,
|
|
"valid_targets_mean": 3997.6,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 2.3706896551724137,
|
|
"grad_norm": 0.6187347278761689,
|
|
"learning_rate": 3.346419244165127e-05,
|
|
"loss": 0.3362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29844409227371216,
|
|
"step": 1375,
|
|
"valid_targets_mean": 3237.6,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 2.3793103448275863,
|
|
"grad_norm": 0.7649535914549306,
|
|
"learning_rate": 3.3400492530366086e-05,
|
|
"loss": 0.3449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3073192834854126,
|
|
"step": 1380,
|
|
"valid_targets_mean": 2159.3,
|
|
"valid_targets_min": 932
|
|
},
|
|
{
|
|
"epoch": 2.3879310344827585,
|
|
"grad_norm": 0.8065445997332823,
|
|
"learning_rate": 3.333654497796051e-05,
|
|
"loss": 0.4314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4503655433654785,
|
|
"step": 1385,
|
|
"valid_targets_mean": 2368.5,
|
|
"valid_targets_min": 479
|
|
},
|
|
{
|
|
"epoch": 2.396551724137931,
|
|
"grad_norm": 0.9539292555942607,
|
|
"learning_rate": 3.32723509661854e-05,
|
|
"loss": 0.3426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35420340299606323,
|
|
"step": 1390,
|
|
"valid_targets_mean": 1735.4,
|
|
"valid_targets_min": 544
|
|
},
|
|
{
|
|
"epoch": 2.405172413793103,
|
|
"grad_norm": 0.7435624854998906,
|
|
"learning_rate": 3.320791168134617e-05,
|
|
"loss": 0.3141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2867245078086853,
|
|
"step": 1395,
|
|
"valid_targets_mean": 2049.0,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 2.413793103448276,
|
|
"grad_norm": 0.8410354465466265,
|
|
"learning_rate": 3.31432283142809e-05,
|
|
"loss": 0.3496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4442535638809204,
|
|
"step": 1400,
|
|
"valid_targets_mean": 2262.7,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 2.4224137931034484,
|
|
"grad_norm": 0.6903927781580114,
|
|
"learning_rate": 3.307830206033831e-05,
|
|
"loss": 0.3538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3711048364639282,
|
|
"step": 1405,
|
|
"valid_targets_mean": 3179.2,
|
|
"valid_targets_min": 882
|
|
},
|
|
{
|
|
"epoch": 2.4310344827586206,
|
|
"grad_norm": 0.695084735647335,
|
|
"learning_rate": 3.301313411935565e-05,
|
|
"loss": 0.3563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3399720788002014,
|
|
"step": 1410,
|
|
"valid_targets_mean": 2799.4,
|
|
"valid_targets_min": 450
|
|
},
|
|
{
|
|
"epoch": 2.439655172413793,
|
|
"grad_norm": 0.7060158875096876,
|
|
"learning_rate": 3.294772569563656e-05,
|
|
"loss": 0.4188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4565882086753845,
|
|
"step": 1415,
|
|
"valid_targets_mean": 3125.3,
|
|
"valid_targets_min": 871
|
|
},
|
|
{
|
|
"epoch": 2.4482758620689653,
|
|
"grad_norm": 0.6993437246470752,
|
|
"learning_rate": 3.28820779979288e-05,
|
|
"loss": 0.3751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3832154870033264,
|
|
"step": 1420,
|
|
"valid_targets_mean": 3026.6,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 2.456896551724138,
|
|
"grad_norm": 0.7671380866195133,
|
|
"learning_rate": 3.281619223940192e-05,
|
|
"loss": 0.3543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39568156003952026,
|
|
"step": 1425,
|
|
"valid_targets_mean": 3380.5,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 2.4655172413793105,
|
|
"grad_norm": 0.883160039143343,
|
|
"learning_rate": 3.2750069637624826e-05,
|
|
"loss": 0.3663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4419388771057129,
|
|
"step": 1430,
|
|
"valid_targets_mean": 1897.9,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 2.4741379310344827,
|
|
"grad_norm": 0.7456455502612334,
|
|
"learning_rate": 3.2683711414543295e-05,
|
|
"loss": 0.3458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38482749462127686,
|
|
"step": 1435,
|
|
"valid_targets_mean": 2422.2,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 2.4827586206896552,
|
|
"grad_norm": 0.6311742479799494,
|
|
"learning_rate": 3.261711879645737e-05,
|
|
"loss": 0.3602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3681245744228363,
|
|
"step": 1440,
|
|
"valid_targets_mean": 3291.8,
|
|
"valid_targets_min": 431
|
|
},
|
|
{
|
|
"epoch": 2.4913793103448274,
|
|
"grad_norm": 0.9504806362945273,
|
|
"learning_rate": 3.255029301399873e-05,
|
|
"loss": 0.3541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3798632025718689,
|
|
"step": 1445,
|
|
"valid_targets_mean": 1594.6,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"grad_norm": 0.8003715063056267,
|
|
"learning_rate": 3.248323530210793e-05,
|
|
"loss": 0.3567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36555957794189453,
|
|
"step": 1450,
|
|
"valid_targets_mean": 1955.6,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 2.5086206896551726,
|
|
"grad_norm": 0.7228548026264292,
|
|
"learning_rate": 3.241594690001157e-05,
|
|
"loss": 0.3903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3639114499092102,
|
|
"step": 1455,
|
|
"valid_targets_mean": 2435.6,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 2.5172413793103448,
|
|
"grad_norm": 0.7739840718205107,
|
|
"learning_rate": 3.2348429051199424e-05,
|
|
"loss": 0.3409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35158008337020874,
|
|
"step": 1460,
|
|
"valid_targets_mean": 2565.8,
|
|
"valid_targets_min": 571
|
|
},
|
|
{
|
|
"epoch": 2.5258620689655173,
|
|
"grad_norm": 0.6029145997859495,
|
|
"learning_rate": 3.228068300340142e-05,
|
|
"loss": 0.3638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2789810299873352,
|
|
"step": 1465,
|
|
"valid_targets_mean": 3232.9,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 2.5344827586206895,
|
|
"grad_norm": 0.6812715584245135,
|
|
"learning_rate": 3.221271000856462e-05,
|
|
"loss": 0.3229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30192792415618896,
|
|
"step": 1470,
|
|
"valid_targets_mean": 2195.8,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 2.543103448275862,
|
|
"grad_norm": 0.6507849147630267,
|
|
"learning_rate": 3.214451132283006e-05,
|
|
"loss": 0.3808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3988229036331177,
|
|
"step": 1475,
|
|
"valid_targets_mean": 3180.1,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 2.5517241379310347,
|
|
"grad_norm": 0.6225641239120543,
|
|
"learning_rate": 3.207608820650955e-05,
|
|
"loss": 0.3579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2806398868560791,
|
|
"step": 1480,
|
|
"valid_targets_mean": 2902.6,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 2.560344827586207,
|
|
"grad_norm": 0.7164560899643635,
|
|
"learning_rate": 3.2007441924062374e-05,
|
|
"loss": 0.3216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3189341425895691,
|
|
"step": 1485,
|
|
"valid_targets_mean": 2501.9,
|
|
"valid_targets_min": 528
|
|
},
|
|
{
|
|
"epoch": 2.5689655172413794,
|
|
"grad_norm": 1.1127820484519313,
|
|
"learning_rate": 3.193857374407192e-05,
|
|
"loss": 0.3498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3648238480091095,
|
|
"step": 1490,
|
|
"valid_targets_mean": 2207.4,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 2.5775862068965516,
|
|
"grad_norm": 0.6715513908381312,
|
|
"learning_rate": 3.186948493922225e-05,
|
|
"loss": 0.3421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3119972050189972,
|
|
"step": 1495,
|
|
"valid_targets_mean": 2720.2,
|
|
"valid_targets_min": 993
|
|
},
|
|
{
|
|
"epoch": 2.586206896551724,
|
|
"grad_norm": 0.7059911737426282,
|
|
"learning_rate": 3.180017678627458e-05,
|
|
"loss": 0.3531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30181488394737244,
|
|
"step": 1500,
|
|
"valid_targets_mean": 2658.7,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 2.594827586206897,
|
|
"grad_norm": 0.6789907795293526,
|
|
"learning_rate": 3.173065056604366e-05,
|
|
"loss": 0.3533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32512885332107544,
|
|
"step": 1505,
|
|
"valid_targets_mean": 2610.2,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 2.603448275862069,
|
|
"grad_norm": 0.7487712135796768,
|
|
"learning_rate": 3.166090756337415e-05,
|
|
"loss": 0.3767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37083733081817627,
|
|
"step": 1510,
|
|
"valid_targets_mean": 2299.0,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 2.612068965517241,
|
|
"grad_norm": 0.7645675510189072,
|
|
"learning_rate": 3.159094906711683e-05,
|
|
"loss": 0.3825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42760002613067627,
|
|
"step": 1515,
|
|
"valid_targets_mean": 2592.6,
|
|
"valid_targets_min": 898
|
|
},
|
|
{
|
|
"epoch": 2.6206896551724137,
|
|
"grad_norm": 0.9936249106784035,
|
|
"learning_rate": 3.15207763701048e-05,
|
|
"loss": 0.3609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4002576172351837,
|
|
"step": 1520,
|
|
"valid_targets_mean": 1749.7,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 2.6293103448275863,
|
|
"grad_norm": 0.8230639044983854,
|
|
"learning_rate": 3.14503907691296e-05,
|
|
"loss": 0.3642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38861727714538574,
|
|
"step": 1525,
|
|
"valid_targets_mean": 2186.8,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 2.637931034482759,
|
|
"grad_norm": 0.8025375712809919,
|
|
"learning_rate": 3.1379793564917235e-05,
|
|
"loss": 0.3431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3810392916202545,
|
|
"step": 1530,
|
|
"valid_targets_mean": 2889.3,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 2.646551724137931,
|
|
"grad_norm": 0.5988230969931015,
|
|
"learning_rate": 3.130898606210414e-05,
|
|
"loss": 0.3044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2393997609615326,
|
|
"step": 1535,
|
|
"valid_targets_mean": 3076.1,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 2.655172413793103,
|
|
"grad_norm": 0.728559481458037,
|
|
"learning_rate": 3.1237969569213056e-05,
|
|
"loss": 0.3493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38803786039352417,
|
|
"step": 1540,
|
|
"valid_targets_mean": 3300.9,
|
|
"valid_targets_min": 463
|
|
},
|
|
{
|
|
"epoch": 2.663793103448276,
|
|
"grad_norm": 0.7872815008631463,
|
|
"learning_rate": 3.1166745398628874e-05,
|
|
"loss": 0.3395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38372093439102173,
|
|
"step": 1545,
|
|
"valid_targets_mean": 2159.7,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 2.6724137931034484,
|
|
"grad_norm": 0.7072698084992165,
|
|
"learning_rate": 3.109531486657437e-05,
|
|
"loss": 0.3165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29802533984184265,
|
|
"step": 1550,
|
|
"valid_targets_mean": 2321.8,
|
|
"valid_targets_min": 1013
|
|
},
|
|
{
|
|
"epoch": 2.6810344827586206,
|
|
"grad_norm": 0.7613110972073325,
|
|
"learning_rate": 3.102367929308586e-05,
|
|
"loss": 0.3994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3502175211906433,
|
|
"step": 1555,
|
|
"valid_targets_mean": 2251.6,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 2.689655172413793,
|
|
"grad_norm": 0.6782266299730657,
|
|
"learning_rate": 3.0951840001988854e-05,
|
|
"loss": 0.3133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3079949915409088,
|
|
"step": 1560,
|
|
"valid_targets_mean": 2867.9,
|
|
"valid_targets_min": 461
|
|
},
|
|
{
|
|
"epoch": 2.6982758620689653,
|
|
"grad_norm": 0.7261807462216028,
|
|
"learning_rate": 3.0879798320873546e-05,
|
|
"loss": 0.4048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2996377646923065,
|
|
"step": 1565,
|
|
"valid_targets_mean": 2151.3,
|
|
"valid_targets_min": 495
|
|
},
|
|
{
|
|
"epoch": 2.706896551724138,
|
|
"grad_norm": 0.6937031125933657,
|
|
"learning_rate": 3.0807555581070304e-05,
|
|
"loss": 0.304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25993096828460693,
|
|
"step": 1570,
|
|
"valid_targets_mean": 2490.8,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 2.7155172413793105,
|
|
"grad_norm": 0.7786261011913469,
|
|
"learning_rate": 3.0735113117625045e-05,
|
|
"loss": 0.3513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3422274589538574,
|
|
"step": 1575,
|
|
"valid_targets_mean": 2306.8,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 2.7241379310344827,
|
|
"grad_norm": 0.7305876707441854,
|
|
"learning_rate": 3.0662472269274617e-05,
|
|
"loss": 0.3359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31524696946144104,
|
|
"step": 1580,
|
|
"valid_targets_mean": 2317.8,
|
|
"valid_targets_min": 825
|
|
},
|
|
{
|
|
"epoch": 2.7327586206896552,
|
|
"grad_norm": 0.5716678765427254,
|
|
"learning_rate": 3.058963437842198e-05,
|
|
"loss": 0.3351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28373339772224426,
|
|
"step": 1585,
|
|
"valid_targets_mean": 3168.1,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 2.7413793103448274,
|
|
"grad_norm": 0.7402945538942874,
|
|
"learning_rate": 3.0516600791111465e-05,
|
|
"loss": 0.3303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28591129183769226,
|
|
"step": 1590,
|
|
"valid_targets_mean": 1927.6,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 2.75,
|
|
"grad_norm": 0.7215101466312314,
|
|
"learning_rate": 3.0443372857003857e-05,
|
|
"loss": 0.3378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37307700514793396,
|
|
"step": 1595,
|
|
"valid_targets_mean": 2697.4,
|
|
"valid_targets_min": 810
|
|
},
|
|
{
|
|
"epoch": 2.7586206896551726,
|
|
"grad_norm": 0.7518556001114719,
|
|
"learning_rate": 3.036995192935149e-05,
|
|
"loss": 0.3478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46996602416038513,
|
|
"step": 1600,
|
|
"valid_targets_mean": 3145.1,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 2.7672413793103448,
|
|
"grad_norm": 0.6659069040257636,
|
|
"learning_rate": 3.029633936497321e-05,
|
|
"loss": 0.4064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3989485502243042,
|
|
"step": 1605,
|
|
"valid_targets_mean": 3162.9,
|
|
"valid_targets_min": 954
|
|
},
|
|
{
|
|
"epoch": 2.7758620689655173,
|
|
"grad_norm": 0.6831300245308447,
|
|
"learning_rate": 3.0222536524229293e-05,
|
|
"loss": 0.3488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3316096067428589,
|
|
"step": 1610,
|
|
"valid_targets_mean": 2702.7,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 2.7844827586206895,
|
|
"grad_norm": 0.800951974974391,
|
|
"learning_rate": 3.0148544770996343e-05,
|
|
"loss": 0.3156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3438343405723572,
|
|
"step": 1615,
|
|
"valid_targets_mean": 2641.6,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 2.793103448275862,
|
|
"grad_norm": 0.7425261001470722,
|
|
"learning_rate": 3.007436547264207e-05,
|
|
"loss": 0.3785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.453504741191864,
|
|
"step": 1620,
|
|
"valid_targets_mean": 2584.2,
|
|
"valid_targets_min": 525
|
|
},
|
|
{
|
|
"epoch": 2.8017241379310347,
|
|
"grad_norm": 0.5879356651252614,
|
|
"learning_rate": 3.0000000000000004e-05,
|
|
"loss": 0.3625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33316174149513245,
|
|
"step": 1625,
|
|
"valid_targets_mean": 4065.9,
|
|
"valid_targets_min": 920
|
|
},
|
|
{
|
|
"epoch": 2.810344827586207,
|
|
"grad_norm": 0.706718695443324,
|
|
"learning_rate": 2.9925449727344184e-05,
|
|
"loss": 0.3809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2795742154121399,
|
|
"step": 1630,
|
|
"valid_targets_mean": 2615.4,
|
|
"valid_targets_min": 561
|
|
},
|
|
{
|
|
"epoch": 2.8189655172413794,
|
|
"grad_norm": 0.7529444243081488,
|
|
"learning_rate": 2.985071603236374e-05,
|
|
"loss": 0.3375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4043508768081665,
|
|
"step": 1635,
|
|
"valid_targets_mean": 2506.1,
|
|
"valid_targets_min": 414
|
|
},
|
|
{
|
|
"epoch": 2.8275862068965516,
|
|
"grad_norm": 0.8478343119280666,
|
|
"learning_rate": 2.9775800296137474e-05,
|
|
"loss": 0.3783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4446679353713989,
|
|
"step": 1640,
|
|
"valid_targets_mean": 1810.9,
|
|
"valid_targets_min": 498
|
|
},
|
|
{
|
|
"epoch": 2.836206896551724,
|
|
"grad_norm": 0.7560518629172477,
|
|
"learning_rate": 2.970070390310828e-05,
|
|
"loss": 0.3573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3898079991340637,
|
|
"step": 1645,
|
|
"valid_targets_mean": 2541.4,
|
|
"valid_targets_min": 519
|
|
},
|
|
{
|
|
"epoch": 2.844827586206897,
|
|
"grad_norm": 0.8856301975925097,
|
|
"learning_rate": 2.962542824105762e-05,
|
|
"loss": 0.3168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33871254324913025,
|
|
"step": 1650,
|
|
"valid_targets_mean": 1665.4,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 2.853448275862069,
|
|
"grad_norm": 0.6294418184869306,
|
|
"learning_rate": 2.954997470107982e-05,
|
|
"loss": 0.3008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3157624304294586,
|
|
"step": 1655,
|
|
"valid_targets_mean": 3095.0,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 2.862068965517241,
|
|
"grad_norm": 0.8204032206897608,
|
|
"learning_rate": 2.947434467755641e-05,
|
|
"loss": 0.3209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3293578624725342,
|
|
"step": 1660,
|
|
"valid_targets_mean": 1745.1,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 2.8706896551724137,
|
|
"grad_norm": 0.6456585958148404,
|
|
"learning_rate": 2.9398539568130327e-05,
|
|
"loss": 0.3683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4245327115058899,
|
|
"step": 1665,
|
|
"valid_targets_mean": 3494.4,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 2.8793103448275863,
|
|
"grad_norm": 0.8505733892277261,
|
|
"learning_rate": 2.9322560773680087e-05,
|
|
"loss": 0.3384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35134515166282654,
|
|
"step": 1670,
|
|
"valid_targets_mean": 2579.3,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 2.887931034482759,
|
|
"grad_norm": 0.5659331350418874,
|
|
"learning_rate": 2.924640969829393e-05,
|
|
"loss": 0.2976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2454695701599121,
|
|
"step": 1675,
|
|
"valid_targets_mean": 3142.8,
|
|
"valid_targets_min": 485
|
|
},
|
|
{
|
|
"epoch": 2.896551724137931,
|
|
"grad_norm": 0.8263610710384296,
|
|
"learning_rate": 2.9170087749243832e-05,
|
|
"loss": 0.3428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35583004355430603,
|
|
"step": 1680,
|
|
"valid_targets_mean": 2292.0,
|
|
"valid_targets_min": 772
|
|
},
|
|
{
|
|
"epoch": 2.905172413793103,
|
|
"grad_norm": 0.7317330072322265,
|
|
"learning_rate": 2.9093596336959513e-05,
|
|
"loss": 0.3961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42089030146598816,
|
|
"step": 1685,
|
|
"valid_targets_mean": 3087.1,
|
|
"valid_targets_min": 1080
|
|
},
|
|
{
|
|
"epoch": 2.913793103448276,
|
|
"grad_norm": 0.7090008562368073,
|
|
"learning_rate": 2.9016936875002377e-05,
|
|
"loss": 0.3575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.398629367351532,
|
|
"step": 1690,
|
|
"valid_targets_mean": 2475.6,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 2.9224137931034484,
|
|
"grad_norm": 0.5708369528592575,
|
|
"learning_rate": 2.8940110780039385e-05,
|
|
"loss": 0.3016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2487173229455948,
|
|
"step": 1695,
|
|
"valid_targets_mean": 2851.6,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 2.9310344827586206,
|
|
"grad_norm": 0.8629172656317031,
|
|
"learning_rate": 2.8863119471816878e-05,
|
|
"loss": 0.3878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45342352986335754,
|
|
"step": 1700,
|
|
"valid_targets_mean": 2397.3,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 2.939655172413793,
|
|
"grad_norm": 0.7017326006705162,
|
|
"learning_rate": 2.878596437313434e-05,
|
|
"loss": 0.3377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3168134093284607,
|
|
"step": 1705,
|
|
"valid_targets_mean": 2748.8,
|
|
"valid_targets_min": 481
|
|
},
|
|
{
|
|
"epoch": 2.9482758620689653,
|
|
"grad_norm": 0.9153122090855202,
|
|
"learning_rate": 2.87086469098181e-05,
|
|
"loss": 0.3652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35482969880104065,
|
|
"step": 1710,
|
|
"valid_targets_mean": 1666.0,
|
|
"valid_targets_min": 398
|
|
},
|
|
{
|
|
"epoch": 2.956896551724138,
|
|
"grad_norm": 0.6987207748719971,
|
|
"learning_rate": 2.863116851069499e-05,
|
|
"loss": 0.3815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36330512166023254,
|
|
"step": 1715,
|
|
"valid_targets_mean": 2604.6,
|
|
"valid_targets_min": 451
|
|
},
|
|
{
|
|
"epoch": 2.9655172413793105,
|
|
"grad_norm": 0.8404380696602337,
|
|
"learning_rate": 2.855353060756593e-05,
|
|
"loss": 0.3345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3547259569168091,
|
|
"step": 1720,
|
|
"valid_targets_mean": 1856.7,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 2.9741379310344827,
|
|
"grad_norm": 0.8361645261499164,
|
|
"learning_rate": 2.8475734635179472e-05,
|
|
"loss": 0.3766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3678659498691559,
|
|
"step": 1725,
|
|
"valid_targets_mean": 1938.4,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 2.9827586206896552,
|
|
"grad_norm": 0.6342671412128362,
|
|
"learning_rate": 2.8397782031205295e-05,
|
|
"loss": 0.3333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3068227767944336,
|
|
"step": 1730,
|
|
"valid_targets_mean": 2862.2,
|
|
"valid_targets_min": 424
|
|
},
|
|
{
|
|
"epoch": 2.9913793103448274,
|
|
"grad_norm": 0.70350983920623,
|
|
"learning_rate": 2.8319674236207634e-05,
|
|
"loss": 0.3373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.357696533203125,
|
|
"step": 1735,
|
|
"valid_targets_mean": 2667.8,
|
|
"valid_targets_min": 839
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 0.743166212291011,
|
|
"learning_rate": 2.8241412693618638e-05,
|
|
"loss": 0.374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29470035433769226,
|
|
"step": 1740,
|
|
"valid_targets_mean": 2023.8,
|
|
"valid_targets_min": 586
|
|
},
|
|
{
|
|
"epoch": 3.0086206896551726,
|
|
"grad_norm": 0.6224565306001386,
|
|
"learning_rate": 2.816299884971173e-05,
|
|
"loss": 0.3105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32670581340789795,
|
|
"step": 1745,
|
|
"valid_targets_mean": 3444.1,
|
|
"valid_targets_min": 779
|
|
},
|
|
{
|
|
"epoch": 3.0172413793103448,
|
|
"grad_norm": 0.7963369151415589,
|
|
"learning_rate": 2.8084434153574847e-05,
|
|
"loss": 0.312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2679070830345154,
|
|
"step": 1750,
|
|
"valid_targets_mean": 2331.9,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 3.0258620689655173,
|
|
"grad_norm": 0.8324646205431433,
|
|
"learning_rate": 2.8005720057083685e-05,
|
|
"loss": 0.319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4266745150089264,
|
|
"step": 1755,
|
|
"valid_targets_mean": 2636.2,
|
|
"valid_targets_min": 468
|
|
},
|
|
{
|
|
"epoch": 3.0344827586206895,
|
|
"grad_norm": 0.6413595895033111,
|
|
"learning_rate": 2.792685801487486e-05,
|
|
"loss": 0.2936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29419150948524475,
|
|
"step": 1760,
|
|
"valid_targets_mean": 3145.2,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 3.043103448275862,
|
|
"grad_norm": 0.7783389309831885,
|
|
"learning_rate": 2.7847849484319008e-05,
|
|
"loss": 0.3171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32182106375694275,
|
|
"step": 1765,
|
|
"valid_targets_mean": 2271.4,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 3.0517241379310347,
|
|
"grad_norm": 0.9049644291439677,
|
|
"learning_rate": 2.7768695925493897e-05,
|
|
"loss": 0.2905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2867991328239441,
|
|
"step": 1770,
|
|
"valid_targets_mean": 1607.9,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 3.060344827586207,
|
|
"grad_norm": 0.6221458496489107,
|
|
"learning_rate": 2.7689398801157393e-05,
|
|
"loss": 0.3208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41518068313598633,
|
|
"step": 1775,
|
|
"valid_targets_mean": 4899.2,
|
|
"valid_targets_min": 844
|
|
},
|
|
{
|
|
"epoch": 3.0689655172413794,
|
|
"grad_norm": 0.7613205444804567,
|
|
"learning_rate": 2.7609959576720467e-05,
|
|
"loss": 0.2768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28315573930740356,
|
|
"step": 1780,
|
|
"valid_targets_mean": 2667.1,
|
|
"valid_targets_min": 829
|
|
},
|
|
{
|
|
"epoch": 3.0775862068965516,
|
|
"grad_norm": 0.8143413289449313,
|
|
"learning_rate": 2.7530379720220096e-05,
|
|
"loss": 0.3087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27521416544914246,
|
|
"step": 1785,
|
|
"valid_targets_mean": 1862.4,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 3.086206896551724,
|
|
"grad_norm": 0.8480450228379316,
|
|
"learning_rate": 2.7450660702292132e-05,
|
|
"loss": 0.3371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25870606303215027,
|
|
"step": 1790,
|
|
"valid_targets_mean": 1700.4,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 3.0948275862068964,
|
|
"grad_norm": 1.238431131240692,
|
|
"learning_rate": 2.7370803996144143e-05,
|
|
"loss": 0.3354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2775198817253113,
|
|
"step": 1795,
|
|
"valid_targets_mean": 2701.1,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 3.103448275862069,
|
|
"grad_norm": 0.8041283864657258,
|
|
"learning_rate": 2.7290811077528166e-05,
|
|
"loss": 0.3155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3663734793663025,
|
|
"step": 1800,
|
|
"valid_targets_mean": 2370.8,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 3.1120689655172415,
|
|
"grad_norm": 0.8560717116522374,
|
|
"learning_rate": 2.7210683424713447e-05,
|
|
"loss": 0.2928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30424728989601135,
|
|
"step": 1805,
|
|
"valid_targets_mean": 1917.6,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 3.1206896551724137,
|
|
"grad_norm": 0.6705119517885663,
|
|
"learning_rate": 2.7130422518459113e-05,
|
|
"loss": 0.2923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2609316408634186,
|
|
"step": 1810,
|
|
"valid_targets_mean": 3044.2,
|
|
"valid_targets_min": 976
|
|
},
|
|
{
|
|
"epoch": 3.1293103448275863,
|
|
"grad_norm": 0.7230075926694457,
|
|
"learning_rate": 2.705002984198684e-05,
|
|
"loss": 0.2816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.301299512386322,
|
|
"step": 1815,
|
|
"valid_targets_mean": 2647.1,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 3.1379310344827585,
|
|
"grad_norm": 0.8100080109130748,
|
|
"learning_rate": 2.6969506880953384e-05,
|
|
"loss": 0.3179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31657522916793823,
|
|
"step": 1820,
|
|
"valid_targets_mean": 2186.7,
|
|
"valid_targets_min": 631
|
|
},
|
|
{
|
|
"epoch": 3.146551724137931,
|
|
"grad_norm": 0.8423669221223422,
|
|
"learning_rate": 2.688885512342318e-05,
|
|
"loss": 0.2852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2845194637775421,
|
|
"step": 1825,
|
|
"valid_targets_mean": 2300.3,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 3.1551724137931036,
|
|
"grad_norm": 0.6312094922458577,
|
|
"learning_rate": 2.680807605984082e-05,
|
|
"loss": 0.2745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2565772235393524,
|
|
"step": 1830,
|
|
"valid_targets_mean": 3699.1,
|
|
"valid_targets_min": 866
|
|
},
|
|
{
|
|
"epoch": 3.163793103448276,
|
|
"grad_norm": 0.7811266015626148,
|
|
"learning_rate": 2.6727171183003502e-05,
|
|
"loss": 0.3037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3389627933502197,
|
|
"step": 1835,
|
|
"valid_targets_mean": 2511.2,
|
|
"valid_targets_min": 495
|
|
},
|
|
{
|
|
"epoch": 3.1724137931034484,
|
|
"grad_norm": 0.8432121831699027,
|
|
"learning_rate": 2.6646141988033475e-05,
|
|
"loss": 0.3106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29517269134521484,
|
|
"step": 1840,
|
|
"valid_targets_mean": 1874.1,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 3.1810344827586206,
|
|
"grad_norm": 1.0990805510619235,
|
|
"learning_rate": 2.6564989972350364e-05,
|
|
"loss": 0.2762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2816365957260132,
|
|
"step": 1845,
|
|
"valid_targets_mean": 3813.9,
|
|
"valid_targets_min": 846
|
|
},
|
|
{
|
|
"epoch": 3.189655172413793,
|
|
"grad_norm": 0.8279965744680023,
|
|
"learning_rate": 2.6483716635643535e-05,
|
|
"loss": 0.3212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2970189154148102,
|
|
"step": 1850,
|
|
"valid_targets_mean": 2104.2,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 3.1982758620689653,
|
|
"grad_norm": 0.8196847761009315,
|
|
"learning_rate": 2.6402323479844364e-05,
|
|
"loss": 0.3001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3160390257835388,
|
|
"step": 1855,
|
|
"valid_targets_mean": 2641.1,
|
|
"valid_targets_min": 937
|
|
},
|
|
{
|
|
"epoch": 3.206896551724138,
|
|
"grad_norm": 1.6772060307830066,
|
|
"learning_rate": 2.6320812009098472e-05,
|
|
"loss": 0.284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2730563282966614,
|
|
"step": 1860,
|
|
"valid_targets_mean": 2182.1,
|
|
"valid_targets_min": 424
|
|
},
|
|
{
|
|
"epoch": 3.2155172413793105,
|
|
"grad_norm": 0.6982604978424688,
|
|
"learning_rate": 2.6239183729737957e-05,
|
|
"loss": 0.37,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2999745309352875,
|
|
"step": 1865,
|
|
"valid_targets_mean": 2517.1,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 3.2241379310344827,
|
|
"grad_norm": 0.9495735105999557,
|
|
"learning_rate": 2.6157440150253535e-05,
|
|
"loss": 0.3062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32909655570983887,
|
|
"step": 1870,
|
|
"valid_targets_mean": 2221.7,
|
|
"valid_targets_min": 805
|
|
},
|
|
{
|
|
"epoch": 3.2327586206896552,
|
|
"grad_norm": 0.7046523962164161,
|
|
"learning_rate": 2.6075582781266665e-05,
|
|
"loss": 0.2597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26159366965293884,
|
|
"step": 1875,
|
|
"valid_targets_mean": 2636.9,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 3.2413793103448274,
|
|
"grad_norm": 0.742401594365087,
|
|
"learning_rate": 2.5993613135501643e-05,
|
|
"loss": 0.2981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2503337562084198,
|
|
"step": 1880,
|
|
"valid_targets_mean": 2461.8,
|
|
"valid_targets_min": 895
|
|
},
|
|
{
|
|
"epoch": 3.25,
|
|
"grad_norm": 0.81344784045908,
|
|
"learning_rate": 2.5911532727757625e-05,
|
|
"loss": 0.3171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37119442224502563,
|
|
"step": 1885,
|
|
"valid_targets_mean": 2262.6,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 3.2586206896551726,
|
|
"grad_norm": 0.8599496940029792,
|
|
"learning_rate": 2.582934307488067e-05,
|
|
"loss": 0.2704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28416091203689575,
|
|
"step": 1890,
|
|
"valid_targets_mean": 1887.2,
|
|
"valid_targets_min": 528
|
|
},
|
|
{
|
|
"epoch": 3.2672413793103448,
|
|
"grad_norm": 0.9973091039588626,
|
|
"learning_rate": 2.5747045695735674e-05,
|
|
"loss": 0.2713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35767045617103577,
|
|
"step": 1895,
|
|
"valid_targets_mean": 1535.4,
|
|
"valid_targets_min": 786
|
|
},
|
|
{
|
|
"epoch": 3.2758620689655173,
|
|
"grad_norm": 0.6183796807165394,
|
|
"learning_rate": 2.5664642111178312e-05,
|
|
"loss": 0.2758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26878735423088074,
|
|
"step": 1900,
|
|
"valid_targets_mean": 3769.8,
|
|
"valid_targets_min": 907
|
|
},
|
|
{
|
|
"epoch": 3.2844827586206895,
|
|
"grad_norm": 0.5757846667776677,
|
|
"learning_rate": 2.5582133844026943e-05,
|
|
"loss": 0.2886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2681350111961365,
|
|
"step": 1905,
|
|
"valid_targets_mean": 4193.9,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 3.293103448275862,
|
|
"grad_norm": 0.7275958613903811,
|
|
"learning_rate": 2.5499522419034462e-05,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21775801479816437,
|
|
"step": 1910,
|
|
"valid_targets_mean": 2068.1,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 3.3017241379310347,
|
|
"grad_norm": 0.648898359722817,
|
|
"learning_rate": 2.5416809362860107e-05,
|
|
"loss": 0.3082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34529179334640503,
|
|
"step": 1915,
|
|
"valid_targets_mean": 3094.9,
|
|
"valid_targets_min": 1146
|
|
},
|
|
{
|
|
"epoch": 3.310344827586207,
|
|
"grad_norm": 0.62511185430479,
|
|
"learning_rate": 2.5333996204041276e-05,
|
|
"loss": 0.3104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27681249380111694,
|
|
"step": 1920,
|
|
"valid_targets_mean": 3492.2,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 3.3189655172413794,
|
|
"grad_norm": 0.6618453164474787,
|
|
"learning_rate": 2.5251084472965257e-05,
|
|
"loss": 0.2787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23826207220554352,
|
|
"step": 1925,
|
|
"valid_targets_mean": 2778.3,
|
|
"valid_targets_min": 498
|
|
},
|
|
{
|
|
"epoch": 3.3275862068965516,
|
|
"grad_norm": 0.8038892215545382,
|
|
"learning_rate": 2.5168075701840948e-05,
|
|
"loss": 0.2821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.291714072227478,
|
|
"step": 1930,
|
|
"valid_targets_mean": 2213.9,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 3.336206896551724,
|
|
"grad_norm": 0.5772711681646889,
|
|
"learning_rate": 2.5084971424670568e-05,
|
|
"loss": 0.2936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17962776124477386,
|
|
"step": 1935,
|
|
"valid_targets_mean": 2994.7,
|
|
"valid_targets_min": 412
|
|
},
|
|
{
|
|
"epoch": 3.344827586206897,
|
|
"grad_norm": 0.7120332340970782,
|
|
"learning_rate": 2.500177317722126e-05,
|
|
"loss": 0.2938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2869756817817688,
|
|
"step": 1940,
|
|
"valid_targets_mean": 2739.6,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 3.353448275862069,
|
|
"grad_norm": 0.7800922350042635,
|
|
"learning_rate": 2.4918482496996757e-05,
|
|
"loss": 0.2802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2766876816749573,
|
|
"step": 1945,
|
|
"valid_targets_mean": 2119.1,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 3.3620689655172415,
|
|
"grad_norm": 0.8737595126801067,
|
|
"learning_rate": 2.483510092320895e-05,
|
|
"loss": 0.3046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3302645981311798,
|
|
"step": 1950,
|
|
"valid_targets_mean": 1949.8,
|
|
"valid_targets_min": 399
|
|
},
|
|
{
|
|
"epoch": 3.3706896551724137,
|
|
"grad_norm": 0.648990693564747,
|
|
"learning_rate": 2.4751629996749427e-05,
|
|
"loss": 0.3201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28055495023727417,
|
|
"step": 1955,
|
|
"valid_targets_mean": 3328.7,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 3.3793103448275863,
|
|
"grad_norm": 0.891540323940474,
|
|
"learning_rate": 2.4668071260161022e-05,
|
|
"loss": 0.3211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3185821771621704,
|
|
"step": 1960,
|
|
"valid_targets_mean": 2461.1,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 3.3879310344827585,
|
|
"grad_norm": 0.6917295377931933,
|
|
"learning_rate": 2.4584426257609315e-05,
|
|
"loss": 0.2863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.218665212392807,
|
|
"step": 1965,
|
|
"valid_targets_mean": 2213.4,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 3.396551724137931,
|
|
"grad_norm": 1.010592244634894,
|
|
"learning_rate": 2.4500696534854062e-05,
|
|
"loss": 0.3134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37055134773254395,
|
|
"step": 1970,
|
|
"valid_targets_mean": 1879.9,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 3.405172413793103,
|
|
"grad_norm": 0.9519123896373668,
|
|
"learning_rate": 2.4416883639220647e-05,
|
|
"loss": 0.3325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33236557245254517,
|
|
"step": 1975,
|
|
"valid_targets_mean": 2066.6,
|
|
"valid_targets_min": 584
|
|
},
|
|
{
|
|
"epoch": 3.413793103448276,
|
|
"grad_norm": 0.7323235119643152,
|
|
"learning_rate": 2.4332989119571506e-05,
|
|
"loss": 0.3449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2706213891506195,
|
|
"step": 1980,
|
|
"valid_targets_mean": 2358.1,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 3.4224137931034484,
|
|
"grad_norm": 0.8154170625298807,
|
|
"learning_rate": 2.4249014526277473e-05,
|
|
"loss": 0.3181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3261958658695221,
|
|
"step": 1985,
|
|
"valid_targets_mean": 2225.0,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 3.4310344827586206,
|
|
"grad_norm": 0.7583929734614248,
|
|
"learning_rate": 2.416496141118915e-05,
|
|
"loss": 0.2906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3404994606971741,
|
|
"step": 1990,
|
|
"valid_targets_mean": 2448.5,
|
|
"valid_targets_min": 455
|
|
},
|
|
{
|
|
"epoch": 3.439655172413793,
|
|
"grad_norm": 0.76486119481783,
|
|
"learning_rate": 2.4080831327608224e-05,
|
|
"loss": 0.2867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3539115786552429,
|
|
"step": 1995,
|
|
"valid_targets_mean": 2719.1,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 3.4482758620689653,
|
|
"grad_norm": 0.5769571311421944,
|
|
"learning_rate": 2.3996625830258742e-05,
|
|
"loss": 0.3165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36898016929626465,
|
|
"step": 2000,
|
|
"valid_targets_mean": 4286.1,
|
|
"valid_targets_min": 725
|
|
},
|
|
{
|
|
"epoch": 3.456896551724138,
|
|
"grad_norm": 0.5832684017230357,
|
|
"learning_rate": 2.3912346475258424e-05,
|
|
"loss": 0.2961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20343273878097534,
|
|
"step": 2005,
|
|
"valid_targets_mean": 3749.2,
|
|
"valid_targets_min": 953
|
|
},
|
|
{
|
|
"epoch": 3.4655172413793105,
|
|
"grad_norm": 0.7460121503250889,
|
|
"learning_rate": 2.3827994820089856e-05,
|
|
"loss": 0.3004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3511773943901062,
|
|
"step": 2010,
|
|
"valid_targets_mean": 2519.6,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 3.4741379310344827,
|
|
"grad_norm": 0.8667582413887146,
|
|
"learning_rate": 2.3743572423571752e-05,
|
|
"loss": 0.2986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2761188745498657,
|
|
"step": 2015,
|
|
"valid_targets_mean": 1743.6,
|
|
"valid_targets_min": 457
|
|
},
|
|
{
|
|
"epoch": 3.4827586206896552,
|
|
"grad_norm": 0.8443538913181367,
|
|
"learning_rate": 2.365908084583011e-05,
|
|
"loss": 0.2833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3406338095664978,
|
|
"step": 2020,
|
|
"valid_targets_mean": 2261.7,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 3.4913793103448274,
|
|
"grad_norm": 0.8474513541413226,
|
|
"learning_rate": 2.3574521648269406e-05,
|
|
"loss": 0.3176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2867903709411621,
|
|
"step": 2025,
|
|
"valid_targets_mean": 1912.2,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 3.5,
|
|
"grad_norm": 0.7006881241253591,
|
|
"learning_rate": 2.3489896393543717e-05,
|
|
"loss": 0.3221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2570182681083679,
|
|
"step": 2030,
|
|
"valid_targets_mean": 2751.8,
|
|
"valid_targets_min": 871
|
|
},
|
|
{
|
|
"epoch": 3.5086206896551726,
|
|
"grad_norm": 0.7552809275843348,
|
|
"learning_rate": 2.340520664552788e-05,
|
|
"loss": 0.2995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30410170555114746,
|
|
"step": 2035,
|
|
"valid_targets_mean": 2564.1,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 3.5172413793103448,
|
|
"grad_norm": 0.8825867836498272,
|
|
"learning_rate": 2.3320453969288553e-05,
|
|
"loss": 0.3381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3782389760017395,
|
|
"step": 2040,
|
|
"valid_targets_mean": 1920.1,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 3.5258620689655173,
|
|
"grad_norm": 0.7921272597229708,
|
|
"learning_rate": 2.32356399310553e-05,
|
|
"loss": 0.2943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2745018005371094,
|
|
"step": 2045,
|
|
"valid_targets_mean": 2553.4,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 3.5344827586206895,
|
|
"grad_norm": 0.7063871315382729,
|
|
"learning_rate": 2.3150766098191667e-05,
|
|
"loss": 0.2877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25150594115257263,
|
|
"step": 2050,
|
|
"valid_targets_mean": 2500.0,
|
|
"valid_targets_min": 427
|
|
},
|
|
{
|
|
"epoch": 3.543103448275862,
|
|
"grad_norm": 0.780288892052238,
|
|
"learning_rate": 2.3065834039166212e-05,
|
|
"loss": 0.2921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3334049582481384,
|
|
"step": 2055,
|
|
"valid_targets_mean": 2465.0,
|
|
"valid_targets_min": 569
|
|
},
|
|
{
|
|
"epoch": 3.5517241379310347,
|
|
"grad_norm": 0.7034316364336279,
|
|
"learning_rate": 2.2980845323523487e-05,
|
|
"loss": 0.2969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2297370731830597,
|
|
"step": 2060,
|
|
"valid_targets_mean": 2638.2,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 3.560344827586207,
|
|
"grad_norm": 0.6064369322023231,
|
|
"learning_rate": 2.2895801521855096e-05,
|
|
"loss": 0.2854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2786329984664917,
|
|
"step": 2065,
|
|
"valid_targets_mean": 4094.6,
|
|
"valid_targets_min": 1018
|
|
},
|
|
{
|
|
"epoch": 3.5689655172413794,
|
|
"grad_norm": 0.653788622283706,
|
|
"learning_rate": 2.2810704205770587e-05,
|
|
"loss": 0.2596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2808879315853119,
|
|
"step": 2070,
|
|
"valid_targets_mean": 3194.4,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 3.5775862068965516,
|
|
"grad_norm": 0.8521494971580398,
|
|
"learning_rate": 2.2725554947868495e-05,
|
|
"loss": 0.2835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24698427319526672,
|
|
"step": 2075,
|
|
"valid_targets_mean": 1753.4,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 3.586206896551724,
|
|
"grad_norm": 0.8795497261182633,
|
|
"learning_rate": 2.2640355321707218e-05,
|
|
"loss": 0.2776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2842119634151459,
|
|
"step": 2080,
|
|
"valid_targets_mean": 2270.3,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 3.594827586206897,
|
|
"grad_norm": 0.7839865726117404,
|
|
"learning_rate": 2.2555106901775955e-05,
|
|
"loss": 0.3753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2968730032444,
|
|
"step": 2085,
|
|
"valid_targets_mean": 1988.5,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 3.603448275862069,
|
|
"grad_norm": 0.787606769892728,
|
|
"learning_rate": 2.246981126346564e-05,
|
|
"loss": 0.3496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44315293431282043,
|
|
"step": 2090,
|
|
"valid_targets_mean": 3020.5,
|
|
"valid_targets_min": 926
|
|
},
|
|
{
|
|
"epoch": 3.612068965517241,
|
|
"grad_norm": 0.610584461944452,
|
|
"learning_rate": 2.238446998303977e-05,
|
|
"loss": 0.3238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3059382438659668,
|
|
"step": 2095,
|
|
"valid_targets_mean": 3842.9,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 3.6206896551724137,
|
|
"grad_norm": 0.7828264148089181,
|
|
"learning_rate": 2.2299084637605343e-05,
|
|
"loss": 0.2631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29074084758758545,
|
|
"step": 2100,
|
|
"valid_targets_mean": 2296.9,
|
|
"valid_targets_min": 937
|
|
},
|
|
{
|
|
"epoch": 3.6293103448275863,
|
|
"grad_norm": 0.8127024938088724,
|
|
"learning_rate": 2.221365680508364e-05,
|
|
"loss": 0.2621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2938804030418396,
|
|
"step": 2105,
|
|
"valid_targets_mean": 2357.0,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 3.637931034482759,
|
|
"grad_norm": 0.7160590569798018,
|
|
"learning_rate": 2.2128188064181143e-05,
|
|
"loss": 0.3036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24818356335163116,
|
|
"step": 2110,
|
|
"valid_targets_mean": 2373.7,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 3.646551724137931,
|
|
"grad_norm": 0.8368002017730753,
|
|
"learning_rate": 2.2042679994360296e-05,
|
|
"loss": 0.3115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32543590664863586,
|
|
"step": 2115,
|
|
"valid_targets_mean": 1967.9,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 3.655172413793103,
|
|
"grad_norm": 0.6869621086443359,
|
|
"learning_rate": 2.195713417581033e-05,
|
|
"loss": 0.2941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35331353545188904,
|
|
"step": 2120,
|
|
"valid_targets_mean": 3145.4,
|
|
"valid_targets_min": 547
|
|
},
|
|
{
|
|
"epoch": 3.663793103448276,
|
|
"grad_norm": 1.7802277556554986,
|
|
"learning_rate": 2.1871552189418113e-05,
|
|
"loss": 0.2995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28531312942504883,
|
|
"step": 2125,
|
|
"valid_targets_mean": 2089.6,
|
|
"valid_targets_min": 442
|
|
},
|
|
{
|
|
"epoch": 3.6724137931034484,
|
|
"grad_norm": 0.7617925208270429,
|
|
"learning_rate": 2.1785935616738855e-05,
|
|
"loss": 0.3911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25667494535446167,
|
|
"step": 2130,
|
|
"valid_targets_mean": 2242.4,
|
|
"valid_targets_min": 807
|
|
},
|
|
{
|
|
"epoch": 3.6810344827586206,
|
|
"grad_norm": 0.8992850081501342,
|
|
"learning_rate": 2.170028603996695e-05,
|
|
"loss": 0.3385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28584620356559753,
|
|
"step": 2135,
|
|
"valid_targets_mean": 1704.6,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 3.689655172413793,
|
|
"grad_norm": 0.7823582786020942,
|
|
"learning_rate": 2.161460504190668e-05,
|
|
"loss": 0.3331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34780627489089966,
|
|
"step": 2140,
|
|
"valid_targets_mean": 2750.6,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 3.6982758620689653,
|
|
"grad_norm": 0.8195994514460628,
|
|
"learning_rate": 2.1528894205943017e-05,
|
|
"loss": 0.3372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3839362859725952,
|
|
"step": 2145,
|
|
"valid_targets_mean": 2548.4,
|
|
"valid_targets_min": 512
|
|
},
|
|
{
|
|
"epoch": 3.706896551724138,
|
|
"grad_norm": 0.8494572477645436,
|
|
"learning_rate": 2.1443155116012328e-05,
|
|
"loss": 0.3117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4128968417644501,
|
|
"step": 2150,
|
|
"valid_targets_mean": 2398.0,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 3.7155172413793105,
|
|
"grad_norm": 0.8997195991583952,
|
|
"learning_rate": 2.1357389356573098e-05,
|
|
"loss": 0.286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27611222863197327,
|
|
"step": 2155,
|
|
"valid_targets_mean": 1524.3,
|
|
"valid_targets_min": 352
|
|
},
|
|
{
|
|
"epoch": 3.7241379310344827,
|
|
"grad_norm": 0.7164419636717259,
|
|
"learning_rate": 2.1271598512576705e-05,
|
|
"loss": 0.3153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35011881589889526,
|
|
"step": 2160,
|
|
"valid_targets_mean": 2900.4,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 3.7327586206896552,
|
|
"grad_norm": 0.8480166956097996,
|
|
"learning_rate": 2.1185784169438047e-05,
|
|
"loss": 0.3009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34647125005722046,
|
|
"step": 2165,
|
|
"valid_targets_mean": 2132.8,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 3.7413793103448274,
|
|
"grad_norm": 0.6292335934505978,
|
|
"learning_rate": 2.1099947913006303e-05,
|
|
"loss": 0.3166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30451124906539917,
|
|
"step": 2170,
|
|
"valid_targets_mean": 3287.4,
|
|
"valid_targets_min": 511
|
|
},
|
|
{
|
|
"epoch": 3.75,
|
|
"grad_norm": 0.5754753939117979,
|
|
"learning_rate": 2.1014091329535618e-05,
|
|
"loss": 0.2763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.302144855260849,
|
|
"step": 2175,
|
|
"valid_targets_mean": 4022.0,
|
|
"valid_targets_min": 586
|
|
},
|
|
{
|
|
"epoch": 3.7586206896551726,
|
|
"grad_norm": 0.7017710687987908,
|
|
"learning_rate": 2.0928216005655762e-05,
|
|
"loss": 0.2965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2097795307636261,
|
|
"step": 2180,
|
|
"valid_targets_mean": 2899.4,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 3.7672413793103448,
|
|
"grad_norm": 0.9187704585481599,
|
|
"learning_rate": 2.084232352834285e-05,
|
|
"loss": 0.3292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31041762232780457,
|
|
"step": 2185,
|
|
"valid_targets_mean": 2338.7,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 3.7758620689655173,
|
|
"grad_norm": 0.7170011346350758,
|
|
"learning_rate": 2.0756415484889975e-05,
|
|
"loss": 0.291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23631168901920319,
|
|
"step": 2190,
|
|
"valid_targets_mean": 2144.9,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 3.7844827586206895,
|
|
"grad_norm": 0.8606344295508895,
|
|
"learning_rate": 2.0670493462877897e-05,
|
|
"loss": 0.3169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32752877473831177,
|
|
"step": 2195,
|
|
"valid_targets_mean": 1855.7,
|
|
"valid_targets_min": 430
|
|
},
|
|
{
|
|
"epoch": 3.793103448275862,
|
|
"grad_norm": 0.7682771055577163,
|
|
"learning_rate": 2.0584559050145706e-05,
|
|
"loss": 0.329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48432260751724243,
|
|
"step": 2200,
|
|
"valid_targets_mean": 3133.6,
|
|
"valid_targets_min": 561
|
|
},
|
|
{
|
|
"epoch": 3.8017241379310347,
|
|
"grad_norm": 0.8349304825117818,
|
|
"learning_rate": 2.0498613834761462e-05,
|
|
"loss": 0.3085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3010793924331665,
|
|
"step": 2205,
|
|
"valid_targets_mean": 2094.9,
|
|
"valid_targets_min": 634
|
|
},
|
|
{
|
|
"epoch": 3.810344827586207,
|
|
"grad_norm": 0.670157550483695,
|
|
"learning_rate": 2.0412659404992862e-05,
|
|
"loss": 0.2971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2826797068119049,
|
|
"step": 2210,
|
|
"valid_targets_mean": 3276.9,
|
|
"valid_targets_min": 479
|
|
},
|
|
{
|
|
"epoch": 3.8189655172413794,
|
|
"grad_norm": 0.733561876185473,
|
|
"learning_rate": 2.0326697349277893e-05,
|
|
"loss": 0.2971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30336707830429077,
|
|
"step": 2215,
|
|
"valid_targets_mean": 2686.1,
|
|
"valid_targets_min": 772
|
|
},
|
|
{
|
|
"epoch": 3.8275862068965516,
|
|
"grad_norm": 0.7049773389785631,
|
|
"learning_rate": 2.024072925619546e-05,
|
|
"loss": 0.3069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26717764139175415,
|
|
"step": 2220,
|
|
"valid_targets_mean": 3601.4,
|
|
"valid_targets_min": 738
|
|
},
|
|
{
|
|
"epoch": 3.836206896551724,
|
|
"grad_norm": 0.8019247619108416,
|
|
"learning_rate": 2.0154756714436043e-05,
|
|
"loss": 0.3114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3339763283729553,
|
|
"step": 2225,
|
|
"valid_targets_mean": 2529.2,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 3.844827586206897,
|
|
"grad_norm": 0.8139666943159004,
|
|
"learning_rate": 2.006878131277233e-05,
|
|
"loss": 0.2882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2815071940422058,
|
|
"step": 2230,
|
|
"valid_targets_mean": 2022.9,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 3.853448275862069,
|
|
"grad_norm": 0.713112858060529,
|
|
"learning_rate": 1.9982804640029864e-05,
|
|
"loss": 0.3146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3495800197124481,
|
|
"step": 2235,
|
|
"valid_targets_mean": 2557.1,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 3.862068965517241,
|
|
"grad_norm": 0.9058457107189625,
|
|
"learning_rate": 1.989682828505767e-05,
|
|
"loss": 0.3303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3590564727783203,
|
|
"step": 2240,
|
|
"valid_targets_mean": 1953.3,
|
|
"valid_targets_min": 389
|
|
},
|
|
{
|
|
"epoch": 3.8706896551724137,
|
|
"grad_norm": 0.7293645847517138,
|
|
"learning_rate": 1.9810853836698913e-05,
|
|
"loss": 0.2725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24753639101982117,
|
|
"step": 2245,
|
|
"valid_targets_mean": 2149.6,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 3.8793103448275863,
|
|
"grad_norm": 0.7031641747976873,
|
|
"learning_rate": 1.972488288376151e-05,
|
|
"loss": 0.3018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2974679470062256,
|
|
"step": 2250,
|
|
"valid_targets_mean": 2780.9,
|
|
"valid_targets_min": 470
|
|
},
|
|
{
|
|
"epoch": 3.887931034482759,
|
|
"grad_norm": 0.8885166742958897,
|
|
"learning_rate": 1.963891701498879e-05,
|
|
"loss": 0.312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31055015325546265,
|
|
"step": 2255,
|
|
"valid_targets_mean": 1596.9,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 3.896551724137931,
|
|
"grad_norm": 0.7261435079831179,
|
|
"learning_rate": 1.955295781903014e-05,
|
|
"loss": 0.3228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24428752064704895,
|
|
"step": 2260,
|
|
"valid_targets_mean": 2453.4,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 3.905172413793103,
|
|
"grad_norm": 0.8246463215353056,
|
|
"learning_rate": 1.9467006884411605e-05,
|
|
"loss": 0.3004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3109739422798157,
|
|
"step": 2265,
|
|
"valid_targets_mean": 2420.3,
|
|
"valid_targets_min": 748
|
|
},
|
|
{
|
|
"epoch": 3.913793103448276,
|
|
"grad_norm": 0.6601682761330331,
|
|
"learning_rate": 1.9381065799506583e-05,
|
|
"loss": 0.281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2506582736968994,
|
|
"step": 2270,
|
|
"valid_targets_mean": 3001.2,
|
|
"valid_targets_min": 787
|
|
},
|
|
{
|
|
"epoch": 3.9224137931034484,
|
|
"grad_norm": 0.6334105777237168,
|
|
"learning_rate": 1.929513615250643e-05,
|
|
"loss": 0.2768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3046247363090515,
|
|
"step": 2275,
|
|
"valid_targets_mean": 3295.7,
|
|
"valid_targets_min": 979
|
|
},
|
|
{
|
|
"epoch": 3.9310344827586206,
|
|
"grad_norm": 0.6634253062353758,
|
|
"learning_rate": 1.9209219531391155e-05,
|
|
"loss": 0.2909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28064969182014465,
|
|
"step": 2280,
|
|
"valid_targets_mean": 3317.7,
|
|
"valid_targets_min": 510
|
|
},
|
|
{
|
|
"epoch": 3.939655172413793,
|
|
"grad_norm": 0.8241315570534355,
|
|
"learning_rate": 1.9123317523900015e-05,
|
|
"loss": 0.2966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3636839687824249,
|
|
"step": 2285,
|
|
"valid_targets_mean": 2278.6,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 3.9482758620689653,
|
|
"grad_norm": 0.5973298614024068,
|
|
"learning_rate": 1.9037431717502253e-05,
|
|
"loss": 0.3214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3552555441856384,
|
|
"step": 2290,
|
|
"valid_targets_mean": 4570.8,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 3.956896551724138,
|
|
"grad_norm": 0.7125951830374488,
|
|
"learning_rate": 1.8951563699367673e-05,
|
|
"loss": 0.3097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2534135580062866,
|
|
"step": 2295,
|
|
"valid_targets_mean": 2782.2,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 3.9655172413793105,
|
|
"grad_norm": 0.7013488738121144,
|
|
"learning_rate": 1.886571505633737e-05,
|
|
"loss": 0.3055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3117072284221649,
|
|
"step": 2300,
|
|
"valid_targets_mean": 2736.2,
|
|
"valid_targets_min": 791
|
|
},
|
|
{
|
|
"epoch": 3.9741379310344827,
|
|
"grad_norm": 0.7932404713933431,
|
|
"learning_rate": 1.8779887374894384e-05,
|
|
"loss": 0.3062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.304098516702652,
|
|
"step": 2305,
|
|
"valid_targets_mean": 2292.9,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 3.9827586206896552,
|
|
"grad_norm": 0.7615525880276409,
|
|
"learning_rate": 1.8694082241134385e-05,
|
|
"loss": 0.3484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3014771342277527,
|
|
"step": 2310,
|
|
"valid_targets_mean": 2608.2,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 3.9913793103448274,
|
|
"grad_norm": 0.7054702340895173,
|
|
"learning_rate": 1.8608301240736378e-05,
|
|
"loss": 0.2905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3216426968574524,
|
|
"step": 2315,
|
|
"valid_targets_mean": 2888.3,
|
|
"valid_targets_min": 519
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"grad_norm": 0.8671769739177089,
|
|
"learning_rate": 1.852254595893335e-05,
|
|
"loss": 0.3191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3277759850025177,
|
|
"step": 2320,
|
|
"valid_targets_mean": 2034.9,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 4.008620689655173,
|
|
"grad_norm": 0.8662049706352303,
|
|
"learning_rate": 1.8436817980483035e-05,
|
|
"loss": 0.2731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23021268844604492,
|
|
"step": 2325,
|
|
"valid_targets_mean": 1730.6,
|
|
"valid_targets_min": 547
|
|
},
|
|
{
|
|
"epoch": 4.017241379310345,
|
|
"grad_norm": 0.7888888970206421,
|
|
"learning_rate": 1.835111888963859e-05,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3193662464618683,
|
|
"step": 2330,
|
|
"valid_targets_mean": 2617.9,
|
|
"valid_targets_min": 807
|
|
},
|
|
{
|
|
"epoch": 4.025862068965517,
|
|
"grad_norm": 0.8050736339071413,
|
|
"learning_rate": 1.8265450270119335e-05,
|
|
"loss": 0.2168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23052524030208588,
|
|
"step": 2335,
|
|
"valid_targets_mean": 2918.8,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 4.0344827586206895,
|
|
"grad_norm": 0.8104059449972785,
|
|
"learning_rate": 1.8179813705081468e-05,
|
|
"loss": 0.2683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25504401326179504,
|
|
"step": 2340,
|
|
"valid_targets_mean": 2983.4,
|
|
"valid_targets_min": 519
|
|
},
|
|
{
|
|
"epoch": 4.043103448275862,
|
|
"grad_norm": 0.7353605793149197,
|
|
"learning_rate": 1.8094210777088833e-05,
|
|
"loss": 0.2811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.252414733171463,
|
|
"step": 2345,
|
|
"valid_targets_mean": 2531.2,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 4.051724137931035,
|
|
"grad_norm": 0.6092689710576782,
|
|
"learning_rate": 1.800864306808367e-05,
|
|
"loss": 0.2341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20068293809890747,
|
|
"step": 2350,
|
|
"valid_targets_mean": 3792.4,
|
|
"valid_targets_min": 651
|
|
},
|
|
{
|
|
"epoch": 4.060344827586207,
|
|
"grad_norm": 0.858412056979054,
|
|
"learning_rate": 1.7923112159357344e-05,
|
|
"loss": 0.2411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28051990270614624,
|
|
"step": 2355,
|
|
"valid_targets_mean": 2174.3,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 4.068965517241379,
|
|
"grad_norm": 0.8572346124316681,
|
|
"learning_rate": 1.783761963152117e-05,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31026482582092285,
|
|
"step": 2360,
|
|
"valid_targets_mean": 2267.0,
|
|
"valid_targets_min": 827
|
|
},
|
|
{
|
|
"epoch": 4.077586206896552,
|
|
"grad_norm": 0.9678784332310315,
|
|
"learning_rate": 1.7752167064477173e-05,
|
|
"loss": 0.2629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29770296812057495,
|
|
"step": 2365,
|
|
"valid_targets_mean": 1884.4,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 4.086206896551724,
|
|
"grad_norm": 0.7779777387424832,
|
|
"learning_rate": 1.7666756037388923e-05,
|
|
"loss": 0.2739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32733461260795593,
|
|
"step": 2370,
|
|
"valid_targets_mean": 2521.4,
|
|
"valid_targets_min": 485
|
|
},
|
|
{
|
|
"epoch": 4.094827586206897,
|
|
"grad_norm": 0.9267768621801464,
|
|
"learning_rate": 1.7581388128652315e-05,
|
|
"loss": 0.2972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2627328038215637,
|
|
"step": 2375,
|
|
"valid_targets_mean": 2000.8,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 4.103448275862069,
|
|
"grad_norm": 0.9298458568007258,
|
|
"learning_rate": 1.7496064915866414e-05,
|
|
"loss": 0.2526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2559812068939209,
|
|
"step": 2380,
|
|
"valid_targets_mean": 1880.1,
|
|
"valid_targets_min": 873
|
|
},
|
|
{
|
|
"epoch": 4.112068965517241,
|
|
"grad_norm": 0.7420079443094495,
|
|
"learning_rate": 1.7410787975804314e-05,
|
|
"loss": 0.2637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20432040095329285,
|
|
"step": 2385,
|
|
"valid_targets_mean": 2497.0,
|
|
"valid_targets_min": 1002
|
|
},
|
|
{
|
|
"epoch": 4.120689655172414,
|
|
"grad_norm": 0.9139413054677984,
|
|
"learning_rate": 1.732555888438398e-05,
|
|
"loss": 0.2801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2299332469701767,
|
|
"step": 2390,
|
|
"valid_targets_mean": 1902.1,
|
|
"valid_targets_min": 472
|
|
},
|
|
{
|
|
"epoch": 4.129310344827586,
|
|
"grad_norm": 0.9093389443991601,
|
|
"learning_rate": 1.7240379216639136e-05,
|
|
"loss": 0.3059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2951817512512207,
|
|
"step": 2395,
|
|
"valid_targets_mean": 2019.4,
|
|
"valid_targets_min": 584
|
|
},
|
|
{
|
|
"epoch": 4.137931034482759,
|
|
"grad_norm": 0.7632400394885388,
|
|
"learning_rate": 1.7155250546690173e-05,
|
|
"loss": 0.2314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2085626870393753,
|
|
"step": 2400,
|
|
"valid_targets_mean": 2969.6,
|
|
"valid_targets_min": 641
|
|
},
|
|
{
|
|
"epoch": 4.146551724137931,
|
|
"grad_norm": 0.8260537770861034,
|
|
"learning_rate": 1.707017444771502e-05,
|
|
"loss": 0.2487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2482033669948578,
|
|
"step": 2405,
|
|
"valid_targets_mean": 2091.2,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 4.155172413793103,
|
|
"grad_norm": 0.6923161253459018,
|
|
"learning_rate": 1.6985152491920103e-05,
|
|
"loss": 0.2962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20103991031646729,
|
|
"step": 2410,
|
|
"valid_targets_mean": 2961.4,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 4.163793103448276,
|
|
"grad_norm": 0.723678400646684,
|
|
"learning_rate": 1.690018625051128e-05,
|
|
"loss": 0.2365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2579345107078552,
|
|
"step": 2415,
|
|
"valid_targets_mean": 2701.3,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 4.172413793103448,
|
|
"grad_norm": 0.7494143777775046,
|
|
"learning_rate": 1.681527729366481e-05,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2361277937889099,
|
|
"step": 2420,
|
|
"valid_targets_mean": 2569.4,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 4.181034482758621,
|
|
"grad_norm": 0.9112887953263451,
|
|
"learning_rate": 1.673042719049834e-05,
|
|
"loss": 0.2529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2863658666610718,
|
|
"step": 2425,
|
|
"valid_targets_mean": 2074.4,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 4.189655172413793,
|
|
"grad_norm": 0.7937887670056066,
|
|
"learning_rate": 1.664563750904188e-05,
|
|
"loss": 0.2526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2154037058353424,
|
|
"step": 2430,
|
|
"valid_targets_mean": 2729.9,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 4.198275862068965,
|
|
"grad_norm": 0.7433928051105411,
|
|
"learning_rate": 1.656090981620888e-05,
|
|
"loss": 0.2801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24456550180912018,
|
|
"step": 2435,
|
|
"valid_targets_mean": 2620.4,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 4.206896551724138,
|
|
"grad_norm": 1.045004846134096,
|
|
"learning_rate": 1.64762456777672e-05,
|
|
"loss": 0.2471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2445065975189209,
|
|
"step": 2440,
|
|
"valid_targets_mean": 1532.7,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 4.2155172413793105,
|
|
"grad_norm": 0.8682943506947929,
|
|
"learning_rate": 1.6391646658310242e-05,
|
|
"loss": 0.2776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2688167989253998,
|
|
"step": 2445,
|
|
"valid_targets_mean": 2110.8,
|
|
"valid_targets_min": 536
|
|
},
|
|
{
|
|
"epoch": 4.224137931034483,
|
|
"grad_norm": 0.7496628558965965,
|
|
"learning_rate": 1.6307114321227996e-05,
|
|
"loss": 0.2705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.280312180519104,
|
|
"step": 2450,
|
|
"valid_targets_mean": 2859.6,
|
|
"valid_targets_min": 1013
|
|
},
|
|
{
|
|
"epoch": 4.232758620689655,
|
|
"grad_norm": 0.8178573339580305,
|
|
"learning_rate": 1.622265022867818e-05,
|
|
"loss": 0.3203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3833100199699402,
|
|
"step": 2455,
|
|
"valid_targets_mean": 2496.7,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 4.241379310344827,
|
|
"grad_norm": 0.8958331031395433,
|
|
"learning_rate": 1.6138255941557336e-05,
|
|
"loss": 0.275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23110151290893555,
|
|
"step": 2460,
|
|
"valid_targets_mean": 2041.5,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 4.25,
|
|
"grad_norm": 1.2008802918634554,
|
|
"learning_rate": 1.6053933019472003e-05,
|
|
"loss": 0.2459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23847255110740662,
|
|
"step": 2465,
|
|
"valid_targets_mean": 2182.4,
|
|
"valid_targets_min": 738
|
|
},
|
|
{
|
|
"epoch": 4.258620689655173,
|
|
"grad_norm": 0.7253739780355688,
|
|
"learning_rate": 1.5969683020709902e-05,
|
|
"loss": 0.2323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19945028424263,
|
|
"step": 2470,
|
|
"valid_targets_mean": 3067.0,
|
|
"valid_targets_min": 907
|
|
},
|
|
{
|
|
"epoch": 4.267241379310345,
|
|
"grad_norm": 0.7944155046031013,
|
|
"learning_rate": 1.5885507502211108e-05,
|
|
"loss": 0.242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24398934841156006,
|
|
"step": 2475,
|
|
"valid_targets_mean": 2691.2,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 4.275862068965517,
|
|
"grad_norm": 0.7602735777735121,
|
|
"learning_rate": 1.5801408019539345e-05,
|
|
"loss": 0.254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26826539635658264,
|
|
"step": 2480,
|
|
"valid_targets_mean": 2809.9,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 4.2844827586206895,
|
|
"grad_norm": 0.8482564423959594,
|
|
"learning_rate": 1.5717386126853156e-05,
|
|
"loss": 0.2334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2374395728111267,
|
|
"step": 2485,
|
|
"valid_targets_mean": 2188.2,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 4.293103448275862,
|
|
"grad_norm": 0.885080946312999,
|
|
"learning_rate": 1.5633443376877236e-05,
|
|
"loss": 0.2797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21632343530654907,
|
|
"step": 2490,
|
|
"valid_targets_mean": 1872.2,
|
|
"valid_targets_min": 683
|
|
},
|
|
{
|
|
"epoch": 4.301724137931035,
|
|
"grad_norm": 0.7270077139583213,
|
|
"learning_rate": 1.5549581320873715e-05,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23827451467514038,
|
|
"step": 2495,
|
|
"valid_targets_mean": 2864.6,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 4.310344827586207,
|
|
"grad_norm": 0.884280348292525,
|
|
"learning_rate": 1.546580150861351e-05,
|
|
"loss": 0.3097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2653864026069641,
|
|
"step": 2500,
|
|
"valid_targets_mean": 2208.6,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 4.318965517241379,
|
|
"grad_norm": 0.7511935061914762,
|
|
"learning_rate": 1.5382105488347654e-05,
|
|
"loss": 0.2556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2234722077846527,
|
|
"step": 2505,
|
|
"valid_targets_mean": 2560.5,
|
|
"valid_targets_min": 907
|
|
},
|
|
{
|
|
"epoch": 4.327586206896552,
|
|
"grad_norm": 0.6927902145845654,
|
|
"learning_rate": 1.5298494806778733e-05,
|
|
"loss": 0.2651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3259279429912567,
|
|
"step": 2510,
|
|
"valid_targets_mean": 3623.5,
|
|
"valid_targets_min": 424
|
|
},
|
|
{
|
|
"epoch": 4.336206896551724,
|
|
"grad_norm": 0.9363964007863711,
|
|
"learning_rate": 1.5214971009032251e-05,
|
|
"loss": 0.2789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27032387256622314,
|
|
"step": 2515,
|
|
"valid_targets_mean": 2325.2,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 4.344827586206897,
|
|
"grad_norm": 0.6946434774843553,
|
|
"learning_rate": 1.51315356386281e-05,
|
|
"loss": 0.26,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19004008173942566,
|
|
"step": 2520,
|
|
"valid_targets_mean": 2366.9,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 4.353448275862069,
|
|
"grad_norm": 0.8539126756883658,
|
|
"learning_rate": 1.5048190237452052e-05,
|
|
"loss": 0.2502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2525019645690918,
|
|
"step": 2525,
|
|
"valid_targets_mean": 2396.6,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 4.362068965517241,
|
|
"grad_norm": 0.79778758981723,
|
|
"learning_rate": 1.4964936345727217e-05,
|
|
"loss": 0.2738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2425791621208191,
|
|
"step": 2530,
|
|
"valid_targets_mean": 2591.4,
|
|
"valid_targets_min": 463
|
|
},
|
|
{
|
|
"epoch": 4.370689655172414,
|
|
"grad_norm": 0.9069366963385622,
|
|
"learning_rate": 1.4881775501985645e-05,
|
|
"loss": 0.2738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2515653371810913,
|
|
"step": 2535,
|
|
"valid_targets_mean": 2669.3,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 4.379310344827586,
|
|
"grad_norm": 0.7963250270457406,
|
|
"learning_rate": 1.4798709243039842e-05,
|
|
"loss": 0.2526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21995809674263,
|
|
"step": 2540,
|
|
"valid_targets_mean": 2314.8,
|
|
"valid_targets_min": 839
|
|
},
|
|
{
|
|
"epoch": 4.387931034482759,
|
|
"grad_norm": 0.9179745890697341,
|
|
"learning_rate": 1.4715739103954375e-05,
|
|
"loss": 0.2643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25221094489097595,
|
|
"step": 2545,
|
|
"valid_targets_mean": 2191.8,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 4.396551724137931,
|
|
"grad_norm": 0.6622828676925838,
|
|
"learning_rate": 1.4632866618017543e-05,
|
|
"loss": 0.2447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25719982385635376,
|
|
"step": 2550,
|
|
"valid_targets_mean": 3660.8,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 4.405172413793103,
|
|
"grad_norm": 0.8591681509143264,
|
|
"learning_rate": 1.4550093316712987e-05,
|
|
"loss": 0.2577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24447187781333923,
|
|
"step": 2555,
|
|
"valid_targets_mean": 2159.6,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 4.413793103448276,
|
|
"grad_norm": 0.7666460077302556,
|
|
"learning_rate": 1.4467420729691433e-05,
|
|
"loss": 0.2498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23079776763916016,
|
|
"step": 2560,
|
|
"valid_targets_mean": 2426.2,
|
|
"valid_targets_min": 429
|
|
},
|
|
{
|
|
"epoch": 4.422413793103448,
|
|
"grad_norm": 0.8628984095026991,
|
|
"learning_rate": 1.4384850384742412e-05,
|
|
"loss": 0.2555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20822520554065704,
|
|
"step": 2565,
|
|
"valid_targets_mean": 2130.8,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 4.431034482758621,
|
|
"grad_norm": 0.6655183257870557,
|
|
"learning_rate": 1.4302383807766003e-05,
|
|
"loss": 0.2598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23718425631523132,
|
|
"step": 2570,
|
|
"valid_targets_mean": 3646.4,
|
|
"valid_targets_min": 893
|
|
},
|
|
{
|
|
"epoch": 4.439655172413793,
|
|
"grad_norm": 0.9842333882472362,
|
|
"learning_rate": 1.4220022522744667e-05,
|
|
"loss": 0.2878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2799840569496155,
|
|
"step": 2575,
|
|
"valid_targets_mean": 1720.6,
|
|
"valid_targets_min": 474
|
|
},
|
|
{
|
|
"epoch": 4.448275862068965,
|
|
"grad_norm": 0.826442375777945,
|
|
"learning_rate": 1.4137768051715059e-05,
|
|
"loss": 0.2535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25755539536476135,
|
|
"step": 2580,
|
|
"valid_targets_mean": 2250.4,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 4.456896551724138,
|
|
"grad_norm": 0.8335441437915141,
|
|
"learning_rate": 1.4055621914739915e-05,
|
|
"loss": 0.2634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2718716859817505,
|
|
"step": 2585,
|
|
"valid_targets_mean": 2529.5,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 4.4655172413793105,
|
|
"grad_norm": 0.5992395028797994,
|
|
"learning_rate": 1.3973585629879973e-05,
|
|
"loss": 0.2558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20316217839717865,
|
|
"step": 2590,
|
|
"valid_targets_mean": 4162.9,
|
|
"valid_targets_min": 561
|
|
},
|
|
{
|
|
"epoch": 4.474137931034483,
|
|
"grad_norm": 0.5884128756243574,
|
|
"learning_rate": 1.3891660713165873e-05,
|
|
"loss": 0.2441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20528560876846313,
|
|
"step": 2595,
|
|
"valid_targets_mean": 3888.8,
|
|
"valid_targets_min": 1177
|
|
},
|
|
{
|
|
"epoch": 4.482758620689655,
|
|
"grad_norm": 0.927923085075217,
|
|
"learning_rate": 1.3809848678570204e-05,
|
|
"loss": 0.2609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20741838216781616,
|
|
"step": 2600,
|
|
"valid_targets_mean": 1499.1,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 4.491379310344827,
|
|
"grad_norm": 0.6804876534067182,
|
|
"learning_rate": 1.3728151037979468e-05,
|
|
"loss": 0.2256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2584356665611267,
|
|
"step": 2605,
|
|
"valid_targets_mean": 3414.1,
|
|
"valid_targets_min": 343
|
|
},
|
|
{
|
|
"epoch": 4.5,
|
|
"grad_norm": 0.8331576647298734,
|
|
"learning_rate": 1.3646569301166177e-05,
|
|
"loss": 0.302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19165021181106567,
|
|
"step": 2610,
|
|
"valid_targets_mean": 2333.8,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 4.508620689655173,
|
|
"grad_norm": 0.8201431783332219,
|
|
"learning_rate": 1.3565104975760936e-05,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27077627182006836,
|
|
"step": 2615,
|
|
"valid_targets_mean": 2511.2,
|
|
"valid_targets_min": 511
|
|
},
|
|
{
|
|
"epoch": 4.517241379310345,
|
|
"grad_norm": 0.9034946742282952,
|
|
"learning_rate": 1.34837595672246e-05,
|
|
"loss": 0.2951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28327399492263794,
|
|
"step": 2620,
|
|
"valid_targets_mean": 2170.9,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 4.525862068965517,
|
|
"grad_norm": 0.7195977109229138,
|
|
"learning_rate": 1.3402534578820428e-05,
|
|
"loss": 0.2379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2510998547077179,
|
|
"step": 2625,
|
|
"valid_targets_mean": 2907.8,
|
|
"valid_targets_min": 762
|
|
},
|
|
{
|
|
"epoch": 4.5344827586206895,
|
|
"grad_norm": 0.7377623184998696,
|
|
"learning_rate": 1.3321431511586308e-05,
|
|
"loss": 0.2518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23612627387046814,
|
|
"step": 2630,
|
|
"valid_targets_mean": 2875.3,
|
|
"valid_targets_min": 848
|
|
},
|
|
{
|
|
"epoch": 4.543103448275862,
|
|
"grad_norm": 0.6492806402613284,
|
|
"learning_rate": 1.3240451864307048e-05,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24571189284324646,
|
|
"step": 2635,
|
|
"valid_targets_mean": 3630.4,
|
|
"valid_targets_min": 772
|
|
},
|
|
{
|
|
"epoch": 4.551724137931035,
|
|
"grad_norm": 0.8073728194218844,
|
|
"learning_rate": 1.3159597133486628e-05,
|
|
"loss": 0.2678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24010978639125824,
|
|
"step": 2640,
|
|
"valid_targets_mean": 2459.0,
|
|
"valid_targets_min": 561
|
|
},
|
|
{
|
|
"epoch": 4.560344827586206,
|
|
"grad_norm": 0.7825834526443618,
|
|
"learning_rate": 1.3078868813320594e-05,
|
|
"loss": 0.2679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30113720893859863,
|
|
"step": 2645,
|
|
"valid_targets_mean": 2919.1,
|
|
"valid_targets_min": 970
|
|
},
|
|
{
|
|
"epoch": 4.568965517241379,
|
|
"grad_norm": 0.8053544868678272,
|
|
"learning_rate": 1.2998268395668412e-05,
|
|
"loss": 0.2767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2285986840724945,
|
|
"step": 2650,
|
|
"valid_targets_mean": 2252.1,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 4.577586206896552,
|
|
"grad_norm": 0.7286795475876217,
|
|
"learning_rate": 1.2917797370025908e-05,
|
|
"loss": 0.2494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20709648728370667,
|
|
"step": 2655,
|
|
"valid_targets_mean": 2482.2,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 4.586206896551724,
|
|
"grad_norm": 0.7980126972228626,
|
|
"learning_rate": 1.2837457223497754e-05,
|
|
"loss": 0.2494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2862476408481598,
|
|
"step": 2660,
|
|
"valid_targets_mean": 2788.4,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 4.594827586206897,
|
|
"grad_norm": 0.9547103713318049,
|
|
"learning_rate": 1.2757249440769957e-05,
|
|
"loss": 0.2622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2612942159175873,
|
|
"step": 2665,
|
|
"valid_targets_mean": 2043.6,
|
|
"valid_targets_min": 495
|
|
},
|
|
{
|
|
"epoch": 4.603448275862069,
|
|
"grad_norm": 0.905981008508086,
|
|
"learning_rate": 1.2677175504082452e-05,
|
|
"loss": 0.2457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2664405107498169,
|
|
"step": 2670,
|
|
"valid_targets_mean": 2115.1,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 4.612068965517241,
|
|
"grad_norm": 0.7140183681440331,
|
|
"learning_rate": 1.2597236893201712e-05,
|
|
"loss": 0.3195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34579741954803467,
|
|
"step": 2675,
|
|
"valid_targets_mean": 4065.8,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 4.620689655172414,
|
|
"grad_norm": 0.6892440746656789,
|
|
"learning_rate": 1.2517435085393373e-05,
|
|
"loss": 0.2708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21152852475643158,
|
|
"step": 2680,
|
|
"valid_targets_mean": 3078.9,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 4.629310344827586,
|
|
"grad_norm": 0.8486268587233803,
|
|
"learning_rate": 1.2437771555394944e-05,
|
|
"loss": 0.2867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2553647756576538,
|
|
"step": 2685,
|
|
"valid_targets_mean": 2060.7,
|
|
"valid_targets_min": 870
|
|
},
|
|
{
|
|
"epoch": 4.637931034482759,
|
|
"grad_norm": 0.6729119974556853,
|
|
"learning_rate": 1.2358247775388578e-05,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.291421502828598,
|
|
"step": 2690,
|
|
"valid_targets_mean": 3406.8,
|
|
"valid_targets_min": 479
|
|
},
|
|
{
|
|
"epoch": 4.646551724137931,
|
|
"grad_norm": 0.9670461613890826,
|
|
"learning_rate": 1.227886521497383e-05,
|
|
"loss": 0.2894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2571569085121155,
|
|
"step": 2695,
|
|
"valid_targets_mean": 1696.8,
|
|
"valid_targets_min": 561
|
|
},
|
|
{
|
|
"epoch": 4.655172413793103,
|
|
"grad_norm": 0.9756750116337135,
|
|
"learning_rate": 1.2199625341140533e-05,
|
|
"loss": 0.2897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31376075744628906,
|
|
"step": 2700,
|
|
"valid_targets_mean": 2084.3,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 4.663793103448276,
|
|
"grad_norm": 0.8118045075648823,
|
|
"learning_rate": 1.2120529618241665e-05,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2494584619998932,
|
|
"step": 2705,
|
|
"valid_targets_mean": 2730.1,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 4.672413793103448,
|
|
"grad_norm": 0.7694865435535756,
|
|
"learning_rate": 1.2041579507966288e-05,
|
|
"loss": 0.2532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23829087615013123,
|
|
"step": 2710,
|
|
"valid_targets_mean": 2551.9,
|
|
"valid_targets_min": 838
|
|
},
|
|
{
|
|
"epoch": 4.681034482758621,
|
|
"grad_norm": 0.9076282780504955,
|
|
"learning_rate": 1.1962776469312556e-05,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3020887076854706,
|
|
"step": 2715,
|
|
"valid_targets_mean": 2050.4,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 4.689655172413794,
|
|
"grad_norm": 0.984237292189467,
|
|
"learning_rate": 1.1884121958560721e-05,
|
|
"loss": 0.2354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2580467462539673,
|
|
"step": 2720,
|
|
"valid_targets_mean": 1632.6,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 4.698275862068965,
|
|
"grad_norm": 0.8914302319151403,
|
|
"learning_rate": 1.1805617429246254e-05,
|
|
"loss": 0.3175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4029195308685303,
|
|
"step": 2725,
|
|
"valid_targets_mean": 2773.8,
|
|
"valid_targets_min": 468
|
|
},
|
|
{
|
|
"epoch": 4.706896551724138,
|
|
"grad_norm": 0.8839779314527022,
|
|
"learning_rate": 1.1727264332132978e-05,
|
|
"loss": 0.2971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26910024881362915,
|
|
"step": 2730,
|
|
"valid_targets_mean": 2261.6,
|
|
"valid_targets_min": 653
|
|
},
|
|
{
|
|
"epoch": 4.7155172413793105,
|
|
"grad_norm": 0.8225743421333721,
|
|
"learning_rate": 1.1649064115186216e-05,
|
|
"loss": 0.2445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.270842969417572,
|
|
"step": 2735,
|
|
"valid_targets_mean": 2445.1,
|
|
"valid_targets_min": 581
|
|
},
|
|
{
|
|
"epoch": 4.724137931034483,
|
|
"grad_norm": 0.8877295961730737,
|
|
"learning_rate": 1.1571018223546095e-05,
|
|
"loss": 0.2482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2545921802520752,
|
|
"step": 2740,
|
|
"valid_targets_mean": 2293.9,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 4.732758620689655,
|
|
"grad_norm": 0.9014458800755352,
|
|
"learning_rate": 1.1493128099500806e-05,
|
|
"loss": 0.2526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19791623950004578,
|
|
"step": 2745,
|
|
"valid_targets_mean": 2367.0,
|
|
"valid_targets_min": 1119
|
|
},
|
|
{
|
|
"epoch": 4.741379310344827,
|
|
"grad_norm": 0.8402569938502772,
|
|
"learning_rate": 1.1415395182459925e-05,
|
|
"loss": 0.2621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3597656190395355,
|
|
"step": 2750,
|
|
"valid_targets_mean": 2686.0,
|
|
"valid_targets_min": 708
|
|
},
|
|
{
|
|
"epoch": 4.75,
|
|
"grad_norm": 0.7089169362206015,
|
|
"learning_rate": 1.1337820908927891e-05,
|
|
"loss": 0.2598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29866451025009155,
|
|
"step": 2755,
|
|
"valid_targets_mean": 3162.3,
|
|
"valid_targets_min": 779
|
|
},
|
|
{
|
|
"epoch": 4.758620689655173,
|
|
"grad_norm": 0.9099687598408538,
|
|
"learning_rate": 1.126040671247738e-05,
|
|
"loss": 0.3048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32522955536842346,
|
|
"step": 2760,
|
|
"valid_targets_mean": 2540.8,
|
|
"valid_targets_min": 487
|
|
},
|
|
{
|
|
"epoch": 4.767241379310345,
|
|
"grad_norm": 0.715299266743667,
|
|
"learning_rate": 1.1183154023722839e-05,
|
|
"loss": 0.3053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26378440856933594,
|
|
"step": 2765,
|
|
"valid_targets_mean": 3656.9,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 4.775862068965517,
|
|
"grad_norm": 0.8044464553170273,
|
|
"learning_rate": 1.1106064270294068e-05,
|
|
"loss": 0.2311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24478477239608765,
|
|
"step": 2770,
|
|
"valid_targets_mean": 2387.5,
|
|
"valid_targets_min": 708
|
|
},
|
|
{
|
|
"epoch": 4.7844827586206895,
|
|
"grad_norm": 0.8572643434070522,
|
|
"learning_rate": 1.1029138876809818e-05,
|
|
"loss": 0.2629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2885858714580536,
|
|
"step": 2775,
|
|
"valid_targets_mean": 2269.8,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 4.793103448275862,
|
|
"grad_norm": 0.8831366905295249,
|
|
"learning_rate": 1.0952379264851464e-05,
|
|
"loss": 0.2354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25413423776626587,
|
|
"step": 2780,
|
|
"valid_targets_mean": 1837.9,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 4.801724137931035,
|
|
"grad_norm": 0.7537924671729679,
|
|
"learning_rate": 1.087578685293674e-05,
|
|
"loss": 0.2372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21763187646865845,
|
|
"step": 2785,
|
|
"valid_targets_mean": 2566.1,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 4.810344827586206,
|
|
"grad_norm": 0.7703574579799106,
|
|
"learning_rate": 1.0799363056493529e-05,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20490054786205292,
|
|
"step": 2790,
|
|
"valid_targets_mean": 2255.1,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 4.818965517241379,
|
|
"grad_norm": 0.8233927826656027,
|
|
"learning_rate": 1.0723109287833697e-05,
|
|
"loss": 0.3045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2616879940032959,
|
|
"step": 2795,
|
|
"valid_targets_mean": 2616.1,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 4.827586206896552,
|
|
"grad_norm": 0.8032320228650361,
|
|
"learning_rate": 1.0647026956126979e-05,
|
|
"loss": 0.2538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26382285356521606,
|
|
"step": 2800,
|
|
"valid_targets_mean": 2502.1,
|
|
"valid_targets_min": 412
|
|
},
|
|
{
|
|
"epoch": 4.836206896551724,
|
|
"grad_norm": 0.6432642583887519,
|
|
"learning_rate": 1.0571117467374972e-05,
|
|
"loss": 0.2661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29295748472213745,
|
|
"step": 2805,
|
|
"valid_targets_mean": 3958.2,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 4.844827586206897,
|
|
"grad_norm": 0.8550150623571411,
|
|
"learning_rate": 1.0495382224385154e-05,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27578306198120117,
|
|
"step": 2810,
|
|
"valid_targets_mean": 2180.1,
|
|
"valid_targets_min": 437
|
|
},
|
|
{
|
|
"epoch": 4.853448275862069,
|
|
"grad_norm": 0.8282277450172278,
|
|
"learning_rate": 1.0419822626744894e-05,
|
|
"loss": 0.2634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23954559862613678,
|
|
"step": 2815,
|
|
"valid_targets_mean": 2158.8,
|
|
"valid_targets_min": 586
|
|
},
|
|
{
|
|
"epoch": 4.862068965517241,
|
|
"grad_norm": 0.739519390252053,
|
|
"learning_rate": 1.0344440070795671e-05,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24350044131278992,
|
|
"step": 2820,
|
|
"valid_targets_mean": 2944.4,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 4.870689655172414,
|
|
"grad_norm": 0.6351145044807031,
|
|
"learning_rate": 1.0269235949607223e-05,
|
|
"loss": 0.2912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.388615220785141,
|
|
"step": 2825,
|
|
"valid_targets_mean": 4205.4,
|
|
"valid_targets_min": 713
|
|
},
|
|
{
|
|
"epoch": 4.879310344827586,
|
|
"grad_norm": 0.9737679102183793,
|
|
"learning_rate": 1.019421165295182e-05,
|
|
"loss": 0.2504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30197206139564514,
|
|
"step": 2830,
|
|
"valid_targets_mean": 1733.2,
|
|
"valid_targets_min": 787
|
|
},
|
|
{
|
|
"epoch": 4.887931034482759,
|
|
"grad_norm": 0.6615703692662012,
|
|
"learning_rate": 1.0119368567278545e-05,
|
|
"loss": 0.2727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22386646270751953,
|
|
"step": 2835,
|
|
"valid_targets_mean": 3164.2,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 4.896551724137931,
|
|
"grad_norm": 0.9421012456482122,
|
|
"learning_rate": 1.0044708075687746e-05,
|
|
"loss": 0.2757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27233490347862244,
|
|
"step": 2840,
|
|
"valid_targets_mean": 1674.3,
|
|
"valid_targets_min": 454
|
|
},
|
|
{
|
|
"epoch": 4.905172413793103,
|
|
"grad_norm": 0.6755294173673939,
|
|
"learning_rate": 9.97023155790541e-06,
|
|
"loss": 0.2975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.362407386302948,
|
|
"step": 2845,
|
|
"valid_targets_mean": 3552.1,
|
|
"valid_targets_min": 713
|
|
},
|
|
{
|
|
"epoch": 4.913793103448276,
|
|
"grad_norm": 0.7331491510364737,
|
|
"learning_rate": 9.895940390257675e-06,
|
|
"loss": 0.2632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2711232602596283,
|
|
"step": 2850,
|
|
"valid_targets_mean": 3487.1,
|
|
"valid_targets_min": 1102
|
|
},
|
|
{
|
|
"epoch": 4.922413793103448,
|
|
"grad_norm": 0.9290584095263694,
|
|
"learning_rate": 9.821835945645426e-06,
|
|
"loss": 0.2852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2613976001739502,
|
|
"step": 2855,
|
|
"valid_targets_mean": 1895.8,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 4.931034482758621,
|
|
"grad_norm": 0.7712276355626331,
|
|
"learning_rate": 9.747919593518897e-06,
|
|
"loss": 0.2819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23783954977989197,
|
|
"step": 2860,
|
|
"valid_targets_mean": 2612.0,
|
|
"valid_targets_min": 1141
|
|
},
|
|
{
|
|
"epoch": 4.939655172413794,
|
|
"grad_norm": 0.915887245911443,
|
|
"learning_rate": 9.674192699852397e-06,
|
|
"loss": 0.3151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2635113000869751,
|
|
"step": 2865,
|
|
"valid_targets_mean": 2107.1,
|
|
"valid_targets_min": 431
|
|
},
|
|
{
|
|
"epoch": 4.948275862068965,
|
|
"grad_norm": 0.8107696850116063,
|
|
"learning_rate": 9.600656627119e-06,
|
|
"loss": 0.2808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25736725330352783,
|
|
"step": 2870,
|
|
"valid_targets_mean": 2507.6,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 4.956896551724138,
|
|
"grad_norm": 0.8281895858985249,
|
|
"learning_rate": 9.52731273426544e-06,
|
|
"loss": 0.3068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27084803581237793,
|
|
"step": 2875,
|
|
"valid_targets_mean": 2390.9,
|
|
"valid_targets_min": 521
|
|
},
|
|
{
|
|
"epoch": 4.9655172413793105,
|
|
"grad_norm": 0.8513496909637406,
|
|
"learning_rate": 9.454162376686959e-06,
|
|
"loss": 0.2727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3300788998603821,
|
|
"step": 2880,
|
|
"valid_targets_mean": 2519.0,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 4.974137931034483,
|
|
"grad_norm": 0.7655388287695867,
|
|
"learning_rate": 9.381206906202268e-06,
|
|
"loss": 0.2938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.245473712682724,
|
|
"step": 2885,
|
|
"valid_targets_mean": 2970.3,
|
|
"valid_targets_min": 452
|
|
},
|
|
{
|
|
"epoch": 4.982758620689655,
|
|
"grad_norm": 0.8822118338492542,
|
|
"learning_rate": 9.308447671028546e-06,
|
|
"loss": 0.2573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3259021043777466,
|
|
"step": 2890,
|
|
"valid_targets_mean": 2148.8,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 4.991379310344827,
|
|
"grad_norm": 0.806073479088948,
|
|
"learning_rate": 9.235886015756579e-06,
|
|
"loss": 0.2651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2970079481601715,
|
|
"step": 2895,
|
|
"valid_targets_mean": 2542.0,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.7534412948501094,
|
|
"learning_rate": 9.163523281325855e-06,
|
|
"loss": 0.2606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1989104300737381,
|
|
"step": 2900,
|
|
"valid_targets_mean": 2618.3,
|
|
"valid_targets_min": 660
|
|
},
|
|
{
|
|
"epoch": 5.008620689655173,
|
|
"grad_norm": 1.105096861322533,
|
|
"learning_rate": 9.09136080499979e-06,
|
|
"loss": 0.2309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2033461630344391,
|
|
"step": 2905,
|
|
"valid_targets_mean": 2087.2,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 5.017241379310345,
|
|
"grad_norm": 0.814355316956615,
|
|
"learning_rate": 9.019399920341056e-06,
|
|
"loss": 0.2241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2211400270462036,
|
|
"step": 2910,
|
|
"valid_targets_mean": 2302.2,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 5.025862068965517,
|
|
"grad_norm": 0.7555369935442012,
|
|
"learning_rate": 8.947641957186901e-06,
|
|
"loss": 0.2091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22201001644134521,
|
|
"step": 2915,
|
|
"valid_targets_mean": 2680.2,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 5.0344827586206895,
|
|
"grad_norm": 0.8780869760515323,
|
|
"learning_rate": 8.876088241624581e-06,
|
|
"loss": 0.2387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23922735452651978,
|
|
"step": 2920,
|
|
"valid_targets_mean": 2703.8,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 5.043103448275862,
|
|
"grad_norm": 0.7990653658780432,
|
|
"learning_rate": 8.804740095966854e-06,
|
|
"loss": 0.2243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19045668840408325,
|
|
"step": 2925,
|
|
"valid_targets_mean": 2416.1,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 5.051724137931035,
|
|
"grad_norm": 0.88230408527402,
|
|
"learning_rate": 8.733598838727559e-06,
|
|
"loss": 0.2794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37333962321281433,
|
|
"step": 2930,
|
|
"valid_targets_mean": 2585.8,
|
|
"valid_targets_min": 528
|
|
},
|
|
{
|
|
"epoch": 5.060344827586207,
|
|
"grad_norm": 0.6819112657231726,
|
|
"learning_rate": 8.662665784597229e-06,
|
|
"loss": 0.231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23839415609836578,
|
|
"step": 2935,
|
|
"valid_targets_mean": 3358.6,
|
|
"valid_targets_min": 856
|
|
},
|
|
{
|
|
"epoch": 5.068965517241379,
|
|
"grad_norm": 0.8293484691002044,
|
|
"learning_rate": 8.591942244418787e-06,
|
|
"loss": 0.2198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19481952488422394,
|
|
"step": 2940,
|
|
"valid_targets_mean": 2366.5,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 5.077586206896552,
|
|
"grad_norm": 0.7881892372101212,
|
|
"learning_rate": 8.521429525163353e-06,
|
|
"loss": 0.1996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24439600110054016,
|
|
"step": 2945,
|
|
"valid_targets_mean": 2869.0,
|
|
"valid_targets_min": 905
|
|
},
|
|
{
|
|
"epoch": 5.086206896551724,
|
|
"grad_norm": 0.8617105163860078,
|
|
"learning_rate": 8.451128929906103e-06,
|
|
"loss": 0.2051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24448537826538086,
|
|
"step": 2950,
|
|
"valid_targets_mean": 2538.8,
|
|
"valid_targets_min": 1034
|
|
},
|
|
{
|
|
"epoch": 5.094827586206897,
|
|
"grad_norm": 0.9667277337097181,
|
|
"learning_rate": 8.381041757802104e-06,
|
|
"loss": 0.2243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20713761448860168,
|
|
"step": 2955,
|
|
"valid_targets_mean": 1746.7,
|
|
"valid_targets_min": 454
|
|
},
|
|
{
|
|
"epoch": 5.103448275862069,
|
|
"grad_norm": 0.9687477914516257,
|
|
"learning_rate": 8.311169304062408e-06,
|
|
"loss": 0.2369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2437906712293625,
|
|
"step": 2960,
|
|
"valid_targets_mean": 2335.4,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 5.112068965517241,
|
|
"grad_norm": 0.8722989954744373,
|
|
"learning_rate": 8.24151285993005e-06,
|
|
"loss": 0.2416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2599342167377472,
|
|
"step": 2965,
|
|
"valid_targets_mean": 2459.4,
|
|
"valid_targets_min": 864
|
|
},
|
|
{
|
|
"epoch": 5.120689655172414,
|
|
"grad_norm": 0.8167610619718768,
|
|
"learning_rate": 8.172073712656217e-06,
|
|
"loss": 0.2527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22704723477363586,
|
|
"step": 2970,
|
|
"valid_targets_mean": 2647.9,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 5.129310344827586,
|
|
"grad_norm": 0.8851364961430449,
|
|
"learning_rate": 8.102853145476443e-06,
|
|
"loss": 0.2402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2630884647369385,
|
|
"step": 2975,
|
|
"valid_targets_mean": 1864.2,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 5.137931034482759,
|
|
"grad_norm": 0.8571259105485494,
|
|
"learning_rate": 8.033852437586909e-06,
|
|
"loss": 0.241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30201417207717896,
|
|
"step": 2980,
|
|
"valid_targets_mean": 2867.9,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 5.146551724137931,
|
|
"grad_norm": 0.7274305622693614,
|
|
"learning_rate": 7.965072864120795e-06,
|
|
"loss": 0.2321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21190223097801208,
|
|
"step": 2985,
|
|
"valid_targets_mean": 3433.0,
|
|
"valid_targets_min": 862
|
|
},
|
|
{
|
|
"epoch": 5.155172413793103,
|
|
"grad_norm": 0.9890160470396074,
|
|
"learning_rate": 7.896515696124703e-06,
|
|
"loss": 0.2139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21601173281669617,
|
|
"step": 2990,
|
|
"valid_targets_mean": 1532.0,
|
|
"valid_targets_min": 461
|
|
},
|
|
{
|
|
"epoch": 5.163793103448276,
|
|
"grad_norm": 0.997039918531523,
|
|
"learning_rate": 7.828182200535192e-06,
|
|
"loss": 0.2276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23005810379981995,
|
|
"step": 2995,
|
|
"valid_targets_mean": 1639.2,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 5.172413793103448,
|
|
"grad_norm": 0.6867188638921778,
|
|
"learning_rate": 7.760073640155363e-06,
|
|
"loss": 0.2685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2459755539894104,
|
|
"step": 3000,
|
|
"valid_targets_mean": 3401.4,
|
|
"valid_targets_min": 842
|
|
},
|
|
{
|
|
"epoch": 5.181034482758621,
|
|
"grad_norm": 0.6533351330245627,
|
|
"learning_rate": 7.6921912736315e-06,
|
|
"loss": 0.2457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.376540869474411,
|
|
"step": 3005,
|
|
"valid_targets_mean": 4465.1,
|
|
"valid_targets_min": 904
|
|
},
|
|
{
|
|
"epoch": 5.189655172413793,
|
|
"grad_norm": 0.8644740868223272,
|
|
"learning_rate": 7.624536355429832e-06,
|
|
"loss": 0.2438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22686511278152466,
|
|
"step": 3010,
|
|
"valid_targets_mean": 2452.3,
|
|
"valid_targets_min": 830
|
|
},
|
|
{
|
|
"epoch": 5.198275862068965,
|
|
"grad_norm": 0.7124035393064789,
|
|
"learning_rate": 7.557110135813341e-06,
|
|
"loss": 0.2495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3452039062976837,
|
|
"step": 3015,
|
|
"valid_targets_mean": 3736.2,
|
|
"valid_targets_min": 424
|
|
},
|
|
{
|
|
"epoch": 5.206896551724138,
|
|
"grad_norm": 0.7775149834470849,
|
|
"learning_rate": 7.489913860818662e-06,
|
|
"loss": 0.2085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.185041606426239,
|
|
"step": 3020,
|
|
"valid_targets_mean": 2617.4,
|
|
"valid_targets_min": 481
|
|
},
|
|
{
|
|
"epoch": 5.2155172413793105,
|
|
"grad_norm": 0.7965342418230602,
|
|
"learning_rate": 7.4229487722330315e-06,
|
|
"loss": 0.2721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30193090438842773,
|
|
"step": 3025,
|
|
"valid_targets_mean": 2663.9,
|
|
"valid_targets_min": 637
|
|
},
|
|
{
|
|
"epoch": 5.224137931034483,
|
|
"grad_norm": 0.7442252187409096,
|
|
"learning_rate": 7.356216107571399e-06,
|
|
"loss": 0.2534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.226355642080307,
|
|
"step": 3030,
|
|
"valid_targets_mean": 2999.0,
|
|
"valid_targets_min": 630
|
|
},
|
|
{
|
|
"epoch": 5.232758620689655,
|
|
"grad_norm": 0.8920967655645357,
|
|
"learning_rate": 7.289717100053497e-06,
|
|
"loss": 0.2441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.274308443069458,
|
|
"step": 3035,
|
|
"valid_targets_mean": 2436.9,
|
|
"valid_targets_min": 848
|
|
},
|
|
{
|
|
"epoch": 5.241379310344827,
|
|
"grad_norm": 0.80538672065562,
|
|
"learning_rate": 7.2234529785810645e-06,
|
|
"loss": 0.2324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25805386900901794,
|
|
"step": 3040,
|
|
"valid_targets_mean": 2757.7,
|
|
"valid_targets_min": 479
|
|
},
|
|
{
|
|
"epoch": 5.25,
|
|
"grad_norm": 0.9390707357590979,
|
|
"learning_rate": 7.157424967715163e-06,
|
|
"loss": 0.2561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23520056903362274,
|
|
"step": 3045,
|
|
"valid_targets_mean": 1874.8,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 5.258620689655173,
|
|
"grad_norm": 0.928924497701895,
|
|
"learning_rate": 7.091634287653526e-06,
|
|
"loss": 0.2467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29944998025894165,
|
|
"step": 3050,
|
|
"valid_targets_mean": 2415.0,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 5.267241379310345,
|
|
"grad_norm": 0.9511943447002247,
|
|
"learning_rate": 7.026082154208012e-06,
|
|
"loss": 0.2251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21823076903820038,
|
|
"step": 3055,
|
|
"valid_targets_mean": 2298.8,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 5.275862068965517,
|
|
"grad_norm": 0.8603536556942801,
|
|
"learning_rate": 6.960769778782133e-06,
|
|
"loss": 0.24,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2770598232746124,
|
|
"step": 3060,
|
|
"valid_targets_mean": 2980.1,
|
|
"valid_targets_min": 978
|
|
},
|
|
{
|
|
"epoch": 5.2844827586206895,
|
|
"grad_norm": 0.8370012369788737,
|
|
"learning_rate": 6.89569836834868e-06,
|
|
"loss": 0.2249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24535402655601501,
|
|
"step": 3065,
|
|
"valid_targets_mean": 2859.5,
|
|
"valid_targets_min": 948
|
|
},
|
|
{
|
|
"epoch": 5.293103448275862,
|
|
"grad_norm": 0.7706085321244089,
|
|
"learning_rate": 6.830869125427406e-06,
|
|
"loss": 0.2669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22897306084632874,
|
|
"step": 3070,
|
|
"valid_targets_mean": 2896.3,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 5.301724137931035,
|
|
"grad_norm": 0.9389137954620951,
|
|
"learning_rate": 6.766283248062817e-06,
|
|
"loss": 0.2456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2493031620979309,
|
|
"step": 3075,
|
|
"valid_targets_mean": 1989.4,
|
|
"valid_targets_min": 569
|
|
},
|
|
{
|
|
"epoch": 5.310344827586207,
|
|
"grad_norm": 0.99461767506957,
|
|
"learning_rate": 6.701941929801996e-06,
|
|
"loss": 0.2394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2475142478942871,
|
|
"step": 3080,
|
|
"valid_targets_mean": 1796.1,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 5.318965517241379,
|
|
"grad_norm": 0.8124459132077362,
|
|
"learning_rate": 6.637846359672611e-06,
|
|
"loss": 0.2123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2720719277858734,
|
|
"step": 3085,
|
|
"valid_targets_mean": 3237.2,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 5.327586206896552,
|
|
"grad_norm": 0.8142226470682105,
|
|
"learning_rate": 6.57399772216089e-06,
|
|
"loss": 0.2953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31625697016716003,
|
|
"step": 3090,
|
|
"valid_targets_mean": 2509.7,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 5.336206896551724,
|
|
"grad_norm": 0.9065141934251587,
|
|
"learning_rate": 6.510397197189724e-06,
|
|
"loss": 0.2464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21385717391967773,
|
|
"step": 3095,
|
|
"valid_targets_mean": 1814.4,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 5.344827586206897,
|
|
"grad_norm": 0.7442529504808642,
|
|
"learning_rate": 6.447045960096909e-06,
|
|
"loss": 0.2404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17409583926200867,
|
|
"step": 3100,
|
|
"valid_targets_mean": 2769.3,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 5.353448275862069,
|
|
"grad_norm": 0.7252146553291541,
|
|
"learning_rate": 6.383945181613398e-06,
|
|
"loss": 0.2518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2682117819786072,
|
|
"step": 3105,
|
|
"valid_targets_mean": 3152.5,
|
|
"valid_targets_min": 779
|
|
},
|
|
{
|
|
"epoch": 5.362068965517241,
|
|
"grad_norm": 0.8114618415473984,
|
|
"learning_rate": 6.32109602784166e-06,
|
|
"loss": 0.2227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23913054168224335,
|
|
"step": 3110,
|
|
"valid_targets_mean": 2647.9,
|
|
"valid_targets_min": 442
|
|
},
|
|
{
|
|
"epoch": 5.370689655172414,
|
|
"grad_norm": 1.051915605302084,
|
|
"learning_rate": 6.258499660234147e-06,
|
|
"loss": 0.2853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22146379947662354,
|
|
"step": 3115,
|
|
"valid_targets_mean": 1598.4,
|
|
"valid_targets_min": 239
|
|
},
|
|
{
|
|
"epoch": 5.379310344827586,
|
|
"grad_norm": 0.8789321218372114,
|
|
"learning_rate": 6.196157235571813e-06,
|
|
"loss": 0.2708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3028132915496826,
|
|
"step": 3120,
|
|
"valid_targets_mean": 2299.0,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 5.387931034482759,
|
|
"grad_norm": 0.9372581236618194,
|
|
"learning_rate": 6.134069905942764e-06,
|
|
"loss": 0.2353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2093108743429184,
|
|
"step": 3125,
|
|
"valid_targets_mean": 2376.3,
|
|
"valid_targets_min": 920
|
|
},
|
|
{
|
|
"epoch": 5.396551724137931,
|
|
"grad_norm": 0.9140188518901189,
|
|
"learning_rate": 6.072238818720919e-06,
|
|
"loss": 0.2381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2532634735107422,
|
|
"step": 3130,
|
|
"valid_targets_mean": 2265.4,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 5.405172413793103,
|
|
"grad_norm": 0.7513197162873506,
|
|
"learning_rate": 6.010665116544858e-06,
|
|
"loss": 0.2319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2515407204627991,
|
|
"step": 3135,
|
|
"valid_targets_mean": 3497.4,
|
|
"valid_targets_min": 925
|
|
},
|
|
{
|
|
"epoch": 5.413793103448276,
|
|
"grad_norm": 0.801555947638085,
|
|
"learning_rate": 5.9493499372967e-06,
|
|
"loss": 0.2294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2099888026714325,
|
|
"step": 3140,
|
|
"valid_targets_mean": 2794.4,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 5.422413793103448,
|
|
"grad_norm": 0.9269899206555638,
|
|
"learning_rate": 5.888294414081024e-06,
|
|
"loss": 0.2439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2532302141189575,
|
|
"step": 3145,
|
|
"valid_targets_mean": 1913.5,
|
|
"valid_targets_min": 430
|
|
},
|
|
{
|
|
"epoch": 5.431034482758621,
|
|
"grad_norm": 0.8973198139466191,
|
|
"learning_rate": 5.827499675203987e-06,
|
|
"loss": 0.2635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2676059603691101,
|
|
"step": 3150,
|
|
"valid_targets_mean": 2549.4,
|
|
"valid_targets_min": 463
|
|
},
|
|
{
|
|
"epoch": 5.439655172413793,
|
|
"grad_norm": 0.7944238364352102,
|
|
"learning_rate": 5.76696684415245e-06,
|
|
"loss": 0.191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19005446135997772,
|
|
"step": 3155,
|
|
"valid_targets_mean": 2430.8,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 5.448275862068965,
|
|
"grad_norm": 0.8242952463372876,
|
|
"learning_rate": 5.706697039573217e-06,
|
|
"loss": 0.2421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2020699828863144,
|
|
"step": 3160,
|
|
"valid_targets_mean": 2030.1,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 5.456896551724138,
|
|
"grad_norm": 0.8901005474674238,
|
|
"learning_rate": 5.646691375252344e-06,
|
|
"loss": 0.243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2569023072719574,
|
|
"step": 3165,
|
|
"valid_targets_mean": 2169.4,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 5.4655172413793105,
|
|
"grad_norm": 0.736018639158119,
|
|
"learning_rate": 5.586950960094606e-06,
|
|
"loss": 0.2189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18459513783454895,
|
|
"step": 3170,
|
|
"valid_targets_mean": 2561.8,
|
|
"valid_targets_min": 749
|
|
},
|
|
{
|
|
"epoch": 5.474137931034483,
|
|
"grad_norm": 0.8400494581096617,
|
|
"learning_rate": 5.527476898102959e-06,
|
|
"loss": 0.2347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21703600883483887,
|
|
"step": 3175,
|
|
"valid_targets_mean": 2296.8,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 5.482758620689655,
|
|
"grad_norm": 0.8209437576909444,
|
|
"learning_rate": 5.4682702883581395e-06,
|
|
"loss": 0.1949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2574310600757599,
|
|
"step": 3180,
|
|
"valid_targets_mean": 2344.2,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 5.491379310344827,
|
|
"grad_norm": 0.8182704176506642,
|
|
"learning_rate": 5.40933222499838e-06,
|
|
"loss": 0.264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.275604784488678,
|
|
"step": 3185,
|
|
"valid_targets_mean": 2814.9,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 5.5,
|
|
"grad_norm": 0.8414281728930835,
|
|
"learning_rate": 5.350663797199174e-06,
|
|
"loss": 0.2186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21224384009838104,
|
|
"step": 3190,
|
|
"valid_targets_mean": 2361.4,
|
|
"valid_targets_min": 749
|
|
},
|
|
{
|
|
"epoch": 5.508620689655173,
|
|
"grad_norm": 0.7787724276238028,
|
|
"learning_rate": 5.292266089153149e-06,
|
|
"loss": 0.2218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23665933310985565,
|
|
"step": 3195,
|
|
"valid_targets_mean": 3440.9,
|
|
"valid_targets_min": 986
|
|
},
|
|
{
|
|
"epoch": 5.517241379310345,
|
|
"grad_norm": 0.9139485332508287,
|
|
"learning_rate": 5.234140180050029e-06,
|
|
"loss": 0.2312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.272885262966156,
|
|
"step": 3200,
|
|
"valid_targets_mean": 2132.9,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 5.525862068965517,
|
|
"grad_norm": 0.6681187838459355,
|
|
"learning_rate": 5.1762871440566935e-06,
|
|
"loss": 0.2455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27599036693573,
|
|
"step": 3205,
|
|
"valid_targets_mean": 3626.7,
|
|
"valid_targets_min": 870
|
|
},
|
|
{
|
|
"epoch": 5.5344827586206895,
|
|
"grad_norm": 0.719024227101147,
|
|
"learning_rate": 5.118708050297332e-06,
|
|
"loss": 0.2752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3196202218532562,
|
|
"step": 3210,
|
|
"valid_targets_mean": 3611.8,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 5.543103448275862,
|
|
"grad_norm": 0.73712058796339,
|
|
"learning_rate": 5.061403962833669e-06,
|
|
"loss": 0.247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4145488142967224,
|
|
"step": 3215,
|
|
"valid_targets_mean": 3701.7,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 5.551724137931035,
|
|
"grad_norm": 0.8563148070230047,
|
|
"learning_rate": 5.004375940645314e-06,
|
|
"loss": 0.2891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2751157283782959,
|
|
"step": 3220,
|
|
"valid_targets_mean": 2403.4,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 5.560344827586206,
|
|
"grad_norm": 0.7955026427941279,
|
|
"learning_rate": 4.947625037610219e-06,
|
|
"loss": 0.2172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20612727105617523,
|
|
"step": 3225,
|
|
"valid_targets_mean": 2537.2,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 5.568965517241379,
|
|
"grad_norm": 0.9987730788341107,
|
|
"learning_rate": 4.8911523024851295e-06,
|
|
"loss": 0.2469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2039443701505661,
|
|
"step": 3230,
|
|
"valid_targets_mean": 1640.1,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 5.577586206896552,
|
|
"grad_norm": 0.8832700315040387,
|
|
"learning_rate": 4.834958778886271e-06,
|
|
"loss": 0.2305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25076743960380554,
|
|
"step": 3235,
|
|
"valid_targets_mean": 2207.8,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 5.586206896551724,
|
|
"grad_norm": 0.9542645205391439,
|
|
"learning_rate": 4.779045505270043e-06,
|
|
"loss": 0.2435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26392078399658203,
|
|
"step": 3240,
|
|
"valid_targets_mean": 1774.3,
|
|
"valid_targets_min": 676
|
|
},
|
|
{
|
|
"epoch": 5.594827586206897,
|
|
"grad_norm": 0.7567347660293566,
|
|
"learning_rate": 4.723413514913817e-06,
|
|
"loss": 0.2293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2786219120025635,
|
|
"step": 3245,
|
|
"valid_targets_mean": 3309.2,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 5.603448275862069,
|
|
"grad_norm": 0.6792209326601717,
|
|
"learning_rate": 4.66806383589685e-06,
|
|
"loss": 0.265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.270943820476532,
|
|
"step": 3250,
|
|
"valid_targets_mean": 3717.2,
|
|
"valid_targets_min": 911
|
|
},
|
|
{
|
|
"epoch": 5.612068965517241,
|
|
"grad_norm": 0.7857291734419023,
|
|
"learning_rate": 4.6129974910812855e-06,
|
|
"loss": 0.2273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24094904959201813,
|
|
"step": 3255,
|
|
"valid_targets_mean": 2995.2,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 5.620689655172414,
|
|
"grad_norm": 1.0235608686110846,
|
|
"learning_rate": 4.558215498093252e-06,
|
|
"loss": 0.2677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3251035213470459,
|
|
"step": 3260,
|
|
"valid_targets_mean": 1856.7,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 5.629310344827586,
|
|
"grad_norm": 0.9351963793116751,
|
|
"learning_rate": 4.503718869304063e-06,
|
|
"loss": 0.2408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23973754048347473,
|
|
"step": 3265,
|
|
"valid_targets_mean": 2018.8,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 5.637931034482759,
|
|
"grad_norm": 0.962954318386746,
|
|
"learning_rate": 4.449508611811482e-06,
|
|
"loss": 0.3139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40691524744033813,
|
|
"step": 3270,
|
|
"valid_targets_mean": 2582.0,
|
|
"valid_targets_min": 398
|
|
},
|
|
{
|
|
"epoch": 5.646551724137931,
|
|
"grad_norm": 0.8172013834884642,
|
|
"learning_rate": 4.395585727421139e-06,
|
|
"loss": 0.2727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3875260353088379,
|
|
"step": 3275,
|
|
"valid_targets_mean": 2692.9,
|
|
"valid_targets_min": 504
|
|
},
|
|
{
|
|
"epoch": 5.655172413793103,
|
|
"grad_norm": 0.8610903423900477,
|
|
"learning_rate": 4.341951212628031e-06,
|
|
"loss": 0.2462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21040914952754974,
|
|
"step": 3280,
|
|
"valid_targets_mean": 2062.4,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 5.663793103448276,
|
|
"grad_norm": 1.0523753196953791,
|
|
"learning_rate": 4.288606058598048e-06,
|
|
"loss": 0.2297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23646166920661926,
|
|
"step": 3285,
|
|
"valid_targets_mean": 2081.1,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 5.672413793103448,
|
|
"grad_norm": 0.843692978853644,
|
|
"learning_rate": 4.235551251149714e-06,
|
|
"loss": 0.2428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20414787530899048,
|
|
"step": 3290,
|
|
"valid_targets_mean": 2991.9,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 5.681034482758621,
|
|
"grad_norm": 0.8451079144051367,
|
|
"learning_rate": 4.1827877707359474e-06,
|
|
"loss": 0.2371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31735336780548096,
|
|
"step": 3295,
|
|
"valid_targets_mean": 3041.9,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 5.689655172413794,
|
|
"grad_norm": 0.8078901356285255,
|
|
"learning_rate": 4.130316592425934e-06,
|
|
"loss": 0.2379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23267899453639984,
|
|
"step": 3300,
|
|
"valid_targets_mean": 2777.6,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 5.698275862068965,
|
|
"grad_norm": 0.8571064223274443,
|
|
"learning_rate": 4.078138685887125e-06,
|
|
"loss": 0.2095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22103667259216309,
|
|
"step": 3305,
|
|
"valid_targets_mean": 2645.8,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 5.706896551724138,
|
|
"grad_norm": 0.8230034928760354,
|
|
"learning_rate": 4.026255015367302e-06,
|
|
"loss": 0.2235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21825623512268066,
|
|
"step": 3310,
|
|
"valid_targets_mean": 2551.1,
|
|
"valid_targets_min": 343
|
|
},
|
|
{
|
|
"epoch": 5.7155172413793105,
|
|
"grad_norm": 1.0239665276979915,
|
|
"learning_rate": 3.974666539676774e-06,
|
|
"loss": 0.2597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24945151805877686,
|
|
"step": 3315,
|
|
"valid_targets_mean": 1659.0,
|
|
"valid_targets_min": 526
|
|
},
|
|
{
|
|
"epoch": 5.724137931034483,
|
|
"grad_norm": 1.0776616704513493,
|
|
"learning_rate": 3.923374212170634e-06,
|
|
"loss": 0.2431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2398718148469925,
|
|
"step": 3320,
|
|
"valid_targets_mean": 1804.1,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 5.732758620689655,
|
|
"grad_norm": 1.016789683397506,
|
|
"learning_rate": 3.872378980731168e-06,
|
|
"loss": 0.2225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22868318855762482,
|
|
"step": 3325,
|
|
"valid_targets_mean": 1696.4,
|
|
"valid_targets_min": 365
|
|
},
|
|
{
|
|
"epoch": 5.741379310344827,
|
|
"grad_norm": 0.822562726856188,
|
|
"learning_rate": 3.821681787750327e-06,
|
|
"loss": 0.2548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.319583535194397,
|
|
"step": 3330,
|
|
"valid_targets_mean": 2791.6,
|
|
"valid_targets_min": 561
|
|
},
|
|
{
|
|
"epoch": 5.75,
|
|
"grad_norm": 0.8374432974605485,
|
|
"learning_rate": 3.7712835701122985e-06,
|
|
"loss": 0.2263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27780675888061523,
|
|
"step": 3335,
|
|
"valid_targets_mean": 2587.7,
|
|
"valid_targets_min": 584
|
|
},
|
|
{
|
|
"epoch": 5.758620689655173,
|
|
"grad_norm": 0.8238659716528866,
|
|
"learning_rate": 3.721185259176223e-06,
|
|
"loss": 0.2894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24886605143547058,
|
|
"step": 3340,
|
|
"valid_targets_mean": 2995.2,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 5.767241379310345,
|
|
"grad_norm": 0.9083332235947693,
|
|
"learning_rate": 3.6713877807589503e-06,
|
|
"loss": 0.2406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25362688302993774,
|
|
"step": 3345,
|
|
"valid_targets_mean": 2286.2,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 5.775862068965517,
|
|
"grad_norm": 0.7791354120068396,
|
|
"learning_rate": 3.621892055117955e-06,
|
|
"loss": 0.2462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2154545933008194,
|
|
"step": 3350,
|
|
"valid_targets_mean": 2828.0,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 5.7844827586206895,
|
|
"grad_norm": 0.7390189719309704,
|
|
"learning_rate": 3.572698996934303e-06,
|
|
"loss": 0.2559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2241281419992447,
|
|
"step": 3355,
|
|
"valid_targets_mean": 3631.4,
|
|
"valid_targets_min": 871
|
|
},
|
|
{
|
|
"epoch": 5.793103448275862,
|
|
"grad_norm": 0.8756183555419305,
|
|
"learning_rate": 3.5238095152957906e-06,
|
|
"loss": 0.2423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29824498295783997,
|
|
"step": 3360,
|
|
"valid_targets_mean": 2598.0,
|
|
"valid_targets_min": 495
|
|
},
|
|
{
|
|
"epoch": 5.801724137931035,
|
|
"grad_norm": 0.9337227488178528,
|
|
"learning_rate": 3.4752245136801065e-06,
|
|
"loss": 0.2497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24506528675556183,
|
|
"step": 3365,
|
|
"valid_targets_mean": 2258.9,
|
|
"valid_targets_min": 878
|
|
},
|
|
{
|
|
"epoch": 5.810344827586206,
|
|
"grad_norm": 1.0890649426704166,
|
|
"learning_rate": 3.4269448899381354e-06,
|
|
"loss": 0.218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2098155915737152,
|
|
"step": 3370,
|
|
"valid_targets_mean": 2132.2,
|
|
"valid_targets_min": 837
|
|
},
|
|
{
|
|
"epoch": 5.818965517241379,
|
|
"grad_norm": 0.9953523310912673,
|
|
"learning_rate": 3.3789715362773955e-06,
|
|
"loss": 0.2209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24397800862789154,
|
|
"step": 3375,
|
|
"valid_targets_mean": 1919.3,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 5.827586206896552,
|
|
"grad_norm": 0.9040669773305245,
|
|
"learning_rate": 3.3313053392455317e-06,
|
|
"loss": 0.2409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2670442461967468,
|
|
"step": 3380,
|
|
"valid_targets_mean": 2793.4,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 5.836206896551724,
|
|
"grad_norm": 0.8329381538145997,
|
|
"learning_rate": 3.2839471797139287e-06,
|
|
"loss": 0.2342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24865490198135376,
|
|
"step": 3385,
|
|
"valid_targets_mean": 3038.6,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 5.844827586206897,
|
|
"grad_norm": 0.9337244826069802,
|
|
"learning_rate": 3.236897932861438e-06,
|
|
"loss": 0.2294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2127501517534256,
|
|
"step": 3390,
|
|
"valid_targets_mean": 1871.8,
|
|
"valid_targets_min": 389
|
|
},
|
|
{
|
|
"epoch": 5.853448275862069,
|
|
"grad_norm": 0.7431219731060789,
|
|
"learning_rate": 3.190158468158209e-06,
|
|
"loss": 0.271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20985054969787598,
|
|
"step": 3395,
|
|
"valid_targets_mean": 3339.9,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 5.862068965517241,
|
|
"grad_norm": 0.7365106247265166,
|
|
"learning_rate": 3.1437296493496183e-06,
|
|
"loss": 0.218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18149128556251526,
|
|
"step": 3400,
|
|
"valid_targets_mean": 2524.5,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 5.870689655172414,
|
|
"grad_norm": 0.7285497403315008,
|
|
"learning_rate": 3.0976123344402897e-06,
|
|
"loss": 0.2312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19571764767169952,
|
|
"step": 3405,
|
|
"valid_targets_mean": 2929.2,
|
|
"valid_targets_min": 1012
|
|
},
|
|
{
|
|
"epoch": 5.879310344827586,
|
|
"grad_norm": 0.8446091784157775,
|
|
"learning_rate": 3.0518073756782683e-06,
|
|
"loss": 0.2295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21701383590698242,
|
|
"step": 3410,
|
|
"valid_targets_mean": 2557.4,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 5.887931034482759,
|
|
"grad_norm": 0.8379682725324452,
|
|
"learning_rate": 3.0063156195392685e-06,
|
|
"loss": 0.243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22046051919460297,
|
|
"step": 3415,
|
|
"valid_targets_mean": 2508.8,
|
|
"valid_targets_min": 1004
|
|
},
|
|
{
|
|
"epoch": 5.896551724137931,
|
|
"grad_norm": 0.823916206620456,
|
|
"learning_rate": 2.9611379067109914e-06,
|
|
"loss": 0.2541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25937724113464355,
|
|
"step": 3420,
|
|
"valid_targets_mean": 2341.8,
|
|
"valid_targets_min": 603
|
|
},
|
|
{
|
|
"epoch": 5.905172413793103,
|
|
"grad_norm": 0.8688927625576043,
|
|
"learning_rate": 2.9162750720776366e-06,
|
|
"loss": 0.2338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20510298013687134,
|
|
"step": 3425,
|
|
"valid_targets_mean": 2424.8,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 5.913793103448276,
|
|
"grad_norm": 0.8413675713303786,
|
|
"learning_rate": 2.871727944704452e-06,
|
|
"loss": 0.2439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2330384999513626,
|
|
"step": 3430,
|
|
"valid_targets_mean": 2494.5,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 5.922413793103448,
|
|
"grad_norm": 0.8731479034508837,
|
|
"learning_rate": 2.8274973478224167e-06,
|
|
"loss": 0.2621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21474598348140717,
|
|
"step": 3435,
|
|
"valid_targets_mean": 1941.6,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 5.931034482758621,
|
|
"grad_norm": 0.7949398908604771,
|
|
"learning_rate": 2.783584098813006e-06,
|
|
"loss": 0.2391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21405553817749023,
|
|
"step": 3440,
|
|
"valid_targets_mean": 2754.8,
|
|
"valid_targets_min": 1074
|
|
},
|
|
{
|
|
"epoch": 5.939655172413794,
|
|
"grad_norm": 0.6099875987527934,
|
|
"learning_rate": 2.739989009193138e-06,
|
|
"loss": 0.2637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19622787833213806,
|
|
"step": 3445,
|
|
"valid_targets_mean": 3920.4,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 5.948275862068965,
|
|
"grad_norm": 0.8887479983547679,
|
|
"learning_rate": 2.6967128846001234e-06,
|
|
"loss": 0.24,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22184374928474426,
|
|
"step": 3450,
|
|
"valid_targets_mean": 2611.6,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 5.956896551724138,
|
|
"grad_norm": 0.762426985169917,
|
|
"learning_rate": 2.6537565247768094e-06,
|
|
"loss": 0.2738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22541582584381104,
|
|
"step": 3455,
|
|
"valid_targets_mean": 2869.6,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 5.9655172413793105,
|
|
"grad_norm": 0.7213697453294076,
|
|
"learning_rate": 2.611120723556775e-06,
|
|
"loss": 0.2392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19540910422801971,
|
|
"step": 3460,
|
|
"valid_targets_mean": 3429.9,
|
|
"valid_targets_min": 474
|
|
},
|
|
{
|
|
"epoch": 5.974137931034483,
|
|
"grad_norm": 0.8288431420947139,
|
|
"learning_rate": 2.568806268849684e-06,
|
|
"loss": 0.27,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3504294455051422,
|
|
"step": 3465,
|
|
"valid_targets_mean": 3034.3,
|
|
"valid_targets_min": 497
|
|
},
|
|
{
|
|
"epoch": 5.982758620689655,
|
|
"grad_norm": 0.8835454906756263,
|
|
"learning_rate": 2.526813942626736e-06,
|
|
"loss": 0.2301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22182214260101318,
|
|
"step": 3470,
|
|
"valid_targets_mean": 2302.6,
|
|
"valid_targets_min": 437
|
|
},
|
|
{
|
|
"epoch": 5.991379310344827,
|
|
"grad_norm": 0.971771312176322,
|
|
"learning_rate": 2.4851445209061574e-06,
|
|
"loss": 0.2525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23820282518863678,
|
|
"step": 3475,
|
|
"valid_targets_mean": 2441.9,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 6.0,
|
|
"grad_norm": 0.9859173949074006,
|
|
"learning_rate": 2.4437987737389277e-06,
|
|
"loss": 0.2906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3061915636062622,
|
|
"step": 3480,
|
|
"valid_targets_mean": 1670.5,
|
|
"valid_targets_min": 521
|
|
},
|
|
{
|
|
"epoch": 6.008620689655173,
|
|
"grad_norm": 0.8259796364563601,
|
|
"learning_rate": 2.40277746519451e-06,
|
|
"loss": 0.2246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2662147283554077,
|
|
"step": 3485,
|
|
"valid_targets_mean": 2172.5,
|
|
"valid_targets_min": 967
|
|
},
|
|
{
|
|
"epoch": 6.017241379310345,
|
|
"grad_norm": 0.8122390172067061,
|
|
"learning_rate": 2.362081353346746e-06,
|
|
"loss": 0.234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3083020746707916,
|
|
"step": 3490,
|
|
"valid_targets_mean": 2627.9,
|
|
"valid_targets_min": 651
|
|
},
|
|
{
|
|
"epoch": 6.025862068965517,
|
|
"grad_norm": 0.8572002021748207,
|
|
"learning_rate": 2.3217111902598298e-06,
|
|
"loss": 0.2488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1962701827287674,
|
|
"step": 3495,
|
|
"valid_targets_mean": 2450.2,
|
|
"valid_targets_min": 786
|
|
},
|
|
{
|
|
"epoch": 6.0344827586206895,
|
|
"grad_norm": 0.7183758223798724,
|
|
"learning_rate": 2.2816677219744388e-06,
|
|
"loss": 0.2225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1775016337633133,
|
|
"step": 3500,
|
|
"valid_targets_mean": 3416.1,
|
|
"valid_targets_min": 425
|
|
},
|
|
{
|
|
"epoch": 6.043103448275862,
|
|
"grad_norm": 0.8569666247164796,
|
|
"learning_rate": 2.241951688493924e-06,
|
|
"loss": 0.267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2090853452682495,
|
|
"step": 3505,
|
|
"valid_targets_mean": 2665.7,
|
|
"valid_targets_min": 937
|
|
},
|
|
{
|
|
"epoch": 6.051724137931035,
|
|
"grad_norm": 0.7083072713399716,
|
|
"learning_rate": 2.2025638237706294e-06,
|
|
"loss": 0.2241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21648496389389038,
|
|
"step": 3510,
|
|
"valid_targets_mean": 3468.3,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 6.060344827586207,
|
|
"grad_norm": 0.7616299153165997,
|
|
"learning_rate": 2.1635048556923555e-06,
|
|
"loss": 0.2314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3211386799812317,
|
|
"step": 3515,
|
|
"valid_targets_mean": 3530.7,
|
|
"valid_targets_min": 725
|
|
},
|
|
{
|
|
"epoch": 6.068965517241379,
|
|
"grad_norm": 0.8144597761169627,
|
|
"learning_rate": 2.1247755060688856e-06,
|
|
"loss": 0.2125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2765442728996277,
|
|
"step": 3520,
|
|
"valid_targets_mean": 2601.8,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 6.077586206896552,
|
|
"grad_norm": 0.8808259389489778,
|
|
"learning_rate": 2.0863764906186514e-06,
|
|
"loss": 0.2403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18842661380767822,
|
|
"step": 3525,
|
|
"valid_targets_mean": 2145.4,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 6.086206896551724,
|
|
"grad_norm": 0.7029072678359862,
|
|
"learning_rate": 2.048308518955515e-06,
|
|
"loss": 0.2031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.236032634973526,
|
|
"step": 3530,
|
|
"valid_targets_mean": 2771.7,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 6.094827586206897,
|
|
"grad_norm": 0.9491045420303452,
|
|
"learning_rate": 2.010572294575641e-06,
|
|
"loss": 0.2636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2161491960287094,
|
|
"step": 3535,
|
|
"valid_targets_mean": 2009.6,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 6.103448275862069,
|
|
"grad_norm": 0.6859005396721884,
|
|
"learning_rate": 1.9731685148445168e-06,
|
|
"loss": 0.2792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22911563515663147,
|
|
"step": 3540,
|
|
"valid_targets_mean": 3666.6,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 6.112068965517241,
|
|
"grad_norm": 0.9557415355225928,
|
|
"learning_rate": 1.9360978709840304e-06,
|
|
"loss": 0.2133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21899035573005676,
|
|
"step": 3545,
|
|
"valid_targets_mean": 1906.8,
|
|
"valid_targets_min": 468
|
|
},
|
|
{
|
|
"epoch": 6.120689655172414,
|
|
"grad_norm": 0.909071489081533,
|
|
"learning_rate": 1.8993610480597359e-06,
|
|
"loss": 0.2409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24376355111598969,
|
|
"step": 3550,
|
|
"valid_targets_mean": 2097.9,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 6.129310344827586,
|
|
"grad_norm": 0.8247316406377753,
|
|
"learning_rate": 1.8629587249681802e-06,
|
|
"loss": 0.1985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19326910376548767,
|
|
"step": 3555,
|
|
"valid_targets_mean": 2371.6,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 6.137931034482759,
|
|
"grad_norm": 0.7254426351650269,
|
|
"learning_rate": 1.8268915744243321e-06,
|
|
"loss": 0.2246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21975982189178467,
|
|
"step": 3560,
|
|
"valid_targets_mean": 2966.4,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 6.146551724137931,
|
|
"grad_norm": 0.9361941902410946,
|
|
"learning_rate": 1.7911602629491876e-06,
|
|
"loss": 0.2997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2509339451789856,
|
|
"step": 3565,
|
|
"valid_targets_mean": 1950.9,
|
|
"valid_targets_min": 390
|
|
},
|
|
{
|
|
"epoch": 6.155172413793103,
|
|
"grad_norm": 0.7228613254495483,
|
|
"learning_rate": 1.7557654508574339e-06,
|
|
"loss": 0.2505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20333698391914368,
|
|
"step": 3570,
|
|
"valid_targets_mean": 3122.8,
|
|
"valid_targets_min": 754
|
|
},
|
|
{
|
|
"epoch": 6.163793103448276,
|
|
"grad_norm": 1.0020794026321742,
|
|
"learning_rate": 1.7207077922452465e-06,
|
|
"loss": 0.2367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.210041344165802,
|
|
"step": 3575,
|
|
"valid_targets_mean": 2465.9,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 6.172413793103448,
|
|
"grad_norm": 0.8798123178752363,
|
|
"learning_rate": 1.6859879349782016e-06,
|
|
"loss": 0.2092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18440866470336914,
|
|
"step": 3580,
|
|
"valid_targets_mean": 2341.3,
|
|
"valid_targets_min": 449
|
|
},
|
|
{
|
|
"epoch": 6.181034482758621,
|
|
"grad_norm": 0.8483923006765547,
|
|
"learning_rate": 1.6516065206793142e-06,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22888490557670593,
|
|
"step": 3585,
|
|
"valid_targets_mean": 2786.9,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 6.189655172413793,
|
|
"grad_norm": 0.6647533942461221,
|
|
"learning_rate": 1.6175641847171687e-06,
|
|
"loss": 0.1913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13367384672164917,
|
|
"step": 3590,
|
|
"valid_targets_mean": 2980.8,
|
|
"valid_targets_min": 528
|
|
},
|
|
{
|
|
"epoch": 6.198275862068965,
|
|
"grad_norm": 0.7683054390842444,
|
|
"learning_rate": 1.5838615561941705e-06,
|
|
"loss": 0.2323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20049786567687988,
|
|
"step": 3595,
|
|
"valid_targets_mean": 3060.6,
|
|
"valid_targets_min": 651
|
|
},
|
|
{
|
|
"epoch": 6.206896551724138,
|
|
"grad_norm": 0.7673616808413553,
|
|
"learning_rate": 1.550499257934952e-06,
|
|
"loss": 0.2557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25009968876838684,
|
|
"step": 3600,
|
|
"valid_targets_mean": 2919.8,
|
|
"valid_targets_min": 1283
|
|
},
|
|
{
|
|
"epoch": 6.2155172413793105,
|
|
"grad_norm": 0.9320141925380155,
|
|
"learning_rate": 1.5174779064748246e-06,
|
|
"loss": 0.2743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25595584511756897,
|
|
"step": 3605,
|
|
"valid_targets_mean": 2494.9,
|
|
"valid_targets_min": 528
|
|
},
|
|
{
|
|
"epoch": 6.224137931034483,
|
|
"grad_norm": 0.793005521065028,
|
|
"learning_rate": 1.4847981120484089e-06,
|
|
"loss": 0.2045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2183004915714264,
|
|
"step": 3610,
|
|
"valid_targets_mean": 2551.4,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 6.232758620689655,
|
|
"grad_norm": 0.984122982728532,
|
|
"learning_rate": 1.4524604785783548e-06,
|
|
"loss": 0.2456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24975597858428955,
|
|
"step": 3615,
|
|
"valid_targets_mean": 2036.0,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 6.241379310344827,
|
|
"grad_norm": 1.0714043895025145,
|
|
"learning_rate": 1.4204656036641717e-06,
|
|
"loss": 0.235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23043999075889587,
|
|
"step": 3620,
|
|
"valid_targets_mean": 1476.6,
|
|
"valid_targets_min": 511
|
|
},
|
|
{
|
|
"epoch": 6.25,
|
|
"grad_norm": 0.7599854368014449,
|
|
"learning_rate": 1.3888140785711945e-06,
|
|
"loss": 0.2274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2328336238861084,
|
|
"step": 3625,
|
|
"valid_targets_mean": 2807.8,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 6.258620689655173,
|
|
"grad_norm": 0.7886353552490163,
|
|
"learning_rate": 1.3575064882196398e-06,
|
|
"loss": 0.2241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25991952419281006,
|
|
"step": 3630,
|
|
"valid_targets_mean": 2783.3,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 6.267241379310345,
|
|
"grad_norm": 0.8249182347946712,
|
|
"learning_rate": 1.326543411173833e-06,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4094190299510956,
|
|
"step": 3635,
|
|
"valid_targets_mean": 2826.6,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 6.275862068965517,
|
|
"grad_norm": 0.8263338135104763,
|
|
"learning_rate": 1.295925419631474e-06,
|
|
"loss": 0.2381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25934290885925293,
|
|
"step": 3640,
|
|
"valid_targets_mean": 2836.1,
|
|
"valid_targets_min": 1184
|
|
},
|
|
{
|
|
"epoch": 6.2844827586206895,
|
|
"grad_norm": 0.8722859174332926,
|
|
"learning_rate": 1.265653079413094e-06,
|
|
"loss": 0.2255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21666531264781952,
|
|
"step": 3645,
|
|
"valid_targets_mean": 2185.1,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 6.293103448275862,
|
|
"grad_norm": 0.7035939411346323,
|
|
"learning_rate": 1.2357269499515745e-06,
|
|
"loss": 0.2098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17183193564414978,
|
|
"step": 3650,
|
|
"valid_targets_mean": 2954.5,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 6.301724137931035,
|
|
"grad_norm": 0.8560939981440374,
|
|
"learning_rate": 1.2061475842818337e-06,
|
|
"loss": 0.1923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16396474838256836,
|
|
"step": 3655,
|
|
"valid_targets_mean": 2264.4,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 6.310344827586207,
|
|
"grad_norm": 0.8276157933772642,
|
|
"learning_rate": 1.176915529030589e-06,
|
|
"loss": 0.2001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20803743600845337,
|
|
"step": 3660,
|
|
"valid_targets_mean": 2581.9,
|
|
"valid_targets_min": 631
|
|
},
|
|
{
|
|
"epoch": 6.318965517241379,
|
|
"grad_norm": 0.8825248792003373,
|
|
"learning_rate": 1.1480313244062603e-06,
|
|
"loss": 0.224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19510574638843536,
|
|
"step": 3665,
|
|
"valid_targets_mean": 2983.0,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 6.327586206896552,
|
|
"grad_norm": 1.0603206638893432,
|
|
"learning_rate": 1.1194955041889898e-06,
|
|
"loss": 0.2747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4232352674007416,
|
|
"step": 3670,
|
|
"valid_targets_mean": 2577.1,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 6.336206896551724,
|
|
"grad_norm": 0.654952681959888,
|
|
"learning_rate": 1.0913085957207748e-06,
|
|
"loss": 0.2437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16756662726402283,
|
|
"step": 3675,
|
|
"valid_targets_mean": 3552.1,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 6.344827586206897,
|
|
"grad_norm": 0.7777566470051069,
|
|
"learning_rate": 1.063471119895727e-06,
|
|
"loss": 0.2513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24378885328769684,
|
|
"step": 3680,
|
|
"valid_targets_mean": 2565.6,
|
|
"valid_targets_min": 809
|
|
},
|
|
{
|
|
"epoch": 6.353448275862069,
|
|
"grad_norm": 0.8061803441107526,
|
|
"learning_rate": 1.0359835911504246e-06,
|
|
"loss": 0.2238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18783554434776306,
|
|
"step": 3685,
|
|
"valid_targets_mean": 2660.6,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 6.362068965517241,
|
|
"grad_norm": 0.902717438655001,
|
|
"learning_rate": 1.0088465174544514e-06,
|
|
"loss": 0.2239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22732427716255188,
|
|
"step": 3690,
|
|
"valid_targets_mean": 2514.8,
|
|
"valid_targets_min": 676
|
|
},
|
|
{
|
|
"epoch": 6.370689655172414,
|
|
"grad_norm": 0.8330379786967017,
|
|
"learning_rate": 9.820604003009614e-07,
|
|
"loss": 0.2394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19690130650997162,
|
|
"step": 3695,
|
|
"valid_targets_mean": 2417.4,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 6.379310344827586,
|
|
"grad_norm": 0.8657508340163629,
|
|
"learning_rate": 9.556257346974319e-07,
|
|
"loss": 0.2327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20571178197860718,
|
|
"step": 3700,
|
|
"valid_targets_mean": 2392.0,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 6.387931034482759,
|
|
"grad_norm": 0.9078987585519426,
|
|
"learning_rate": 9.295430091565261e-07,
|
|
"loss": 0.225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16755205392837524,
|
|
"step": 3705,
|
|
"valid_targets_mean": 2038.2,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 6.396551724137931,
|
|
"grad_norm": 0.6660952264268134,
|
|
"learning_rate": 9.038127056870416e-07,
|
|
"loss": 0.1977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19638925790786743,
|
|
"step": 3710,
|
|
"valid_targets_mean": 3692.2,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 6.405172413793103,
|
|
"grad_norm": 0.6483874159711565,
|
|
"learning_rate": 8.784352997850277e-07,
|
|
"loss": 0.2323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2156984508037567,
|
|
"step": 3715,
|
|
"valid_targets_mean": 3738.2,
|
|
"valid_targets_min": 352
|
|
},
|
|
{
|
|
"epoch": 6.413793103448276,
|
|
"grad_norm": 1.1860093859601488,
|
|
"learning_rate": 8.534112604249789e-07,
|
|
"loss": 0.2296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21328943967819214,
|
|
"step": 3720,
|
|
"valid_targets_mean": 1351.2,
|
|
"valid_targets_min": 498
|
|
},
|
|
{
|
|
"epoch": 6.422413793103448,
|
|
"grad_norm": 0.9595913540187618,
|
|
"learning_rate": 8.287410500511739e-07,
|
|
"loss": 0.2189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24893982708454132,
|
|
"step": 3725,
|
|
"valid_targets_mean": 1926.8,
|
|
"valid_targets_min": 497
|
|
},
|
|
{
|
|
"epoch": 6.431034482758621,
|
|
"grad_norm": 0.6243254247221967,
|
|
"learning_rate": 8.044251245691393e-07,
|
|
"loss": 0.2151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19777286052703857,
|
|
"step": 3730,
|
|
"valid_targets_mean": 4638.8,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 6.439655172413793,
|
|
"grad_norm": 1.0201483178932162,
|
|
"learning_rate": 7.804639333372077e-07,
|
|
"loss": 0.2285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19841013848781586,
|
|
"step": 3735,
|
|
"valid_targets_mean": 1597.1,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 6.448275862068965,
|
|
"grad_norm": 0.6715125124145136,
|
|
"learning_rate": 7.568579191582248e-07,
|
|
"loss": 0.2448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15024380385875702,
|
|
"step": 3740,
|
|
"valid_targets_mean": 3333.9,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 6.456896551724138,
|
|
"grad_norm": 0.9225692501156839,
|
|
"learning_rate": 7.336075182713708e-07,
|
|
"loss": 0.229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18564382195472717,
|
|
"step": 3745,
|
|
"valid_targets_mean": 1924.0,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 6.4655172413793105,
|
|
"grad_norm": 0.8431848482827717,
|
|
"learning_rate": 7.107131603440809e-07,
|
|
"loss": 0.2076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24477818608283997,
|
|
"step": 3750,
|
|
"valid_targets_mean": 2608.8,
|
|
"valid_targets_min": 864
|
|
},
|
|
{
|
|
"epoch": 6.474137931034483,
|
|
"grad_norm": 0.8890579364206326,
|
|
"learning_rate": 6.881752684641219e-07,
|
|
"loss": 0.2216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23633387684822083,
|
|
"step": 3755,
|
|
"valid_targets_mean": 2135.5,
|
|
"valid_targets_min": 501
|
|
},
|
|
{
|
|
"epoch": 6.482758620689655,
|
|
"grad_norm": 0.9418285611708161,
|
|
"learning_rate": 6.659942591317703e-07,
|
|
"loss": 0.1965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21280354261398315,
|
|
"step": 3760,
|
|
"valid_targets_mean": 2341.2,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 6.491379310344827,
|
|
"grad_norm": 0.8030498311605387,
|
|
"learning_rate": 6.441705422521072e-07,
|
|
"loss": 0.2343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3081728518009186,
|
|
"step": 3765,
|
|
"valid_targets_mean": 2511.6,
|
|
"valid_targets_min": 390
|
|
},
|
|
{
|
|
"epoch": 6.5,
|
|
"grad_norm": 0.5943846941535217,
|
|
"learning_rate": 6.22704521127444e-07,
|
|
"loss": 0.2119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15976674854755402,
|
|
"step": 3770,
|
|
"valid_targets_mean": 3873.2,
|
|
"valid_targets_min": 561
|
|
},
|
|
{
|
|
"epoch": 6.508620689655173,
|
|
"grad_norm": 1.0891839401674237,
|
|
"learning_rate": 6.015965924498912e-07,
|
|
"loss": 0.2217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21925121545791626,
|
|
"step": 3775,
|
|
"valid_targets_mean": 1394.6,
|
|
"valid_targets_min": 463
|
|
},
|
|
{
|
|
"epoch": 6.517241379310345,
|
|
"grad_norm": 0.8599544341446648,
|
|
"learning_rate": 5.808471462939946e-07,
|
|
"loss": 0.2165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16710597276687622,
|
|
"step": 3780,
|
|
"valid_targets_mean": 2198.5,
|
|
"valid_targets_min": 554
|
|
},
|
|
{
|
|
"epoch": 6.525862068965517,
|
|
"grad_norm": 0.7814752595407828,
|
|
"learning_rate": 5.604565661095484e-07,
|
|
"loss": 0.2125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16703981161117554,
|
|
"step": 3785,
|
|
"valid_targets_mean": 2364.4,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 6.5344827586206895,
|
|
"grad_norm": 0.8041080575361683,
|
|
"learning_rate": 5.404252287145006e-07,
|
|
"loss": 0.2221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21759355068206787,
|
|
"step": 3790,
|
|
"valid_targets_mean": 2688.5,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 6.543103448275862,
|
|
"grad_norm": 0.8965876942311776,
|
|
"learning_rate": 5.207535042879963e-07,
|
|
"loss": 0.2277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28278952836990356,
|
|
"step": 3795,
|
|
"valid_targets_mean": 2443.2,
|
|
"valid_targets_min": 1188
|
|
},
|
|
{
|
|
"epoch": 6.551724137931035,
|
|
"grad_norm": 0.9071674222059003,
|
|
"learning_rate": 5.014417563635276e-07,
|
|
"loss": 0.2514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25929227471351624,
|
|
"step": 3800,
|
|
"valid_targets_mean": 2408.9,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 6.560344827586206,
|
|
"grad_norm": 0.8831685374142777,
|
|
"learning_rate": 4.824903418222259e-07,
|
|
"loss": 0.2293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19960355758666992,
|
|
"step": 3805,
|
|
"valid_targets_mean": 2117.8,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 6.568965517241379,
|
|
"grad_norm": 0.8961223958340995,
|
|
"learning_rate": 4.638996108862559e-07,
|
|
"loss": 0.2457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22452160716056824,
|
|
"step": 3810,
|
|
"valid_targets_mean": 2288.6,
|
|
"valid_targets_min": 698
|
|
},
|
|
{
|
|
"epoch": 6.577586206896552,
|
|
"grad_norm": 1.0638472212212051,
|
|
"learning_rate": 4.456699071123538e-07,
|
|
"loss": 0.2297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24735592305660248,
|
|
"step": 3815,
|
|
"valid_targets_mean": 1975.4,
|
|
"valid_targets_min": 442
|
|
},
|
|
{
|
|
"epoch": 6.586206896551724,
|
|
"grad_norm": 0.8474618730147804,
|
|
"learning_rate": 4.2780156738546407e-07,
|
|
"loss": 0.2499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27747148275375366,
|
|
"step": 3820,
|
|
"valid_targets_mean": 2450.4,
|
|
"valid_targets_min": 433
|
|
},
|
|
{
|
|
"epoch": 6.594827586206897,
|
|
"grad_norm": 0.8280145715710722,
|
|
"learning_rate": 4.1029492191253296e-07,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3170340657234192,
|
|
"step": 3825,
|
|
"valid_targets_mean": 3543.2,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 6.603448275862069,
|
|
"grad_norm": 0.8420428298122106,
|
|
"learning_rate": 3.931502942163956e-07,
|
|
"loss": 0.2305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18059438467025757,
|
|
"step": 3830,
|
|
"valid_targets_mean": 2666.2,
|
|
"valid_targets_min": 405
|
|
},
|
|
{
|
|
"epoch": 6.612068965517241,
|
|
"grad_norm": 0.7350163043675808,
|
|
"learning_rate": 3.763680011297921e-07,
|
|
"loss": 0.2294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26293593645095825,
|
|
"step": 3835,
|
|
"valid_targets_mean": 2851.2,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 6.620689655172414,
|
|
"grad_norm": 0.6594635552982795,
|
|
"learning_rate": 3.599483527895231e-07,
|
|
"loss": 0.1897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16505607962608337,
|
|
"step": 3840,
|
|
"valid_targets_mean": 3067.9,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 6.629310344827586,
|
|
"grad_norm": 0.9184961795467407,
|
|
"learning_rate": 3.4389165263071233e-07,
|
|
"loss": 0.2054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21113553643226624,
|
|
"step": 3845,
|
|
"valid_targets_mean": 2008.4,
|
|
"valid_targets_min": 388
|
|
},
|
|
{
|
|
"epoch": 6.637931034482759,
|
|
"grad_norm": 0.7364720588352186,
|
|
"learning_rate": 3.2819819738119983e-07,
|
|
"loss": 0.2692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.306973397731781,
|
|
"step": 3850,
|
|
"valid_targets_mean": 3472.8,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 6.646551724137931,
|
|
"grad_norm": 0.93393457736903,
|
|
"learning_rate": 3.1286827705605984e-07,
|
|
"loss": 0.2167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20894655585289001,
|
|
"step": 3855,
|
|
"valid_targets_mean": 1776.7,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 6.655172413793103,
|
|
"grad_norm": 0.8524518408875474,
|
|
"learning_rate": 2.979021749522448e-07,
|
|
"loss": 0.1909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16892358660697937,
|
|
"step": 3860,
|
|
"valid_targets_mean": 2273.3,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 6.663793103448276,
|
|
"grad_norm": 0.7130380541280331,
|
|
"learning_rate": 2.833001676433367e-07,
|
|
"loss": 0.2157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.213408425450325,
|
|
"step": 3865,
|
|
"valid_targets_mean": 3203.2,
|
|
"valid_targets_min": 521
|
|
},
|
|
{
|
|
"epoch": 6.672413793103448,
|
|
"grad_norm": 0.7018261489432114,
|
|
"learning_rate": 2.690625249744572e-07,
|
|
"loss": 0.2458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31899774074554443,
|
|
"step": 3870,
|
|
"valid_targets_mean": 3903.7,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 6.681034482758621,
|
|
"grad_norm": 0.6418700087130179,
|
|
"learning_rate": 2.551895100572566e-07,
|
|
"loss": 0.2078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16666455566883087,
|
|
"step": 3875,
|
|
"valid_targets_mean": 3481.6,
|
|
"valid_targets_min": 1271
|
|
},
|
|
{
|
|
"epoch": 6.689655172413794,
|
|
"grad_norm": 0.8733028268705049,
|
|
"learning_rate": 2.4168137926506854e-07,
|
|
"loss": 0.2288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2073197364807129,
|
|
"step": 3880,
|
|
"valid_targets_mean": 2392.1,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 6.698275862068965,
|
|
"grad_norm": 0.7901410728478923,
|
|
"learning_rate": 2.2853838222817616e-07,
|
|
"loss": 0.2171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1996878683567047,
|
|
"step": 3885,
|
|
"valid_targets_mean": 2705.2,
|
|
"valid_targets_min": 586
|
|
},
|
|
{
|
|
"epoch": 6.706896551724138,
|
|
"grad_norm": 0.8946238203625446,
|
|
"learning_rate": 2.1576076182917794e-07,
|
|
"loss": 0.2266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23879259824752808,
|
|
"step": 3890,
|
|
"valid_targets_mean": 2460.6,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 6.7155172413793105,
|
|
"grad_norm": 0.7680521503043941,
|
|
"learning_rate": 2.0334875419851573e-07,
|
|
"loss": 0.2324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21219167113304138,
|
|
"step": 3895,
|
|
"valid_targets_mean": 3066.4,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 6.724137931034483,
|
|
"grad_norm": 0.936457613376202,
|
|
"learning_rate": 1.9130258871011165e-07,
|
|
"loss": 0.2558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3052334785461426,
|
|
"step": 3900,
|
|
"valid_targets_mean": 2051.9,
|
|
"valid_targets_min": 763
|
|
},
|
|
{
|
|
"epoch": 6.732758620689655,
|
|
"grad_norm": 1.0475766339583639,
|
|
"learning_rate": 1.7962248797711356e-07,
|
|
"loss": 0.2185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21644529700279236,
|
|
"step": 3905,
|
|
"valid_targets_mean": 1918.8,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 6.741379310344827,
|
|
"grad_norm": 0.8637115483109303,
|
|
"learning_rate": 1.683086678478074e-07,
|
|
"loss": 0.2359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22499002516269684,
|
|
"step": 3910,
|
|
"valid_targets_mean": 2219.4,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 6.75,
|
|
"grad_norm": 1.0651120829370146,
|
|
"learning_rate": 1.573613374015981e-07,
|
|
"loss": 0.2452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26293978095054626,
|
|
"step": 3915,
|
|
"valid_targets_mean": 1706.4,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 6.758620689655173,
|
|
"grad_norm": 0.761333315392171,
|
|
"learning_rate": 1.4678069894517033e-07,
|
|
"loss": 0.2113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2542230784893036,
|
|
"step": 3920,
|
|
"valid_targets_mean": 3023.2,
|
|
"valid_targets_min": 713
|
|
},
|
|
{
|
|
"epoch": 6.767241379310345,
|
|
"grad_norm": 0.8423961598753894,
|
|
"learning_rate": 1.3656694800873614e-07,
|
|
"loss": 0.2195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2205635905265808,
|
|
"step": 3925,
|
|
"valid_targets_mean": 2509.8,
|
|
"valid_targets_min": 853
|
|
},
|
|
{
|
|
"epoch": 6.775862068965517,
|
|
"grad_norm": 0.916211614133893,
|
|
"learning_rate": 1.2672027334242887e-07,
|
|
"loss": 0.2235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20351552963256836,
|
|
"step": 3930,
|
|
"valid_targets_mean": 2471.0,
|
|
"valid_targets_min": 846
|
|
},
|
|
{
|
|
"epoch": 6.7844827586206895,
|
|
"grad_norm": 0.7579473459759021,
|
|
"learning_rate": 1.1724085691280806e-07,
|
|
"loss": 0.2128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18758231401443481,
|
|
"step": 3935,
|
|
"valid_targets_mean": 2898.5,
|
|
"valid_targets_min": 903
|
|
},
|
|
{
|
|
"epoch": 6.793103448275862,
|
|
"grad_norm": 0.8231434675312551,
|
|
"learning_rate": 1.0812887389950233e-07,
|
|
"loss": 0.2473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19742494821548462,
|
|
"step": 3940,
|
|
"valid_targets_mean": 2438.4,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 6.801724137931035,
|
|
"grad_norm": 0.9628228148144512,
|
|
"learning_rate": 9.938449269197181e-08,
|
|
"loss": 0.2282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23039749264717102,
|
|
"step": 3945,
|
|
"valid_targets_mean": 1953.7,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 6.810344827586206,
|
|
"grad_norm": 0.7894870509582401,
|
|
"learning_rate": 9.100787488639295e-08,
|
|
"loss": 0.2283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2916305363178253,
|
|
"step": 3950,
|
|
"valid_targets_mean": 3440.9,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 6.818965517241379,
|
|
"grad_norm": 0.7395599333644957,
|
|
"learning_rate": 8.299917528267198e-08,
|
|
"loss": 0.2147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22837454080581665,
|
|
"step": 3955,
|
|
"valid_targets_mean": 3526.9,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 6.827586206896552,
|
|
"grad_norm": 0.8811432377167411,
|
|
"learning_rate": 7.535854188159164e-08,
|
|
"loss": 0.2234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23188039660453796,
|
|
"step": 3960,
|
|
"valid_targets_mean": 2747.4,
|
|
"valid_targets_min": 989
|
|
},
|
|
{
|
|
"epoch": 6.836206896551724,
|
|
"grad_norm": 0.8304476063590915,
|
|
"learning_rate": 6.808611588206448e-08,
|
|
"loss": 0.2269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31357303261756897,
|
|
"step": 3965,
|
|
"valid_targets_mean": 2781.1,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 6.844827586206897,
|
|
"grad_norm": 0.8993994292302855,
|
|
"learning_rate": 6.11820316785372e-08,
|
|
"loss": 0.2337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24727486073970795,
|
|
"step": 3970,
|
|
"valid_targets_mean": 2601.2,
|
|
"valid_targets_min": 527
|
|
},
|
|
{
|
|
"epoch": 6.853448275862069,
|
|
"grad_norm": 0.9830591762448566,
|
|
"learning_rate": 5.464641685849259e-08,
|
|
"loss": 0.2573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2648414969444275,
|
|
"step": 3975,
|
|
"valid_targets_mean": 2094.7,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 6.862068965517241,
|
|
"grad_norm": 1.0086942806515675,
|
|
"learning_rate": 4.8479392200100336e-08,
|
|
"loss": 0.2287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2471552938222885,
|
|
"step": 3980,
|
|
"valid_targets_mean": 2030.5,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 6.870689655172414,
|
|
"grad_norm": 0.8571957254541847,
|
|
"learning_rate": 4.268107166998769e-08,
|
|
"loss": 0.2561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24853479862213135,
|
|
"step": 3985,
|
|
"valid_targets_mean": 2415.8,
|
|
"valid_targets_min": 814
|
|
},
|
|
{
|
|
"epoch": 6.879310344827586,
|
|
"grad_norm": 0.8789858848023084,
|
|
"learning_rate": 3.7251562421123375e-08,
|
|
"loss": 0.2197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19849488139152527,
|
|
"step": 3990,
|
|
"valid_targets_mean": 2971.6,
|
|
"valid_targets_min": 393
|
|
},
|
|
{
|
|
"epoch": 6.887931034482759,
|
|
"grad_norm": 0.8963419366048488,
|
|
"learning_rate": 3.219096479084804e-08,
|
|
"loss": 0.2174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2406509816646576,
|
|
"step": 3995,
|
|
"valid_targets_mean": 2845.4,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 6.896551724137931,
|
|
"grad_norm": 0.7922366801463806,
|
|
"learning_rate": 2.749937229901134e-08,
|
|
"loss": 0.2369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23498943448066711,
|
|
"step": 4000,
|
|
"valid_targets_mean": 2786.1,
|
|
"valid_targets_min": 1013
|
|
},
|
|
{
|
|
"epoch": 6.905172413793103,
|
|
"grad_norm": 0.9071213805483908,
|
|
"learning_rate": 2.317687164624882e-08,
|
|
"loss": 0.2656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22399500012397766,
|
|
"step": 4005,
|
|
"valid_targets_mean": 2348.9,
|
|
"valid_targets_min": 1112
|
|
},
|
|
{
|
|
"epoch": 6.913793103448276,
|
|
"grad_norm": 0.9367507061638377,
|
|
"learning_rate": 1.9223542712381026e-08,
|
|
"loss": 0.2477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24696654081344604,
|
|
"step": 4010,
|
|
"valid_targets_mean": 2180.5,
|
|
"valid_targets_min": 377
|
|
},
|
|
{
|
|
"epoch": 6.922413793103448,
|
|
"grad_norm": 0.9941698674033683,
|
|
"learning_rate": 1.563945855492799e-08,
|
|
"loss": 0.2504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26774609088897705,
|
|
"step": 4015,
|
|
"valid_targets_mean": 2133.8,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 6.931034482758621,
|
|
"grad_norm": 0.8500184025790589,
|
|
"learning_rate": 1.242468540777253e-08,
|
|
"loss": 0.2139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2405872642993927,
|
|
"step": 4020,
|
|
"valid_targets_mean": 2453.4,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 6.939655172413794,
|
|
"grad_norm": 0.6095697059261187,
|
|
"learning_rate": 9.579282679927915e-09,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40256640315055847,
|
|
"step": 4025,
|
|
"valid_targets_mean": 4629.2,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 6.948275862068965,
|
|
"grad_norm": 0.7009948391059693,
|
|
"learning_rate": 7.1033029544365085e-09,
|
|
"loss": 0.2419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1720399707555771,
|
|
"step": 4030,
|
|
"valid_targets_mean": 3406.2,
|
|
"valid_targets_min": 630
|
|
},
|
|
{
|
|
"epoch": 6.956896551724138,
|
|
"grad_norm": 0.7661105136161285,
|
|
"learning_rate": 4.996791987410543e-09,
|
|
"loss": 0.1926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1798725575208664,
|
|
"step": 4035,
|
|
"valid_targets_mean": 2961.6,
|
|
"valid_targets_min": 907
|
|
},
|
|
{
|
|
"epoch": 6.9655172413793105,
|
|
"grad_norm": 0.9354847837018981,
|
|
"learning_rate": 3.2597887071750266e-09,
|
|
"loss": 0.2278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20513418316841125,
|
|
"step": 4040,
|
|
"valid_targets_mean": 2190.2,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 6.974137931034483,
|
|
"grad_norm": 0.8519742635190573,
|
|
"learning_rate": 1.892325213552759e-09,
|
|
"loss": 0.2269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20830674469470978,
|
|
"step": 4045,
|
|
"valid_targets_mean": 2784.9,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 6.982758620689655,
|
|
"grad_norm": 0.9443206286590975,
|
|
"learning_rate": 8.944267772692527e-10,
|
|
"loss": 0.1967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20226803421974182,
|
|
"step": 4050,
|
|
"valid_targets_mean": 2149.4,
|
|
"valid_targets_min": 432
|
|
},
|
|
{
|
|
"epoch": 6.991379310344827,
|
|
"grad_norm": 0.8811578949870185,
|
|
"learning_rate": 2.66111839490879e-10,
|
|
"loss": 0.1941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2233564555644989,
|
|
"step": 4055,
|
|
"valid_targets_mean": 2065.1,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"grad_norm": 0.9695032609226015,
|
|
"learning_rate": 7.392011478479787e-12,
|
|
"loss": 0.2152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25369924306869507,
|
|
"step": 4060,
|
|
"valid_targets_mean": 2212.8,
|
|
"valid_targets_min": 431
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25369924306869507,
|
|
"step": 4060,
|
|
"total_flos": 644683152949248.0,
|
|
"train_loss": 0.3271850697072269,
|
|
"train_runtime": 13949.8914,
|
|
"train_samples_per_second": 4.655,
|
|
"train_steps_per_second": 0.291,
|
|
"valid_targets_mean": 2212.8,
|
|
"valid_targets_min": 431
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4060,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 644683152949248.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|