9310 lines
258 KiB
JSON
9310 lines
258 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4214,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.008305647840531562,
|
|
"grad_norm": 24.90649124771031,
|
|
"learning_rate": 3.791469194312797e-07,
|
|
"loss": 0.8811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8554224371910095,
|
|
"step": 5,
|
|
"valid_targets_mean": 4347.9,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 0.016611295681063124,
|
|
"grad_norm": 21.57390482049456,
|
|
"learning_rate": 8.530805687203792e-07,
|
|
"loss": 0.8878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8921433687210083,
|
|
"step": 10,
|
|
"valid_targets_mean": 6913.6,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 0.024916943521594685,
|
|
"grad_norm": 20.64822180858936,
|
|
"learning_rate": 1.3270142180094788e-06,
|
|
"loss": 0.8641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8390516638755798,
|
|
"step": 15,
|
|
"valid_targets_mean": 4732.9,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 0.03322259136212625,
|
|
"grad_norm": 12.617710340453531,
|
|
"learning_rate": 1.8009478672985784e-06,
|
|
"loss": 0.823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8028559684753418,
|
|
"step": 20,
|
|
"valid_targets_mean": 5179.7,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 0.04152823920265781,
|
|
"grad_norm": 8.342912542605125,
|
|
"learning_rate": 2.2748815165876777e-06,
|
|
"loss": 0.7812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8277904391288757,
|
|
"step": 25,
|
|
"valid_targets_mean": 4217.7,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 0.04983388704318937,
|
|
"grad_norm": 3.9346859863894768,
|
|
"learning_rate": 2.7488151658767775e-06,
|
|
"loss": 0.7247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6857980489730835,
|
|
"step": 30,
|
|
"valid_targets_mean": 5529.4,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 0.05813953488372093,
|
|
"grad_norm": 2.3075137101592658,
|
|
"learning_rate": 3.222748815165877e-06,
|
|
"loss": 0.6785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.612958550453186,
|
|
"step": 35,
|
|
"valid_targets_mean": 5144.8,
|
|
"valid_targets_min": 528
|
|
},
|
|
{
|
|
"epoch": 0.0664451827242525,
|
|
"grad_norm": 1.8044405868858724,
|
|
"learning_rate": 3.6966824644549766e-06,
|
|
"loss": 0.6538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6395242214202881,
|
|
"step": 40,
|
|
"valid_targets_mean": 4746.2,
|
|
"valid_targets_min": 540
|
|
},
|
|
{
|
|
"epoch": 0.07475083056478406,
|
|
"grad_norm": 1.5269674826521575,
|
|
"learning_rate": 4.170616113744076e-06,
|
|
"loss": 0.6216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6274280548095703,
|
|
"step": 45,
|
|
"valid_targets_mean": 5542.6,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 0.08305647840531562,
|
|
"grad_norm": 1.2393521768813576,
|
|
"learning_rate": 4.644549763033176e-06,
|
|
"loss": 0.6023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6372599601745605,
|
|
"step": 50,
|
|
"valid_targets_mean": 5005.9,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 0.09136212624584718,
|
|
"grad_norm": 0.9840324084991864,
|
|
"learning_rate": 5.118483412322275e-06,
|
|
"loss": 0.5833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5527554154396057,
|
|
"step": 55,
|
|
"valid_targets_mean": 4091.0,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 0.09966777408637874,
|
|
"grad_norm": 1.007236335245198,
|
|
"learning_rate": 5.592417061611375e-06,
|
|
"loss": 0.5933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6056486368179321,
|
|
"step": 60,
|
|
"valid_targets_mean": 3350.8,
|
|
"valid_targets_min": 498
|
|
},
|
|
{
|
|
"epoch": 0.1079734219269103,
|
|
"grad_norm": 0.6552364774903403,
|
|
"learning_rate": 6.066350710900475e-06,
|
|
"loss": 0.4975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4783812165260315,
|
|
"step": 65,
|
|
"valid_targets_mean": 5775.7,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 0.11627906976744186,
|
|
"grad_norm": 0.6425395539294818,
|
|
"learning_rate": 6.5402843601895735e-06,
|
|
"loss": 0.5674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5478832721710205,
|
|
"step": 70,
|
|
"valid_targets_mean": 7613.7,
|
|
"valid_targets_min": 2946
|
|
},
|
|
{
|
|
"epoch": 0.12458471760797342,
|
|
"grad_norm": 0.7209180184755657,
|
|
"learning_rate": 7.014218009478674e-06,
|
|
"loss": 0.5059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5054269433021545,
|
|
"step": 75,
|
|
"valid_targets_mean": 4301.4,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 0.132890365448505,
|
|
"grad_norm": 0.6432680513755612,
|
|
"learning_rate": 7.488151658767773e-06,
|
|
"loss": 0.5583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5150182247161865,
|
|
"step": 80,
|
|
"valid_targets_mean": 5032.5,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 0.14119601328903655,
|
|
"grad_norm": 0.7167593074195592,
|
|
"learning_rate": 7.962085308056872e-06,
|
|
"loss": 0.5252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5404971837997437,
|
|
"step": 85,
|
|
"valid_targets_mean": 4232.1,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 0.14950166112956811,
|
|
"grad_norm": 0.570478666134443,
|
|
"learning_rate": 8.436018957345973e-06,
|
|
"loss": 0.4731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5374413728713989,
|
|
"step": 90,
|
|
"valid_targets_mean": 6227.9,
|
|
"valid_targets_min": 637
|
|
},
|
|
{
|
|
"epoch": 0.15780730897009967,
|
|
"grad_norm": 0.6461862549475816,
|
|
"learning_rate": 8.909952606635071e-06,
|
|
"loss": 0.4619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4013080596923828,
|
|
"step": 95,
|
|
"valid_targets_mean": 4347.4,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 0.16611295681063123,
|
|
"grad_norm": 0.563153408789091,
|
|
"learning_rate": 9.383886255924171e-06,
|
|
"loss": 0.4944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.499076783657074,
|
|
"step": 100,
|
|
"valid_targets_mean": 5568.2,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 0.1744186046511628,
|
|
"grad_norm": 0.6231199410071285,
|
|
"learning_rate": 9.85781990521327e-06,
|
|
"loss": 0.4805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47291380167007446,
|
|
"step": 105,
|
|
"valid_targets_mean": 4158.6,
|
|
"valid_targets_min": 429
|
|
},
|
|
{
|
|
"epoch": 0.18272425249169436,
|
|
"grad_norm": 0.549840039643595,
|
|
"learning_rate": 1.033175355450237e-05,
|
|
"loss": 0.4385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42723697423934937,
|
|
"step": 110,
|
|
"valid_targets_mean": 4775.6,
|
|
"valid_targets_min": 793
|
|
},
|
|
{
|
|
"epoch": 0.19102990033222592,
|
|
"grad_norm": 0.6338224712850236,
|
|
"learning_rate": 1.080568720379147e-05,
|
|
"loss": 0.4766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4813288152217865,
|
|
"step": 115,
|
|
"valid_targets_mean": 5191.1,
|
|
"valid_targets_min": 908
|
|
},
|
|
{
|
|
"epoch": 0.19933554817275748,
|
|
"grad_norm": 0.5728043319531253,
|
|
"learning_rate": 1.127962085308057e-05,
|
|
"loss": 0.4478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4543510675430298,
|
|
"step": 120,
|
|
"valid_targets_mean": 4739.8,
|
|
"valid_targets_min": 382
|
|
},
|
|
{
|
|
"epoch": 0.20764119601328904,
|
|
"grad_norm": 0.546257051358398,
|
|
"learning_rate": 1.1753554502369669e-05,
|
|
"loss": 0.4324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39734572172164917,
|
|
"step": 125,
|
|
"valid_targets_mean": 5393.0,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 0.2159468438538206,
|
|
"grad_norm": 0.552986087576501,
|
|
"learning_rate": 1.2227488151658769e-05,
|
|
"loss": 0.4287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4367840886116028,
|
|
"step": 130,
|
|
"valid_targets_mean": 4981.8,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 0.22425249169435216,
|
|
"grad_norm": 0.5324103742052585,
|
|
"learning_rate": 1.270142180094787e-05,
|
|
"loss": 0.4347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40893033146858215,
|
|
"step": 135,
|
|
"valid_targets_mean": 5697.0,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 0.23255813953488372,
|
|
"grad_norm": 0.5579902294134245,
|
|
"learning_rate": 1.3175355450236968e-05,
|
|
"loss": 0.4241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4034132957458496,
|
|
"step": 140,
|
|
"valid_targets_mean": 5757.1,
|
|
"valid_targets_min": 923
|
|
},
|
|
{
|
|
"epoch": 0.24086378737541528,
|
|
"grad_norm": 0.5649584066174167,
|
|
"learning_rate": 1.3649289099526068e-05,
|
|
"loss": 0.4099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3850095272064209,
|
|
"step": 145,
|
|
"valid_targets_mean": 4622.9,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 0.24916943521594684,
|
|
"grad_norm": 0.6389677169760279,
|
|
"learning_rate": 1.4123222748815166e-05,
|
|
"loss": 0.4078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.425051212310791,
|
|
"step": 150,
|
|
"valid_targets_mean": 4638.8,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 0.2574750830564784,
|
|
"grad_norm": 0.5513951146717218,
|
|
"learning_rate": 1.4597156398104267e-05,
|
|
"loss": 0.4048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3969568908214569,
|
|
"step": 155,
|
|
"valid_targets_mean": 5463.9,
|
|
"valid_targets_min": 917
|
|
},
|
|
{
|
|
"epoch": 0.26578073089701,
|
|
"grad_norm": 0.5744513655371918,
|
|
"learning_rate": 1.5071090047393367e-05,
|
|
"loss": 0.3972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43855953216552734,
|
|
"step": 160,
|
|
"valid_targets_mean": 6124.2,
|
|
"valid_targets_min": 474
|
|
},
|
|
{
|
|
"epoch": 0.27408637873754155,
|
|
"grad_norm": 0.7552094259397921,
|
|
"learning_rate": 1.5545023696682465e-05,
|
|
"loss": 0.3999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38472798466682434,
|
|
"step": 165,
|
|
"valid_targets_mean": 5093.3,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 0.2823920265780731,
|
|
"grad_norm": 0.6094875351934611,
|
|
"learning_rate": 1.6018957345971565e-05,
|
|
"loss": 0.3574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3211315870285034,
|
|
"step": 170,
|
|
"valid_targets_mean": 5127.8,
|
|
"valid_targets_min": 2828
|
|
},
|
|
{
|
|
"epoch": 0.29069767441860467,
|
|
"grad_norm": 0.5201144407510339,
|
|
"learning_rate": 1.6492890995260666e-05,
|
|
"loss": 0.3895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3895410895347595,
|
|
"step": 175,
|
|
"valid_targets_mean": 5951.4,
|
|
"valid_targets_min": 1193
|
|
},
|
|
{
|
|
"epoch": 0.29900332225913623,
|
|
"grad_norm": 0.5324097625156006,
|
|
"learning_rate": 1.6966824644549766e-05,
|
|
"loss": 0.399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35034990310668945,
|
|
"step": 180,
|
|
"valid_targets_mean": 5621.4,
|
|
"valid_targets_min": 2613
|
|
},
|
|
{
|
|
"epoch": 0.3073089700996678,
|
|
"grad_norm": 0.6520611299640444,
|
|
"learning_rate": 1.7440758293838863e-05,
|
|
"loss": 0.4045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39734140038490295,
|
|
"step": 185,
|
|
"valid_targets_mean": 3857.4,
|
|
"valid_targets_min": 213
|
|
},
|
|
{
|
|
"epoch": 0.31561461794019935,
|
|
"grad_norm": 0.5658830481295248,
|
|
"learning_rate": 1.7914691943127963e-05,
|
|
"loss": 0.3721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4059755206108093,
|
|
"step": 190,
|
|
"valid_targets_mean": 5306.4,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 0.3239202657807309,
|
|
"grad_norm": 0.5950496153759633,
|
|
"learning_rate": 1.8388625592417063e-05,
|
|
"loss": 0.3764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36192968487739563,
|
|
"step": 195,
|
|
"valid_targets_mean": 4483.1,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 0.33222591362126247,
|
|
"grad_norm": 0.6042275291861512,
|
|
"learning_rate": 1.8862559241706163e-05,
|
|
"loss": 0.3629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3676679730415344,
|
|
"step": 200,
|
|
"valid_targets_mean": 4684.7,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 0.34053156146179403,
|
|
"grad_norm": 0.5198568739623043,
|
|
"learning_rate": 1.9336492890995263e-05,
|
|
"loss": 0.3494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36391353607177734,
|
|
"step": 205,
|
|
"valid_targets_mean": 6340.4,
|
|
"valid_targets_min": 2937
|
|
},
|
|
{
|
|
"epoch": 0.3488372093023256,
|
|
"grad_norm": 0.5643053803798733,
|
|
"learning_rate": 1.9810426540284364e-05,
|
|
"loss": 0.3906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3798864483833313,
|
|
"step": 210,
|
|
"valid_targets_mean": 5458.4,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 0.35714285714285715,
|
|
"grad_norm": 0.6724804289482563,
|
|
"learning_rate": 2.0284360189573464e-05,
|
|
"loss": 0.3761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3707191050052643,
|
|
"step": 215,
|
|
"valid_targets_mean": 4479.2,
|
|
"valid_targets_min": 511
|
|
},
|
|
{
|
|
"epoch": 0.3654485049833887,
|
|
"grad_norm": 0.5618358507834652,
|
|
"learning_rate": 2.075829383886256e-05,
|
|
"loss": 0.3829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3925355076789856,
|
|
"step": 220,
|
|
"valid_targets_mean": 5793.3,
|
|
"valid_targets_min": 772
|
|
},
|
|
{
|
|
"epoch": 0.37375415282392027,
|
|
"grad_norm": 0.5887229523131748,
|
|
"learning_rate": 2.123222748815166e-05,
|
|
"loss": 0.3438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35597363114356995,
|
|
"step": 225,
|
|
"valid_targets_mean": 5035.4,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 0.38205980066445183,
|
|
"grad_norm": 0.6532251647640784,
|
|
"learning_rate": 2.170616113744076e-05,
|
|
"loss": 0.3626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3737730085849762,
|
|
"step": 230,
|
|
"valid_targets_mean": 5790.0,
|
|
"valid_targets_min": 769
|
|
},
|
|
{
|
|
"epoch": 0.3903654485049834,
|
|
"grad_norm": 0.6373654583165238,
|
|
"learning_rate": 2.2180094786729858e-05,
|
|
"loss": 0.3792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36688798666000366,
|
|
"step": 235,
|
|
"valid_targets_mean": 4740.2,
|
|
"valid_targets_min": 351
|
|
},
|
|
{
|
|
"epoch": 0.39867109634551495,
|
|
"grad_norm": 0.5750410118543332,
|
|
"learning_rate": 2.265402843601896e-05,
|
|
"loss": 0.3837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4243386387825012,
|
|
"step": 240,
|
|
"valid_targets_mean": 5998.6,
|
|
"valid_targets_min": 2655
|
|
},
|
|
{
|
|
"epoch": 0.4069767441860465,
|
|
"grad_norm": 0.6881629358470099,
|
|
"learning_rate": 2.3127962085308058e-05,
|
|
"loss": 0.3504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3021879196166992,
|
|
"step": 245,
|
|
"valid_targets_mean": 3418.1,
|
|
"valid_targets_min": 503
|
|
},
|
|
{
|
|
"epoch": 0.4152823920265781,
|
|
"grad_norm": 0.6341276989881133,
|
|
"learning_rate": 2.360189573459716e-05,
|
|
"loss": 0.3596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34291350841522217,
|
|
"step": 250,
|
|
"valid_targets_mean": 4635.1,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 0.42358803986710963,
|
|
"grad_norm": 0.5876484750970724,
|
|
"learning_rate": 2.407582938388626e-05,
|
|
"loss": 0.3568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3602282404899597,
|
|
"step": 255,
|
|
"valid_targets_mean": 5328.9,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 0.4318936877076412,
|
|
"grad_norm": 0.8191070239952852,
|
|
"learning_rate": 2.4549763033175355e-05,
|
|
"loss": 0.3261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29510170221328735,
|
|
"step": 260,
|
|
"valid_targets_mean": 5275.2,
|
|
"valid_targets_min": 2629
|
|
},
|
|
{
|
|
"epoch": 0.44019933554817275,
|
|
"grad_norm": 0.6344230028149005,
|
|
"learning_rate": 2.502369668246446e-05,
|
|
"loss": 0.3618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31199130415916443,
|
|
"step": 265,
|
|
"valid_targets_mean": 3854.2,
|
|
"valid_targets_min": 573
|
|
},
|
|
{
|
|
"epoch": 0.4485049833887043,
|
|
"grad_norm": 0.5860856592051626,
|
|
"learning_rate": 2.5497630331753556e-05,
|
|
"loss": 0.3555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3384532034397125,
|
|
"step": 270,
|
|
"valid_targets_mean": 4699.3,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 0.4568106312292359,
|
|
"grad_norm": 0.5328335017048347,
|
|
"learning_rate": 2.597156398104266e-05,
|
|
"loss": 0.3597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37549659609794617,
|
|
"step": 275,
|
|
"valid_targets_mean": 6095.4,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 0.46511627906976744,
|
|
"grad_norm": 0.6664724998242643,
|
|
"learning_rate": 2.6445497630331756e-05,
|
|
"loss": 0.3431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3419986963272095,
|
|
"step": 280,
|
|
"valid_targets_mean": 3830.9,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 0.473421926910299,
|
|
"grad_norm": 0.6142798790216494,
|
|
"learning_rate": 2.6919431279620853e-05,
|
|
"loss": 0.3382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3588425815105438,
|
|
"step": 285,
|
|
"valid_targets_mean": 5187.6,
|
|
"valid_targets_min": 1771
|
|
},
|
|
{
|
|
"epoch": 0.48172757475083056,
|
|
"grad_norm": 0.7030694807678257,
|
|
"learning_rate": 2.7393364928909956e-05,
|
|
"loss": 0.3349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29806455969810486,
|
|
"step": 290,
|
|
"valid_targets_mean": 4411.9,
|
|
"valid_targets_min": 527
|
|
},
|
|
{
|
|
"epoch": 0.4900332225913621,
|
|
"grad_norm": 0.5854364060244618,
|
|
"learning_rate": 2.7867298578199053e-05,
|
|
"loss": 0.3714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3223992586135864,
|
|
"step": 295,
|
|
"valid_targets_mean": 4932.0,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 0.4983388704318937,
|
|
"grad_norm": 0.5796252870223279,
|
|
"learning_rate": 2.8341232227488157e-05,
|
|
"loss": 0.3394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2806846499443054,
|
|
"step": 300,
|
|
"valid_targets_mean": 5174.5,
|
|
"valid_targets_min": 1289
|
|
},
|
|
{
|
|
"epoch": 0.5066445182724253,
|
|
"grad_norm": 0.6084738369273639,
|
|
"learning_rate": 2.8815165876777254e-05,
|
|
"loss": 0.3324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3323943018913269,
|
|
"step": 305,
|
|
"valid_targets_mean": 5290.3,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 0.5149501661129569,
|
|
"grad_norm": 0.5612763586337469,
|
|
"learning_rate": 2.928909952606635e-05,
|
|
"loss": 0.3459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2992687523365021,
|
|
"step": 310,
|
|
"valid_targets_mean": 5911.8,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 0.5232558139534884,
|
|
"grad_norm": 0.5801345092627865,
|
|
"learning_rate": 2.9763033175355454e-05,
|
|
"loss": 0.3422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32399457693099976,
|
|
"step": 315,
|
|
"valid_targets_mean": 5090.7,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 0.53156146179402,
|
|
"grad_norm": 0.5360647047905942,
|
|
"learning_rate": 3.023696682464455e-05,
|
|
"loss": 0.3609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34694886207580566,
|
|
"step": 320,
|
|
"valid_targets_mean": 5753.2,
|
|
"valid_targets_min": 512
|
|
},
|
|
{
|
|
"epoch": 0.5398671096345515,
|
|
"grad_norm": 0.587318834201618,
|
|
"learning_rate": 3.0710900473933654e-05,
|
|
"loss": 0.3545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31561779975891113,
|
|
"step": 325,
|
|
"valid_targets_mean": 5655.4,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 0.5481727574750831,
|
|
"grad_norm": 0.6396828121786978,
|
|
"learning_rate": 3.1184834123222755e-05,
|
|
"loss": 0.3252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30067670345306396,
|
|
"step": 330,
|
|
"valid_targets_mean": 4559.8,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 0.5564784053156147,
|
|
"grad_norm": 0.6659151509309751,
|
|
"learning_rate": 3.165876777251185e-05,
|
|
"loss": 0.3366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2814994156360626,
|
|
"step": 335,
|
|
"valid_targets_mean": 4616.1,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 0.5647840531561462,
|
|
"grad_norm": 0.5428774748245527,
|
|
"learning_rate": 3.213270142180095e-05,
|
|
"loss": 0.3175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34746402502059937,
|
|
"step": 340,
|
|
"valid_targets_mean": 6225.1,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 0.5730897009966778,
|
|
"grad_norm": 0.7056285673816537,
|
|
"learning_rate": 3.260663507109005e-05,
|
|
"loss": 0.3065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30092182755470276,
|
|
"step": 345,
|
|
"valid_targets_mean": 3763.5,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 0.5813953488372093,
|
|
"grad_norm": 0.6204559462829294,
|
|
"learning_rate": 3.308056872037915e-05,
|
|
"loss": 0.3191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3269731402397156,
|
|
"step": 350,
|
|
"valid_targets_mean": 4788.4,
|
|
"valid_targets_min": 843
|
|
},
|
|
{
|
|
"epoch": 0.5897009966777409,
|
|
"grad_norm": 0.6208534226633148,
|
|
"learning_rate": 3.355450236966825e-05,
|
|
"loss": 0.3192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2896807789802551,
|
|
"step": 355,
|
|
"valid_targets_mean": 5023.6,
|
|
"valid_targets_min": 1286
|
|
},
|
|
{
|
|
"epoch": 0.5980066445182725,
|
|
"grad_norm": 0.6518014436267514,
|
|
"learning_rate": 3.402843601895735e-05,
|
|
"loss": 0.3299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41362127661705017,
|
|
"step": 360,
|
|
"valid_targets_mean": 5016.1,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 0.606312292358804,
|
|
"grad_norm": 0.5882372862199745,
|
|
"learning_rate": 3.450236966824645e-05,
|
|
"loss": 0.3199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3265566825866699,
|
|
"step": 365,
|
|
"valid_targets_mean": 5055.9,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 0.6146179401993356,
|
|
"grad_norm": 0.7108524348589256,
|
|
"learning_rate": 3.497630331753555e-05,
|
|
"loss": 0.3269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3792296051979065,
|
|
"step": 370,
|
|
"valid_targets_mean": 3260.6,
|
|
"valid_targets_min": 435
|
|
},
|
|
{
|
|
"epoch": 0.6229235880398671,
|
|
"grad_norm": 0.5788678944097126,
|
|
"learning_rate": 3.545023696682465e-05,
|
|
"loss": 0.335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28096988797187805,
|
|
"step": 375,
|
|
"valid_targets_mean": 5005.9,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 0.6312292358803987,
|
|
"grad_norm": 0.5701805966866338,
|
|
"learning_rate": 3.592417061611375e-05,
|
|
"loss": 0.3737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3432411551475525,
|
|
"step": 380,
|
|
"valid_targets_mean": 5441.9,
|
|
"valid_targets_min": 651
|
|
},
|
|
{
|
|
"epoch": 0.6395348837209303,
|
|
"grad_norm": 0.5476990194730532,
|
|
"learning_rate": 3.639810426540284e-05,
|
|
"loss": 0.3323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33220481872558594,
|
|
"step": 385,
|
|
"valid_targets_mean": 5595.0,
|
|
"valid_targets_min": 955
|
|
},
|
|
{
|
|
"epoch": 0.6478405315614618,
|
|
"grad_norm": 0.6817273101156454,
|
|
"learning_rate": 3.687203791469195e-05,
|
|
"loss": 0.3433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3132201135158539,
|
|
"step": 390,
|
|
"valid_targets_mean": 4173.2,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 0.6561461794019934,
|
|
"grad_norm": 0.5783431238438036,
|
|
"learning_rate": 3.7345971563981044e-05,
|
|
"loss": 0.3154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26501888036727905,
|
|
"step": 395,
|
|
"valid_targets_mean": 4910.5,
|
|
"valid_targets_min": 2751
|
|
},
|
|
{
|
|
"epoch": 0.6644518272425249,
|
|
"grad_norm": 0.625750379877627,
|
|
"learning_rate": 3.781990521327015e-05,
|
|
"loss": 0.3271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33998727798461914,
|
|
"step": 400,
|
|
"valid_targets_mean": 5056.7,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 0.6727574750830565,
|
|
"grad_norm": 0.709210098810998,
|
|
"learning_rate": 3.8293838862559244e-05,
|
|
"loss": 0.3152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25374695658683777,
|
|
"step": 405,
|
|
"valid_targets_mean": 3242.7,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 0.6810631229235881,
|
|
"grad_norm": 0.611788037670816,
|
|
"learning_rate": 3.8767772511848344e-05,
|
|
"loss": 0.3257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31435269117355347,
|
|
"step": 410,
|
|
"valid_targets_mean": 4622.7,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 0.6893687707641196,
|
|
"grad_norm": 0.6496611496679285,
|
|
"learning_rate": 3.9241706161137444e-05,
|
|
"loss": 0.3103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3404656648635864,
|
|
"step": 415,
|
|
"valid_targets_mean": 4763.7,
|
|
"valid_targets_min": 482
|
|
},
|
|
{
|
|
"epoch": 0.6976744186046512,
|
|
"grad_norm": 0.6629458076002883,
|
|
"learning_rate": 3.9715639810426545e-05,
|
|
"loss": 0.2992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3047250807285309,
|
|
"step": 420,
|
|
"valid_targets_mean": 3460.1,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 0.7059800664451827,
|
|
"grad_norm": 0.5755053065666984,
|
|
"learning_rate": 3.999997254490316e-05,
|
|
"loss": 0.3226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3613000810146332,
|
|
"step": 425,
|
|
"valid_targets_mean": 5691.2,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 0.7142857142857143,
|
|
"grad_norm": 0.4891795424431761,
|
|
"learning_rate": 3.999966367592929e-05,
|
|
"loss": 0.3225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3074835538864136,
|
|
"step": 430,
|
|
"valid_targets_mean": 5631.2,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 0.7225913621262459,
|
|
"grad_norm": 0.517655611269496,
|
|
"learning_rate": 3.9999011624428174e-05,
|
|
"loss": 0.3277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3318852186203003,
|
|
"step": 435,
|
|
"valid_targets_mean": 5965.0,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 0.7308970099667774,
|
|
"grad_norm": 0.543600826786542,
|
|
"learning_rate": 3.999801640158863e-05,
|
|
"loss": 0.3141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3029874563217163,
|
|
"step": 440,
|
|
"valid_targets_mean": 5263.9,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 0.739202657807309,
|
|
"grad_norm": 0.605567458226467,
|
|
"learning_rate": 3.9996678024488105e-05,
|
|
"loss": 0.3214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3405247628688812,
|
|
"step": 445,
|
|
"valid_targets_mean": 4902.6,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 0.7475083056478405,
|
|
"grad_norm": 0.5093833359886875,
|
|
"learning_rate": 3.9994996516092365e-05,
|
|
"loss": 0.3261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32281577587127686,
|
|
"step": 450,
|
|
"valid_targets_mean": 5584.6,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 0.7558139534883721,
|
|
"grad_norm": 0.5253033449456409,
|
|
"learning_rate": 3.999297190525511e-05,
|
|
"loss": 0.3039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29381173849105835,
|
|
"step": 455,
|
|
"valid_targets_mean": 5823.1,
|
|
"valid_targets_min": 2969
|
|
},
|
|
{
|
|
"epoch": 0.7641196013289037,
|
|
"grad_norm": 0.5501245424319312,
|
|
"learning_rate": 3.999060422671747e-05,
|
|
"loss": 0.3033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3256574869155884,
|
|
"step": 460,
|
|
"valid_targets_mean": 5808.8,
|
|
"valid_targets_min": 441
|
|
},
|
|
{
|
|
"epoch": 0.7724252491694352,
|
|
"grad_norm": 0.5698648998452172,
|
|
"learning_rate": 3.9987893521107434e-05,
|
|
"loss": 0.31,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3097285032272339,
|
|
"step": 465,
|
|
"valid_targets_mean": 4471.1,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 0.7807308970099668,
|
|
"grad_norm": 0.5129456575123024,
|
|
"learning_rate": 3.9984839834939116e-05,
|
|
"loss": 0.3246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2818831205368042,
|
|
"step": 470,
|
|
"valid_targets_mean": 5137.4,
|
|
"valid_targets_min": 2230
|
|
},
|
|
{
|
|
"epoch": 0.7890365448504983,
|
|
"grad_norm": 0.6100452663413307,
|
|
"learning_rate": 3.9981443220611995e-05,
|
|
"loss": 0.3051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27885013818740845,
|
|
"step": 475,
|
|
"valid_targets_mean": 3952.2,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 0.7973421926910299,
|
|
"grad_norm": 0.5712260677073212,
|
|
"learning_rate": 3.9977703736409967e-05,
|
|
"loss": 0.3299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36189597845077515,
|
|
"step": 480,
|
|
"valid_targets_mean": 5212.1,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 0.8056478405315615,
|
|
"grad_norm": 0.6222700402552255,
|
|
"learning_rate": 3.997362144650041e-05,
|
|
"loss": 0.3149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3484579920768738,
|
|
"step": 485,
|
|
"valid_targets_mean": 4130.6,
|
|
"valid_targets_min": 304
|
|
},
|
|
{
|
|
"epoch": 0.813953488372093,
|
|
"grad_norm": 0.5450874044865239,
|
|
"learning_rate": 3.996919642093302e-05,
|
|
"loss": 0.2939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2771856188774109,
|
|
"step": 490,
|
|
"valid_targets_mean": 4410.2,
|
|
"valid_targets_min": 511
|
|
},
|
|
{
|
|
"epoch": 0.8222591362126246,
|
|
"grad_norm": 0.5290505406057578,
|
|
"learning_rate": 3.996442873563866e-05,
|
|
"loss": 0.296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2816286087036133,
|
|
"step": 495,
|
|
"valid_targets_mean": 5216.2,
|
|
"valid_targets_min": 830
|
|
},
|
|
{
|
|
"epoch": 0.8305647840531561,
|
|
"grad_norm": 0.5431662062087397,
|
|
"learning_rate": 3.995931847242801e-05,
|
|
"loss": 0.3114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29384660720825195,
|
|
"step": 500,
|
|
"valid_targets_mean": 4825.2,
|
|
"valid_targets_min": 984
|
|
},
|
|
{
|
|
"epoch": 0.8388704318936877,
|
|
"grad_norm": 0.6024503835059016,
|
|
"learning_rate": 3.99538657189902e-05,
|
|
"loss": 0.3039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2954856753349304,
|
|
"step": 505,
|
|
"valid_targets_mean": 4225.7,
|
|
"valid_targets_min": 547
|
|
},
|
|
{
|
|
"epoch": 0.8471760797342193,
|
|
"grad_norm": 0.5969942089023372,
|
|
"learning_rate": 3.9948070568891284e-05,
|
|
"loss": 0.3126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3443995416164398,
|
|
"step": 510,
|
|
"valid_targets_mean": 5974.2,
|
|
"valid_targets_min": 2142
|
|
},
|
|
{
|
|
"epoch": 0.8554817275747508,
|
|
"grad_norm": 0.5282106182125506,
|
|
"learning_rate": 3.994193312157265e-05,
|
|
"loss": 0.2974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2953241169452667,
|
|
"step": 515,
|
|
"valid_targets_mean": 4760.9,
|
|
"valid_targets_min": 527
|
|
},
|
|
{
|
|
"epoch": 0.8637873754152824,
|
|
"grad_norm": 0.5078887957995915,
|
|
"learning_rate": 3.9935453482349304e-05,
|
|
"loss": 0.3333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3138103187084198,
|
|
"step": 520,
|
|
"valid_targets_mean": 6240.1,
|
|
"valid_targets_min": 3752
|
|
},
|
|
{
|
|
"epoch": 0.872093023255814,
|
|
"grad_norm": 0.5518672153499704,
|
|
"learning_rate": 3.992863176240806e-05,
|
|
"loss": 0.3043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32322049140930176,
|
|
"step": 525,
|
|
"valid_targets_mean": 5761.4,
|
|
"valid_targets_min": 526
|
|
},
|
|
{
|
|
"epoch": 0.8803986710963455,
|
|
"grad_norm": 0.46857757562133284,
|
|
"learning_rate": 3.992146807880565e-05,
|
|
"loss": 0.3047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29873818159103394,
|
|
"step": 530,
|
|
"valid_targets_mean": 6330.7,
|
|
"valid_targets_min": 845
|
|
},
|
|
{
|
|
"epoch": 0.8887043189368771,
|
|
"grad_norm": 0.6256100369974679,
|
|
"learning_rate": 3.9913962554466683e-05,
|
|
"loss": 0.3031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3322351574897766,
|
|
"step": 535,
|
|
"valid_targets_mean": 4274.1,
|
|
"valid_targets_min": 639
|
|
},
|
|
{
|
|
"epoch": 0.8970099667774086,
|
|
"grad_norm": 0.5706429790062494,
|
|
"learning_rate": 3.9906115318181565e-05,
|
|
"loss": 0.3131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.255405455827713,
|
|
"step": 540,
|
|
"valid_targets_mean": 4220.0,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 0.9053156146179402,
|
|
"grad_norm": 0.4697555608302045,
|
|
"learning_rate": 3.9897926504604294e-05,
|
|
"loss": 0.314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.307442307472229,
|
|
"step": 545,
|
|
"valid_targets_mean": 5420.3,
|
|
"valid_targets_min": 631
|
|
},
|
|
{
|
|
"epoch": 0.9136212624584718,
|
|
"grad_norm": 0.5693836813338435,
|
|
"learning_rate": 3.9889396254250106e-05,
|
|
"loss": 0.3079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32838526368141174,
|
|
"step": 550,
|
|
"valid_targets_mean": 4191.3,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 0.9219269102990033,
|
|
"grad_norm": 0.49609641454461595,
|
|
"learning_rate": 3.9880524713493105e-05,
|
|
"loss": 0.3002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2922705411911011,
|
|
"step": 555,
|
|
"valid_targets_mean": 5203.7,
|
|
"valid_targets_min": 2054
|
|
},
|
|
{
|
|
"epoch": 0.9302325581395349,
|
|
"grad_norm": 0.47198199577536987,
|
|
"learning_rate": 3.9871312034563754e-05,
|
|
"loss": 0.31,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2919141352176666,
|
|
"step": 560,
|
|
"valid_targets_mean": 6211.7,
|
|
"valid_targets_min": 2425
|
|
},
|
|
{
|
|
"epoch": 0.9385382059800664,
|
|
"grad_norm": 0.5032912785906263,
|
|
"learning_rate": 3.9861758375546216e-05,
|
|
"loss": 0.2937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2905442714691162,
|
|
"step": 565,
|
|
"valid_targets_mean": 5157.6,
|
|
"valid_targets_min": 829
|
|
},
|
|
{
|
|
"epoch": 0.946843853820598,
|
|
"grad_norm": 0.5372481941686715,
|
|
"learning_rate": 3.9851863900375703e-05,
|
|
"loss": 0.3288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3291934132575989,
|
|
"step": 570,
|
|
"valid_targets_mean": 4319.9,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 0.9551495016611296,
|
|
"grad_norm": 0.5102374352388634,
|
|
"learning_rate": 3.984162877883562e-05,
|
|
"loss": 0.3197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3316214680671692,
|
|
"step": 575,
|
|
"valid_targets_mean": 5702.9,
|
|
"valid_targets_min": 916
|
|
},
|
|
{
|
|
"epoch": 0.9634551495016611,
|
|
"grad_norm": 0.523749302166601,
|
|
"learning_rate": 3.983105318655465e-05,
|
|
"loss": 0.3078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3496777415275574,
|
|
"step": 580,
|
|
"valid_targets_mean": 5663.3,
|
|
"valid_targets_min": 569
|
|
},
|
|
{
|
|
"epoch": 0.9717607973421927,
|
|
"grad_norm": 0.4864194287838741,
|
|
"learning_rate": 3.982013730500379e-05,
|
|
"loss": 0.2998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3073327839374542,
|
|
"step": 585,
|
|
"valid_targets_mean": 5504.0,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 0.9800664451827242,
|
|
"grad_norm": 0.5362913534373752,
|
|
"learning_rate": 3.980888132149316e-05,
|
|
"loss": 0.3069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2812083065509796,
|
|
"step": 590,
|
|
"valid_targets_mean": 4532.6,
|
|
"valid_targets_min": 2724
|
|
},
|
|
{
|
|
"epoch": 0.9883720930232558,
|
|
"grad_norm": 0.637127886283442,
|
|
"learning_rate": 3.979728542916886e-05,
|
|
"loss": 0.3172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3064161539077759,
|
|
"step": 595,
|
|
"valid_targets_mean": 3246.2,
|
|
"valid_targets_min": 603
|
|
},
|
|
{
|
|
"epoch": 0.9966777408637874,
|
|
"grad_norm": 0.5931735660610699,
|
|
"learning_rate": 3.978534982700962e-05,
|
|
"loss": 0.3179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28773564100265503,
|
|
"step": 600,
|
|
"valid_targets_mean": 4369.7,
|
|
"valid_targets_min": 587
|
|
},
|
|
{
|
|
"epoch": 1.004983388704319,
|
|
"grad_norm": 0.6888231660560359,
|
|
"learning_rate": 3.977307471982339e-05,
|
|
"loss": 0.3144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3618839383125305,
|
|
"step": 605,
|
|
"valid_targets_mean": 5469.9,
|
|
"valid_targets_min": 653
|
|
},
|
|
{
|
|
"epoch": 1.0132890365448506,
|
|
"grad_norm": 0.5229107457893594,
|
|
"learning_rate": 3.976046031824382e-05,
|
|
"loss": 0.292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2850693464279175,
|
|
"step": 610,
|
|
"valid_targets_mean": 6157.9,
|
|
"valid_targets_min": 1058
|
|
},
|
|
{
|
|
"epoch": 1.021594684385382,
|
|
"grad_norm": 0.5426710969274259,
|
|
"learning_rate": 3.974750683872667e-05,
|
|
"loss": 0.3018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3326389491558075,
|
|
"step": 615,
|
|
"valid_targets_mean": 4686.4,
|
|
"valid_targets_min": 461
|
|
},
|
|
{
|
|
"epoch": 1.0299003322259137,
|
|
"grad_norm": 0.575904915408061,
|
|
"learning_rate": 3.9734214503546066e-05,
|
|
"loss": 0.3134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3624778389930725,
|
|
"step": 620,
|
|
"valid_targets_mean": 6963.6,
|
|
"valid_targets_min": 2413
|
|
},
|
|
{
|
|
"epoch": 1.0382059800664452,
|
|
"grad_norm": 0.4865966751403152,
|
|
"learning_rate": 3.97205835407907e-05,
|
|
"loss": 0.2892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2767280340194702,
|
|
"step": 625,
|
|
"valid_targets_mean": 5464.8,
|
|
"valid_targets_min": 527
|
|
},
|
|
{
|
|
"epoch": 1.0465116279069768,
|
|
"grad_norm": 0.5570656420740334,
|
|
"learning_rate": 3.970661418435993e-05,
|
|
"loss": 0.2922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34007349610328674,
|
|
"step": 630,
|
|
"valid_targets_mean": 5742.2,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 1.0548172757475083,
|
|
"grad_norm": 0.512004304333533,
|
|
"learning_rate": 3.969230667395971e-05,
|
|
"loss": 0.2945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27598506212234497,
|
|
"step": 635,
|
|
"valid_targets_mean": 4853.6,
|
|
"valid_targets_min": 967
|
|
},
|
|
{
|
|
"epoch": 1.06312292358804,
|
|
"grad_norm": 0.5409219117546781,
|
|
"learning_rate": 3.967766125509858e-05,
|
|
"loss": 0.2903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2624813914299011,
|
|
"step": 640,
|
|
"valid_targets_mean": 5001.8,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 1.0714285714285714,
|
|
"grad_norm": 0.4748380718924472,
|
|
"learning_rate": 3.966267817908335e-05,
|
|
"loss": 0.3032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3227808177471161,
|
|
"step": 645,
|
|
"valid_targets_mean": 6515.9,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 1.079734219269103,
|
|
"grad_norm": 0.4877023528906391,
|
|
"learning_rate": 3.964735770301482e-05,
|
|
"loss": 0.3034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3336644172668457,
|
|
"step": 650,
|
|
"valid_targets_mean": 6270.9,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 1.0880398671096345,
|
|
"grad_norm": 0.5021875688114863,
|
|
"learning_rate": 3.963170008978342e-05,
|
|
"loss": 0.2716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2544490694999695,
|
|
"step": 655,
|
|
"valid_targets_mean": 4696.1,
|
|
"valid_targets_min": 828
|
|
},
|
|
{
|
|
"epoch": 1.0963455149501662,
|
|
"grad_norm": 0.5060336791526511,
|
|
"learning_rate": 3.961570560806461e-05,
|
|
"loss": 0.3064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28085631132125854,
|
|
"step": 660,
|
|
"valid_targets_mean": 5120.9,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 1.1046511627906976,
|
|
"grad_norm": 0.5495574832232266,
|
|
"learning_rate": 3.959937453231436e-05,
|
|
"loss": 0.3268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32085978984832764,
|
|
"step": 665,
|
|
"valid_targets_mean": 4189.6,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 1.1129568106312293,
|
|
"grad_norm": 0.560757660216992,
|
|
"learning_rate": 3.9582707142764356e-05,
|
|
"loss": 0.2681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2655804753303528,
|
|
"step": 670,
|
|
"valid_targets_mean": 6302.7,
|
|
"valid_targets_min": 3373
|
|
},
|
|
{
|
|
"epoch": 1.1212624584717608,
|
|
"grad_norm": 0.5548385880405362,
|
|
"learning_rate": 3.9565703725417266e-05,
|
|
"loss": 0.2995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2701783776283264,
|
|
"step": 675,
|
|
"valid_targets_mean": 4614.1,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 1.1295681063122924,
|
|
"grad_norm": 0.5443693352271429,
|
|
"learning_rate": 3.9548364572041774e-05,
|
|
"loss": 0.3132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34064435958862305,
|
|
"step": 680,
|
|
"valid_targets_mean": 5705.4,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 1.1378737541528239,
|
|
"grad_norm": 0.5025618915475367,
|
|
"learning_rate": 3.9530689980167615e-05,
|
|
"loss": 0.2832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27261146903038025,
|
|
"step": 685,
|
|
"valid_targets_mean": 5229.9,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 1.1461794019933556,
|
|
"grad_norm": 0.5259931145802894,
|
|
"learning_rate": 3.951268025308043e-05,
|
|
"loss": 0.3025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3344430923461914,
|
|
"step": 690,
|
|
"valid_targets_mean": 6000.6,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 1.154485049833887,
|
|
"grad_norm": 0.522254224522639,
|
|
"learning_rate": 3.949433569981661e-05,
|
|
"loss": 0.313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3133403956890106,
|
|
"step": 695,
|
|
"valid_targets_mean": 4665.7,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 1.1627906976744187,
|
|
"grad_norm": 0.47529643830777113,
|
|
"learning_rate": 3.9475656635157954e-05,
|
|
"loss": 0.2859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28639915585517883,
|
|
"step": 700,
|
|
"valid_targets_mean": 6236.0,
|
|
"valid_targets_min": 493
|
|
},
|
|
{
|
|
"epoch": 1.1710963455149501,
|
|
"grad_norm": 0.49540769023527614,
|
|
"learning_rate": 3.9456643379626284e-05,
|
|
"loss": 0.3013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3145819306373596,
|
|
"step": 705,
|
|
"valid_targets_mean": 5442.4,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 1.1794019933554818,
|
|
"grad_norm": 0.5561531878884969,
|
|
"learning_rate": 3.943729625947794e-05,
|
|
"loss": 0.2911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25694239139556885,
|
|
"step": 710,
|
|
"valid_targets_mean": 4001.4,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 1.1877076411960132,
|
|
"grad_norm": 0.5058519892156035,
|
|
"learning_rate": 3.941761560669818e-05,
|
|
"loss": 0.2879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2663191556930542,
|
|
"step": 715,
|
|
"valid_targets_mean": 4916.1,
|
|
"valid_targets_min": 2674
|
|
},
|
|
{
|
|
"epoch": 1.196013289036545,
|
|
"grad_norm": 0.49820954033094433,
|
|
"learning_rate": 3.9397601758995493e-05,
|
|
"loss": 0.3014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2719859480857849,
|
|
"step": 720,
|
|
"valid_targets_mean": 5184.2,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 1.2043189368770764,
|
|
"grad_norm": 0.539586508629954,
|
|
"learning_rate": 3.93772550597958e-05,
|
|
"loss": 0.2933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3042001724243164,
|
|
"step": 725,
|
|
"valid_targets_mean": 4358.6,
|
|
"valid_targets_min": 486
|
|
},
|
|
{
|
|
"epoch": 1.212624584717608,
|
|
"grad_norm": 0.4769920011384226,
|
|
"learning_rate": 3.935657585823655e-05,
|
|
"loss": 0.2932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2530266046524048,
|
|
"step": 730,
|
|
"valid_targets_mean": 4836.1,
|
|
"valid_targets_min": 634
|
|
},
|
|
{
|
|
"epoch": 1.2209302325581395,
|
|
"grad_norm": 0.49405319114559576,
|
|
"learning_rate": 3.9335564509160746e-05,
|
|
"loss": 0.284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25528600811958313,
|
|
"step": 735,
|
|
"valid_targets_mean": 5111.9,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 1.2292358803986712,
|
|
"grad_norm": 0.53746972484268,
|
|
"learning_rate": 3.931422137311084e-05,
|
|
"loss": 0.2991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28906476497650146,
|
|
"step": 740,
|
|
"valid_targets_mean": 5207.6,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 1.2375415282392026,
|
|
"grad_norm": 0.45933630630092054,
|
|
"learning_rate": 3.929254681632257e-05,
|
|
"loss": 0.291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.265758216381073,
|
|
"step": 745,
|
|
"valid_targets_mean": 4943.7,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 1.2458471760797343,
|
|
"grad_norm": 0.5161787118108916,
|
|
"learning_rate": 3.927054121071864e-05,
|
|
"loss": 0.2932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25383734703063965,
|
|
"step": 750,
|
|
"valid_targets_mean": 4219.4,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 1.2541528239202657,
|
|
"grad_norm": 0.4633222020178814,
|
|
"learning_rate": 3.924820493390236e-05,
|
|
"loss": 0.3035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3410293459892273,
|
|
"step": 755,
|
|
"valid_targets_mean": 6382.1,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 1.2624584717607974,
|
|
"grad_norm": 0.4354200542321231,
|
|
"learning_rate": 3.9225538369151185e-05,
|
|
"loss": 0.2823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2690141797065735,
|
|
"step": 760,
|
|
"valid_targets_mean": 5444.6,
|
|
"valid_targets_min": 852
|
|
},
|
|
{
|
|
"epoch": 1.2707641196013288,
|
|
"grad_norm": 0.49534721780952795,
|
|
"learning_rate": 3.92025419054101e-05,
|
|
"loss": 0.2953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2663045823574066,
|
|
"step": 765,
|
|
"valid_targets_mean": 4536.6,
|
|
"valid_targets_min": 427
|
|
},
|
|
{
|
|
"epoch": 1.2790697674418605,
|
|
"grad_norm": 0.5446530164253917,
|
|
"learning_rate": 3.9179215937284965e-05,
|
|
"loss": 0.2837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2622339129447937,
|
|
"step": 770,
|
|
"valid_targets_mean": 3900.9,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 1.287375415282392,
|
|
"grad_norm": 0.8125826068839734,
|
|
"learning_rate": 3.915556086503576e-05,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28925812244415283,
|
|
"step": 775,
|
|
"valid_targets_mean": 3734.4,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 1.2956810631229236,
|
|
"grad_norm": 0.5440655357444971,
|
|
"learning_rate": 3.913157709456966e-05,
|
|
"loss": 0.2759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2915308475494385,
|
|
"step": 780,
|
|
"valid_targets_mean": 5366.4,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 1.303986710963455,
|
|
"grad_norm": 0.4896620630043848,
|
|
"learning_rate": 3.910726503743415e-05,
|
|
"loss": 0.2707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2601667046546936,
|
|
"step": 785,
|
|
"valid_targets_mean": 4646.8,
|
|
"valid_targets_min": 1566
|
|
},
|
|
{
|
|
"epoch": 1.3122923588039868,
|
|
"grad_norm": 0.4729636402881433,
|
|
"learning_rate": 3.90826251108099e-05,
|
|
"loss": 0.2812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3063233196735382,
|
|
"step": 790,
|
|
"valid_targets_mean": 5293.9,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 1.3205980066445182,
|
|
"grad_norm": 0.4540977138251527,
|
|
"learning_rate": 3.905765773750364e-05,
|
|
"loss": 0.2885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3045766055583954,
|
|
"step": 795,
|
|
"valid_targets_mean": 5919.5,
|
|
"valid_targets_min": 908
|
|
},
|
|
{
|
|
"epoch": 1.3289036544850499,
|
|
"grad_norm": 0.5337914557604931,
|
|
"learning_rate": 3.903236334594089e-05,
|
|
"loss": 0.2878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3062437176704407,
|
|
"step": 800,
|
|
"valid_targets_mean": 5092.9,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 1.3372093023255813,
|
|
"grad_norm": 0.5221560381668141,
|
|
"learning_rate": 3.900674237015859e-05,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26259639859199524,
|
|
"step": 805,
|
|
"valid_targets_mean": 4090.6,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 1.345514950166113,
|
|
"grad_norm": 0.520929596409131,
|
|
"learning_rate": 3.898079524979772e-05,
|
|
"loss": 0.3039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2634012699127197,
|
|
"step": 810,
|
|
"valid_targets_mean": 4800.0,
|
|
"valid_targets_min": 514
|
|
},
|
|
{
|
|
"epoch": 1.3538205980066444,
|
|
"grad_norm": 0.516090769413903,
|
|
"learning_rate": 3.895452243009567e-05,
|
|
"loss": 0.2961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3004676103591919,
|
|
"step": 815,
|
|
"valid_targets_mean": 4922.6,
|
|
"valid_targets_min": 955
|
|
},
|
|
{
|
|
"epoch": 1.3621262458471761,
|
|
"grad_norm": 0.47503790854010763,
|
|
"learning_rate": 3.8927924361878655e-05,
|
|
"loss": 0.308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30538129806518555,
|
|
"step": 820,
|
|
"valid_targets_mean": 6194.9,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 1.3704318936877076,
|
|
"grad_norm": 0.5702242499814434,
|
|
"learning_rate": 3.890100150155397e-05,
|
|
"loss": 0.2937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2950683832168579,
|
|
"step": 825,
|
|
"valid_targets_mean": 4155.8,
|
|
"valid_targets_min": 529
|
|
},
|
|
{
|
|
"epoch": 1.3787375415282392,
|
|
"grad_norm": 0.4446797988072569,
|
|
"learning_rate": 3.887375431110214e-05,
|
|
"loss": 0.2986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28779417276382446,
|
|
"step": 830,
|
|
"valid_targets_mean": 5732.4,
|
|
"valid_targets_min": 554
|
|
},
|
|
{
|
|
"epoch": 1.3870431893687707,
|
|
"grad_norm": 0.5462795314647529,
|
|
"learning_rate": 3.884618325806901e-05,
|
|
"loss": 0.2859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29204294085502625,
|
|
"step": 835,
|
|
"valid_targets_mean": 4158.0,
|
|
"valid_targets_min": 482
|
|
},
|
|
{
|
|
"epoch": 1.3953488372093024,
|
|
"grad_norm": 0.5320903576190924,
|
|
"learning_rate": 3.881828881555773e-05,
|
|
"loss": 0.3001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2772865295410156,
|
|
"step": 840,
|
|
"valid_targets_mean": 3987.7,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 1.4036544850498338,
|
|
"grad_norm": 0.5095062063836396,
|
|
"learning_rate": 3.87900714622206e-05,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29997989535331726,
|
|
"step": 845,
|
|
"valid_targets_mean": 5373.9,
|
|
"valid_targets_min": 2288
|
|
},
|
|
{
|
|
"epoch": 1.4119601328903655,
|
|
"grad_norm": 0.602571170816516,
|
|
"learning_rate": 3.87615316822509e-05,
|
|
"loss": 0.3031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31255316734313965,
|
|
"step": 850,
|
|
"valid_targets_mean": 3572.0,
|
|
"valid_targets_min": 512
|
|
},
|
|
{
|
|
"epoch": 1.420265780730897,
|
|
"grad_norm": 0.5568822693051746,
|
|
"learning_rate": 3.873266996537456e-05,
|
|
"loss": 0.2909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34251508116722107,
|
|
"step": 855,
|
|
"valid_targets_mean": 5009.6,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 1.4285714285714286,
|
|
"grad_norm": 0.5428853911457303,
|
|
"learning_rate": 3.870348680684175e-05,
|
|
"loss": 0.3143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.315402090549469,
|
|
"step": 860,
|
|
"valid_targets_mean": 4494.9,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 1.43687707641196,
|
|
"grad_norm": 0.5046434649507636,
|
|
"learning_rate": 3.867398270741839e-05,
|
|
"loss": 0.2921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25718969106674194,
|
|
"step": 865,
|
|
"valid_targets_mean": 4553.3,
|
|
"valid_targets_min": 934
|
|
},
|
|
{
|
|
"epoch": 1.4451827242524917,
|
|
"grad_norm": 0.47290559332155496,
|
|
"learning_rate": 3.864415817337757e-05,
|
|
"loss": 0.2776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3590620160102844,
|
|
"step": 870,
|
|
"valid_targets_mean": 6967.1,
|
|
"valid_targets_min": 585
|
|
},
|
|
{
|
|
"epoch": 1.4534883720930232,
|
|
"grad_norm": 0.480075912672473,
|
|
"learning_rate": 3.861401371649085e-05,
|
|
"loss": 0.2888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33668065071105957,
|
|
"step": 875,
|
|
"valid_targets_mean": 6199.4,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 1.4617940199335548,
|
|
"grad_norm": 0.43575159279221254,
|
|
"learning_rate": 3.8583549854019466e-05,
|
|
"loss": 0.2923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30505332350730896,
|
|
"step": 880,
|
|
"valid_targets_mean": 6428.2,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 1.4700996677740865,
|
|
"grad_norm": 0.48075804774657693,
|
|
"learning_rate": 3.855276710870547e-05,
|
|
"loss": 0.286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2913748025894165,
|
|
"step": 885,
|
|
"valid_targets_mean": 4890.1,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 1.478405315614618,
|
|
"grad_norm": 0.5090381384782782,
|
|
"learning_rate": 3.852166600876277e-05,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2767033874988556,
|
|
"step": 890,
|
|
"valid_targets_mean": 4748.8,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 1.4867109634551494,
|
|
"grad_norm": 0.5615409689463994,
|
|
"learning_rate": 3.8490247087868035e-05,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3018428087234497,
|
|
"step": 895,
|
|
"valid_targets_mean": 3511.5,
|
|
"valid_targets_min": 503
|
|
},
|
|
{
|
|
"epoch": 1.495016611295681,
|
|
"grad_norm": 0.44354493994948413,
|
|
"learning_rate": 3.845851088515159e-05,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23887747526168823,
|
|
"step": 900,
|
|
"valid_targets_mean": 4815.2,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 1.5033222591362128,
|
|
"grad_norm": 0.46080210018709905,
|
|
"learning_rate": 3.8426457945188065e-05,
|
|
"loss": 0.2921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.284885972738266,
|
|
"step": 905,
|
|
"valid_targets_mean": 5573.4,
|
|
"valid_targets_min": 807
|
|
},
|
|
{
|
|
"epoch": 1.5116279069767442,
|
|
"grad_norm": 0.5065519148508693,
|
|
"learning_rate": 3.839408881798719e-05,
|
|
"loss": 0.3223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3346424698829651,
|
|
"step": 910,
|
|
"valid_targets_mean": 5023.8,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 1.5199335548172757,
|
|
"grad_norm": 0.4768640797052248,
|
|
"learning_rate": 3.8361404058984236e-05,
|
|
"loss": 0.2618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29179665446281433,
|
|
"step": 915,
|
|
"valid_targets_mean": 5075.8,
|
|
"valid_targets_min": 843
|
|
},
|
|
{
|
|
"epoch": 1.5282392026578073,
|
|
"grad_norm": 0.4604747438731477,
|
|
"learning_rate": 3.832840422903054e-05,
|
|
"loss": 0.2628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2530898153781891,
|
|
"step": 920,
|
|
"valid_targets_mean": 5026.8,
|
|
"valid_targets_min": 2675
|
|
},
|
|
{
|
|
"epoch": 1.536544850498339,
|
|
"grad_norm": 0.5664497987628933,
|
|
"learning_rate": 3.829508989438389e-05,
|
|
"loss": 0.3064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.311147004365921,
|
|
"step": 925,
|
|
"valid_targets_mean": 3677.4,
|
|
"valid_targets_min": 666
|
|
},
|
|
{
|
|
"epoch": 1.5448504983388704,
|
|
"grad_norm": 0.46195419032984614,
|
|
"learning_rate": 3.826146162669876e-05,
|
|
"loss": 0.2862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27619361877441406,
|
|
"step": 930,
|
|
"valid_targets_mean": 5386.1,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 1.553156146179402,
|
|
"grad_norm": 0.47345367476340317,
|
|
"learning_rate": 3.822752000301656e-05,
|
|
"loss": 0.2945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29433903098106384,
|
|
"step": 935,
|
|
"valid_targets_mean": 5194.2,
|
|
"valid_targets_min": 885
|
|
},
|
|
{
|
|
"epoch": 1.5614617940199336,
|
|
"grad_norm": 0.4713078344243984,
|
|
"learning_rate": 3.819326560575569e-05,
|
|
"loss": 0.2878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3022962212562561,
|
|
"step": 940,
|
|
"valid_targets_mean": 4915.8,
|
|
"valid_targets_min": 603
|
|
},
|
|
{
|
|
"epoch": 1.5697674418604652,
|
|
"grad_norm": 0.47522244364229127,
|
|
"learning_rate": 3.815869902270157e-05,
|
|
"loss": 0.3047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3250786066055298,
|
|
"step": 945,
|
|
"valid_targets_mean": 4762.6,
|
|
"valid_targets_min": 382
|
|
},
|
|
{
|
|
"epoch": 1.5780730897009967,
|
|
"grad_norm": 0.46616999252565067,
|
|
"learning_rate": 3.8123820846996536e-05,
|
|
"loss": 0.2896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30675849318504333,
|
|
"step": 950,
|
|
"valid_targets_mean": 6108.4,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 1.5863787375415281,
|
|
"grad_norm": 0.4422127569635055,
|
|
"learning_rate": 3.808863167712967e-05,
|
|
"loss": 0.3049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28909924626350403,
|
|
"step": 955,
|
|
"valid_targets_mean": 6049.4,
|
|
"valid_targets_min": 2533
|
|
},
|
|
{
|
|
"epoch": 1.5946843853820598,
|
|
"grad_norm": 0.5289770482157309,
|
|
"learning_rate": 3.805313211692655e-05,
|
|
"loss": 0.2882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3245149850845337,
|
|
"step": 960,
|
|
"valid_targets_mean": 4808.3,
|
|
"valid_targets_min": 476
|
|
},
|
|
{
|
|
"epoch": 1.6029900332225915,
|
|
"grad_norm": 0.47739816422169007,
|
|
"learning_rate": 3.801732277553885e-05,
|
|
"loss": 0.2721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31651172041893005,
|
|
"step": 965,
|
|
"valid_targets_mean": 5467.1,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 1.611295681063123,
|
|
"grad_norm": 0.49128832194702554,
|
|
"learning_rate": 3.7981204267433935e-05,
|
|
"loss": 0.2847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27829277515411377,
|
|
"step": 970,
|
|
"valid_targets_mean": 4486.4,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 1.6196013289036544,
|
|
"grad_norm": 0.47558399775204485,
|
|
"learning_rate": 3.794477721238425e-05,
|
|
"loss": 0.2831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2587466835975647,
|
|
"step": 975,
|
|
"valid_targets_mean": 5338.1,
|
|
"valid_targets_min": 3069
|
|
},
|
|
{
|
|
"epoch": 1.627906976744186,
|
|
"grad_norm": 0.4885403016583345,
|
|
"learning_rate": 3.7908042235456753e-05,
|
|
"loss": 0.2875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.291198194026947,
|
|
"step": 980,
|
|
"valid_targets_mean": 4396.5,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 1.6362126245847177,
|
|
"grad_norm": 0.4863298644834688,
|
|
"learning_rate": 3.787099996700216e-05,
|
|
"loss": 0.273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27185869216918945,
|
|
"step": 985,
|
|
"valid_targets_mean": 4082.8,
|
|
"valid_targets_min": 459
|
|
},
|
|
{
|
|
"epoch": 1.6445182724252492,
|
|
"grad_norm": 0.5892519057937488,
|
|
"learning_rate": 3.783365104264413e-05,
|
|
"loss": 0.3144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2699460983276367,
|
|
"step": 990,
|
|
"valid_targets_mean": 4058.5,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 1.6528239202657806,
|
|
"grad_norm": 0.4390227504611203,
|
|
"learning_rate": 3.779599610326833e-05,
|
|
"loss": 0.2777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3203485608100891,
|
|
"step": 995,
|
|
"valid_targets_mean": 6579.4,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 1.6611295681063123,
|
|
"grad_norm": 0.4926627355777149,
|
|
"learning_rate": 3.775803579501151e-05,
|
|
"loss": 0.2922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27367210388183594,
|
|
"step": 1000,
|
|
"valid_targets_mean": 4893.9,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 1.669435215946844,
|
|
"grad_norm": 0.4824293369084763,
|
|
"learning_rate": 3.771977076925034e-05,
|
|
"loss": 0.2714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2616872787475586,
|
|
"step": 1005,
|
|
"valid_targets_mean": 5169.1,
|
|
"valid_targets_min": 547
|
|
},
|
|
{
|
|
"epoch": 1.6777408637873754,
|
|
"grad_norm": 0.45115930999315945,
|
|
"learning_rate": 3.7681201682590286e-05,
|
|
"loss": 0.2713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3078243136405945,
|
|
"step": 1010,
|
|
"valid_targets_mean": 5118.8,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 1.6860465116279069,
|
|
"grad_norm": 0.4423177799515056,
|
|
"learning_rate": 3.7642329196854295e-05,
|
|
"loss": 0.286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28629523515701294,
|
|
"step": 1015,
|
|
"valid_targets_mean": 6058.8,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 1.6943521594684385,
|
|
"grad_norm": 0.4484774589828352,
|
|
"learning_rate": 3.760315397907149e-05,
|
|
"loss": 0.2903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2919682264328003,
|
|
"step": 1020,
|
|
"valid_targets_mean": 5293.2,
|
|
"valid_targets_min": 280
|
|
},
|
|
{
|
|
"epoch": 1.7026578073089702,
|
|
"grad_norm": 0.4334033546024002,
|
|
"learning_rate": 3.756367670146567e-05,
|
|
"loss": 0.3102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2842004895210266,
|
|
"step": 1025,
|
|
"valid_targets_mean": 5717.6,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 1.7109634551495017,
|
|
"grad_norm": 0.4880672194601611,
|
|
"learning_rate": 3.752389804144384e-05,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29569852352142334,
|
|
"step": 1030,
|
|
"valid_targets_mean": 4964.4,
|
|
"valid_targets_min": 833
|
|
},
|
|
{
|
|
"epoch": 1.719269102990033,
|
|
"grad_norm": 0.49166601090686773,
|
|
"learning_rate": 3.748381868158452e-05,
|
|
"loss": 0.2844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26930907368659973,
|
|
"step": 1035,
|
|
"valid_targets_mean": 3834.4,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 1.7275747508305648,
|
|
"grad_norm": 0.5153135290941661,
|
|
"learning_rate": 3.7443439309626085e-05,
|
|
"loss": 0.2557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25721341371536255,
|
|
"step": 1040,
|
|
"valid_targets_mean": 4305.4,
|
|
"valid_targets_min": 639
|
|
},
|
|
{
|
|
"epoch": 1.7358803986710964,
|
|
"grad_norm": 0.457705826628166,
|
|
"learning_rate": 3.740276061845493e-05,
|
|
"loss": 0.2801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25773972272872925,
|
|
"step": 1045,
|
|
"valid_targets_mean": 4761.2,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 1.744186046511628,
|
|
"grad_norm": 0.42636194371496333,
|
|
"learning_rate": 3.7361783306093596e-05,
|
|
"loss": 0.2936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2969478368759155,
|
|
"step": 1050,
|
|
"valid_targets_mean": 5610.3,
|
|
"valid_targets_min": 452
|
|
},
|
|
{
|
|
"epoch": 1.7524916943521593,
|
|
"grad_norm": 0.4297676090343185,
|
|
"learning_rate": 3.732050807568878e-05,
|
|
"loss": 0.283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28738221526145935,
|
|
"step": 1055,
|
|
"valid_targets_mean": 5096.2,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 1.760797342192691,
|
|
"grad_norm": 0.4193183194698275,
|
|
"learning_rate": 3.7278935635499285e-05,
|
|
"loss": 0.2715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21359995007514954,
|
|
"step": 1060,
|
|
"valid_targets_mean": 4770.6,
|
|
"valid_targets_min": 2301
|
|
},
|
|
{
|
|
"epoch": 1.7691029900332227,
|
|
"grad_norm": 0.45339455688823804,
|
|
"learning_rate": 3.723706669888388e-05,
|
|
"loss": 0.3126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24299491941928864,
|
|
"step": 1065,
|
|
"valid_targets_mean": 4686.6,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 1.7774086378737541,
|
|
"grad_norm": 0.5075151366857561,
|
|
"learning_rate": 3.7194901984289e-05,
|
|
"loss": 0.2857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32399889826774597,
|
|
"step": 1070,
|
|
"valid_targets_mean": 5412.2,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 1.7857142857142856,
|
|
"grad_norm": 0.4373673876080265,
|
|
"learning_rate": 3.715244221523649e-05,
|
|
"loss": 0.2708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.316758394241333,
|
|
"step": 1075,
|
|
"valid_targets_mean": 6086.1,
|
|
"valid_targets_min": 1427
|
|
},
|
|
{
|
|
"epoch": 1.7940199335548173,
|
|
"grad_norm": 0.4205167924019315,
|
|
"learning_rate": 3.710968812031114e-05,
|
|
"loss": 0.2869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24194714426994324,
|
|
"step": 1080,
|
|
"valid_targets_mean": 6067.1,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 1.802325581395349,
|
|
"grad_norm": 0.4514189076636302,
|
|
"learning_rate": 3.7066640433148205e-05,
|
|
"loss": 0.265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2659444510936737,
|
|
"step": 1085,
|
|
"valid_targets_mean": 4856.6,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 1.8106312292358804,
|
|
"grad_norm": 0.4800486664988426,
|
|
"learning_rate": 3.7023299892420814e-05,
|
|
"loss": 0.2784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2994151711463928,
|
|
"step": 1090,
|
|
"valid_targets_mean": 4686.7,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 1.8189368770764118,
|
|
"grad_norm": 0.43448979114940633,
|
|
"learning_rate": 3.697966724182729e-05,
|
|
"loss": 0.2754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2875637114048004,
|
|
"step": 1095,
|
|
"valid_targets_mean": 6331.4,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 1.8272425249169435,
|
|
"grad_norm": 0.4773404484876808,
|
|
"learning_rate": 3.6935743230078364e-05,
|
|
"loss": 0.2593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2707805335521698,
|
|
"step": 1100,
|
|
"valid_targets_mean": 4793.4,
|
|
"valid_targets_min": 425
|
|
},
|
|
{
|
|
"epoch": 1.8355481727574752,
|
|
"grad_norm": 0.5152096558125074,
|
|
"learning_rate": 3.689152861088441e-05,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29115524888038635,
|
|
"step": 1105,
|
|
"valid_targets_mean": 4612.4,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 1.8438538205980066,
|
|
"grad_norm": 0.4560754382421404,
|
|
"learning_rate": 3.6847024142942414e-05,
|
|
"loss": 0.2948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27969104051589966,
|
|
"step": 1110,
|
|
"valid_targets_mean": 4865.6,
|
|
"valid_targets_min": 624
|
|
},
|
|
{
|
|
"epoch": 1.852159468438538,
|
|
"grad_norm": 0.4380812594820064,
|
|
"learning_rate": 3.6802230589923e-05,
|
|
"loss": 0.2784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3017381727695465,
|
|
"step": 1115,
|
|
"valid_targets_mean": 5803.8,
|
|
"valid_targets_min": 587
|
|
},
|
|
{
|
|
"epoch": 1.8604651162790697,
|
|
"grad_norm": 0.45933277534664796,
|
|
"learning_rate": 3.6757148720457336e-05,
|
|
"loss": 0.2872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29320573806762695,
|
|
"step": 1120,
|
|
"valid_targets_mean": 6492.1,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 1.8687707641196014,
|
|
"grad_norm": 0.5368760431251804,
|
|
"learning_rate": 3.671177930812392e-05,
|
|
"loss": 0.273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31470629572868347,
|
|
"step": 1125,
|
|
"valid_targets_mean": 5605.8,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 1.8770764119601329,
|
|
"grad_norm": 0.49482924932810735,
|
|
"learning_rate": 3.6666123131435336e-05,
|
|
"loss": 0.2951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32989320158958435,
|
|
"step": 1130,
|
|
"valid_targets_mean": 4802.8,
|
|
"valid_targets_min": 603
|
|
},
|
|
{
|
|
"epoch": 1.8853820598006643,
|
|
"grad_norm": 0.4449269876401289,
|
|
"learning_rate": 3.6620180973824846e-05,
|
|
"loss": 0.2766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27926716208457947,
|
|
"step": 1135,
|
|
"valid_targets_mean": 6146.9,
|
|
"valid_targets_min": 3128
|
|
},
|
|
{
|
|
"epoch": 1.893687707641196,
|
|
"grad_norm": 0.4315611717149053,
|
|
"learning_rate": 3.657395362363301e-05,
|
|
"loss": 0.2817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31050562858581543,
|
|
"step": 1140,
|
|
"valid_targets_mean": 6205.6,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 1.9019933554817277,
|
|
"grad_norm": 0.4644079426581674,
|
|
"learning_rate": 3.65274418740941e-05,
|
|
"loss": 0.2794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30840790271759033,
|
|
"step": 1145,
|
|
"valid_targets_mean": 5077.1,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 1.910299003322259,
|
|
"grad_norm": 0.4748376011485845,
|
|
"learning_rate": 3.648064652332253e-05,
|
|
"loss": 0.2979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30166974663734436,
|
|
"step": 1150,
|
|
"valid_targets_mean": 4474.3,
|
|
"valid_targets_min": 609
|
|
},
|
|
{
|
|
"epoch": 1.9186046511627906,
|
|
"grad_norm": 0.45206888537004103,
|
|
"learning_rate": 3.643356837429914e-05,
|
|
"loss": 0.2534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26346683502197266,
|
|
"step": 1155,
|
|
"valid_targets_mean": 4987.3,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 1.9269102990033222,
|
|
"grad_norm": 0.47144418809948196,
|
|
"learning_rate": 3.6386208234857424e-05,
|
|
"loss": 0.2735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2808268666267395,
|
|
"step": 1160,
|
|
"valid_targets_mean": 5031.9,
|
|
"valid_targets_min": 429
|
|
},
|
|
{
|
|
"epoch": 1.935215946843854,
|
|
"grad_norm": 0.4366788156050008,
|
|
"learning_rate": 3.633856691766966e-05,
|
|
"loss": 0.297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27548161149024963,
|
|
"step": 1165,
|
|
"valid_targets_mean": 6065.8,
|
|
"valid_targets_min": 1554
|
|
},
|
|
{
|
|
"epoch": 1.9435215946843853,
|
|
"grad_norm": 0.47576414714571424,
|
|
"learning_rate": 3.629064524023298e-05,
|
|
"loss": 0.2866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.284735769033432,
|
|
"step": 1170,
|
|
"valid_targets_mean": 4694.9,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 1.9518272425249168,
|
|
"grad_norm": 0.48136864486710357,
|
|
"learning_rate": 3.624244402485533e-05,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2554246783256531,
|
|
"step": 1175,
|
|
"valid_targets_mean": 4542.1,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 1.9601328903654485,
|
|
"grad_norm": 0.4960733348097461,
|
|
"learning_rate": 3.6193964098641365e-05,
|
|
"loss": 0.2928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28571632504463196,
|
|
"step": 1180,
|
|
"valid_targets_mean": 4600.6,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 1.9684385382059801,
|
|
"grad_norm": 0.4535092384203594,
|
|
"learning_rate": 3.614520629347825e-05,
|
|
"loss": 0.2775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26897484064102173,
|
|
"step": 1185,
|
|
"valid_targets_mean": 5831.1,
|
|
"valid_targets_min": 547
|
|
},
|
|
{
|
|
"epoch": 1.9767441860465116,
|
|
"grad_norm": 0.44309011980261487,
|
|
"learning_rate": 3.60961714460214e-05,
|
|
"loss": 0.3039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2599504590034485,
|
|
"step": 1190,
|
|
"valid_targets_mean": 6035.4,
|
|
"valid_targets_min": 2812
|
|
},
|
|
{
|
|
"epoch": 1.985049833887043,
|
|
"grad_norm": 0.4481800439557574,
|
|
"learning_rate": 3.604686039768011e-05,
|
|
"loss": 0.2819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29655835032463074,
|
|
"step": 1195,
|
|
"valid_targets_mean": 4944.9,
|
|
"valid_targets_min": 512
|
|
},
|
|
{
|
|
"epoch": 1.9933554817275747,
|
|
"grad_norm": 0.48630462087437465,
|
|
"learning_rate": 3.599727399460312e-05,
|
|
"loss": 0.2931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3129684329032898,
|
|
"step": 1200,
|
|
"valid_targets_mean": 5485.2,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 2.0016611295681064,
|
|
"grad_norm": 0.4294590392329195,
|
|
"learning_rate": 3.594741308766408e-05,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2822207808494568,
|
|
"step": 1205,
|
|
"valid_targets_mean": 5766.6,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 2.009966777408638,
|
|
"grad_norm": 0.4549501936764837,
|
|
"learning_rate": 3.589727853244699e-05,
|
|
"loss": 0.2719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.266976535320282,
|
|
"step": 1210,
|
|
"valid_targets_mean": 5464.2,
|
|
"valid_targets_min": 856
|
|
},
|
|
{
|
|
"epoch": 2.0182724252491693,
|
|
"grad_norm": 0.5207466297162416,
|
|
"learning_rate": 3.584687118923149e-05,
|
|
"loss": 0.2684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3081488311290741,
|
|
"step": 1215,
|
|
"valid_targets_mean": 5012.4,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 2.026578073089701,
|
|
"grad_norm": 0.41086521814512084,
|
|
"learning_rate": 3.5796191922978094e-05,
|
|
"loss": 0.2704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2226308137178421,
|
|
"step": 1220,
|
|
"valid_targets_mean": 5500.4,
|
|
"valid_targets_min": 651
|
|
},
|
|
{
|
|
"epoch": 2.0348837209302326,
|
|
"grad_norm": 0.4975927944468973,
|
|
"learning_rate": 3.574524160331335e-05,
|
|
"loss": 0.2878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28195029497146606,
|
|
"step": 1225,
|
|
"valid_targets_mean": 4297.9,
|
|
"valid_targets_min": 394
|
|
},
|
|
{
|
|
"epoch": 2.043189368770764,
|
|
"grad_norm": 0.46698848903006257,
|
|
"learning_rate": 3.569402110451494e-05,
|
|
"loss": 0.2771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27648794651031494,
|
|
"step": 1230,
|
|
"valid_targets_mean": 4990.7,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 2.0514950166112955,
|
|
"grad_norm": 0.5043487185875198,
|
|
"learning_rate": 3.564253130549666e-05,
|
|
"loss": 0.2515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24663381278514862,
|
|
"step": 1235,
|
|
"valid_targets_mean": 4033.1,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 2.0598006644518274,
|
|
"grad_norm": 0.4343404364137587,
|
|
"learning_rate": 3.559077308979332e-05,
|
|
"loss": 0.2568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27355989813804626,
|
|
"step": 1240,
|
|
"valid_targets_mean": 5773.1,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 2.068106312292359,
|
|
"grad_norm": 0.4590067953919732,
|
|
"learning_rate": 3.553874734554565e-05,
|
|
"loss": 0.267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23863957822322845,
|
|
"step": 1245,
|
|
"valid_targets_mean": 4645.1,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 2.0764119601328903,
|
|
"grad_norm": 0.4445814554220329,
|
|
"learning_rate": 3.548645496548495e-05,
|
|
"loss": 0.2657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2370821088552475,
|
|
"step": 1250,
|
|
"valid_targets_mean": 5588.4,
|
|
"valid_targets_min": 624
|
|
},
|
|
{
|
|
"epoch": 2.0847176079734218,
|
|
"grad_norm": 0.489250910417311,
|
|
"learning_rate": 3.543389684691788e-05,
|
|
"loss": 0.2633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2316930890083313,
|
|
"step": 1255,
|
|
"valid_targets_mean": 4202.4,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 2.0930232558139537,
|
|
"grad_norm": 0.4500980896193309,
|
|
"learning_rate": 3.5381073891711e-05,
|
|
"loss": 0.247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21102972328662872,
|
|
"step": 1260,
|
|
"valid_targets_mean": 4607.6,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 2.101328903654485,
|
|
"grad_norm": 0.4393385601742689,
|
|
"learning_rate": 3.532798700627532e-05,
|
|
"loss": 0.3065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2757747769355774,
|
|
"step": 1265,
|
|
"valid_targets_mean": 5341.6,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 2.1096345514950166,
|
|
"grad_norm": 0.4482571062484432,
|
|
"learning_rate": 3.5274637101550737e-05,
|
|
"loss": 0.268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.288438081741333,
|
|
"step": 1270,
|
|
"valid_targets_mean": 5328.2,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 2.117940199335548,
|
|
"grad_norm": 0.4446911056940034,
|
|
"learning_rate": 3.522102509299039e-05,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24938148260116577,
|
|
"step": 1275,
|
|
"valid_targets_mean": 4701.7,
|
|
"valid_targets_min": 461
|
|
},
|
|
{
|
|
"epoch": 2.12624584717608,
|
|
"grad_norm": 0.5077561517669842,
|
|
"learning_rate": 3.516715190054499e-05,
|
|
"loss": 0.2704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23901741206645966,
|
|
"step": 1280,
|
|
"valid_targets_mean": 4044.7,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 2.1345514950166113,
|
|
"grad_norm": 0.4094195461131985,
|
|
"learning_rate": 3.511301844864699e-05,
|
|
"loss": 0.2662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26339417695999146,
|
|
"step": 1285,
|
|
"valid_targets_mean": 6443.6,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 2.142857142857143,
|
|
"grad_norm": 0.43535819773255147,
|
|
"learning_rate": 3.505862566619476e-05,
|
|
"loss": 0.2863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3003505766391754,
|
|
"step": 1290,
|
|
"valid_targets_mean": 5341.1,
|
|
"valid_targets_min": 788
|
|
},
|
|
{
|
|
"epoch": 2.1511627906976742,
|
|
"grad_norm": 0.47857416418041365,
|
|
"learning_rate": 3.5003974486536614e-05,
|
|
"loss": 0.2631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27127063274383545,
|
|
"step": 1295,
|
|
"valid_targets_mean": 4545.8,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 2.159468438538206,
|
|
"grad_norm": 0.4322030627101844,
|
|
"learning_rate": 3.4949065847454835e-05,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2682017385959625,
|
|
"step": 1300,
|
|
"valid_targets_mean": 5062.9,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 2.1677740863787376,
|
|
"grad_norm": 0.42903527001853586,
|
|
"learning_rate": 3.4893900691149525e-05,
|
|
"loss": 0.2679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25989386439323425,
|
|
"step": 1305,
|
|
"valid_targets_mean": 5187.6,
|
|
"valid_targets_min": 843
|
|
},
|
|
{
|
|
"epoch": 2.176079734219269,
|
|
"grad_norm": 0.4205389055300957,
|
|
"learning_rate": 3.48384799642225e-05,
|
|
"loss": 0.2637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2943001687526703,
|
|
"step": 1310,
|
|
"valid_targets_mean": 6685.2,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 2.1843853820598005,
|
|
"grad_norm": 0.4630087521882423,
|
|
"learning_rate": 3.478280461766101e-05,
|
|
"loss": 0.2645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26686030626296997,
|
|
"step": 1315,
|
|
"valid_targets_mean": 4921.8,
|
|
"valid_targets_min": 493
|
|
},
|
|
{
|
|
"epoch": 2.1926910299003324,
|
|
"grad_norm": 0.5182762226876729,
|
|
"learning_rate": 3.4726875606821416e-05,
|
|
"loss": 0.2796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3340189456939697,
|
|
"step": 1320,
|
|
"valid_targets_mean": 5268.6,
|
|
"valid_targets_min": 480
|
|
},
|
|
{
|
|
"epoch": 2.200996677740864,
|
|
"grad_norm": 0.532933703891305,
|
|
"learning_rate": 3.467069389141284e-05,
|
|
"loss": 0.269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2983317971229553,
|
|
"step": 1325,
|
|
"valid_targets_mean": 4255.9,
|
|
"valid_targets_min": 585
|
|
},
|
|
{
|
|
"epoch": 2.2093023255813953,
|
|
"grad_norm": 0.46794262758347926,
|
|
"learning_rate": 3.461426043548063e-05,
|
|
"loss": 0.2559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.262519896030426,
|
|
"step": 1330,
|
|
"valid_targets_mean": 5030.1,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 2.2176079734219267,
|
|
"grad_norm": 0.465142432766031,
|
|
"learning_rate": 3.455757620738989e-05,
|
|
"loss": 0.2987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22782957553863525,
|
|
"step": 1335,
|
|
"valid_targets_mean": 4517.8,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 2.2259136212624586,
|
|
"grad_norm": 0.4371316369721788,
|
|
"learning_rate": 3.4500642179808785e-05,
|
|
"loss": 0.279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2929585576057434,
|
|
"step": 1340,
|
|
"valid_targets_mean": 6145.2,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 2.23421926910299,
|
|
"grad_norm": 0.41840596747104825,
|
|
"learning_rate": 3.444345932969193e-05,
|
|
"loss": 0.2701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2651864290237427,
|
|
"step": 1345,
|
|
"valid_targets_mean": 5451.1,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 2.2425249169435215,
|
|
"grad_norm": 0.47355942430967457,
|
|
"learning_rate": 3.438602863826359e-05,
|
|
"loss": 0.2696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29457277059555054,
|
|
"step": 1350,
|
|
"valid_targets_mean": 5124.8,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 2.250830564784053,
|
|
"grad_norm": 0.4340137857651024,
|
|
"learning_rate": 3.43283510910008e-05,
|
|
"loss": 0.2823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28043505549430847,
|
|
"step": 1355,
|
|
"valid_targets_mean": 5377.3,
|
|
"valid_targets_min": 631
|
|
},
|
|
{
|
|
"epoch": 2.259136212624585,
|
|
"grad_norm": 0.4478549902073935,
|
|
"learning_rate": 3.427042767761655e-05,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26396727561950684,
|
|
"step": 1360,
|
|
"valid_targets_mean": 4962.2,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 2.2674418604651163,
|
|
"grad_norm": 0.46234850544635087,
|
|
"learning_rate": 3.421225939204271e-05,
|
|
"loss": 0.2719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27198857069015503,
|
|
"step": 1365,
|
|
"valid_targets_mean": 5010.7,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 2.2757475083056478,
|
|
"grad_norm": 0.5119154388181445,
|
|
"learning_rate": 3.415384723241302e-05,
|
|
"loss": 0.2843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3083427846431732,
|
|
"step": 1370,
|
|
"valid_targets_mean": 4914.3,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 2.284053156146179,
|
|
"grad_norm": 0.49086984248211474,
|
|
"learning_rate": 3.4095192201045984e-05,
|
|
"loss": 0.2613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23466429114341736,
|
|
"step": 1375,
|
|
"valid_targets_mean": 3696.0,
|
|
"valid_targets_min": 830
|
|
},
|
|
{
|
|
"epoch": 2.292358803986711,
|
|
"grad_norm": 0.5396024888062844,
|
|
"learning_rate": 3.403629530442761e-05,
|
|
"loss": 0.2476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2596532702445984,
|
|
"step": 1380,
|
|
"valid_targets_mean": 3863.0,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 2.3006644518272426,
|
|
"grad_norm": 0.48601071833339743,
|
|
"learning_rate": 3.397715755319419e-05,
|
|
"loss": 0.2667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24594782292842865,
|
|
"step": 1385,
|
|
"valid_targets_mean": 4063.3,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 2.308970099667774,
|
|
"grad_norm": 0.393734194840965,
|
|
"learning_rate": 3.3917779962114935e-05,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2461731880903244,
|
|
"step": 1390,
|
|
"valid_targets_mean": 5947.2,
|
|
"valid_targets_min": 2792
|
|
},
|
|
{
|
|
"epoch": 2.3172757475083055,
|
|
"grad_norm": 0.4589422156266819,
|
|
"learning_rate": 3.385816355007455e-05,
|
|
"loss": 0.2536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2545913755893707,
|
|
"step": 1395,
|
|
"valid_targets_mean": 5429.3,
|
|
"valid_targets_min": 2067
|
|
},
|
|
{
|
|
"epoch": 2.3255813953488373,
|
|
"grad_norm": 0.44426032095873974,
|
|
"learning_rate": 3.3798309340055806e-05,
|
|
"loss": 0.2815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26692935824394226,
|
|
"step": 1400,
|
|
"valid_targets_mean": 5145.1,
|
|
"valid_targets_min": 2168
|
|
},
|
|
{
|
|
"epoch": 2.333887043189369,
|
|
"grad_norm": 0.424037441312101,
|
|
"learning_rate": 3.373821835912192e-05,
|
|
"loss": 0.2819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2334865927696228,
|
|
"step": 1405,
|
|
"valid_targets_mean": 5525.6,
|
|
"valid_targets_min": 838
|
|
},
|
|
{
|
|
"epoch": 2.3421926910299002,
|
|
"grad_norm": 0.4098818567305886,
|
|
"learning_rate": 3.3677891638398975e-05,
|
|
"loss": 0.2718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3011425733566284,
|
|
"step": 1410,
|
|
"valid_targets_mean": 6637.2,
|
|
"valid_targets_min": 3369
|
|
},
|
|
{
|
|
"epoch": 2.350498338870432,
|
|
"grad_norm": 0.4284470643391314,
|
|
"learning_rate": 3.361733021305819e-05,
|
|
"loss": 0.2888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30870482325553894,
|
|
"step": 1415,
|
|
"valid_targets_mean": 6715.5,
|
|
"valid_targets_min": 791
|
|
},
|
|
{
|
|
"epoch": 2.3588039867109636,
|
|
"grad_norm": 0.463540209843553,
|
|
"learning_rate": 3.3556535122298194e-05,
|
|
"loss": 0.2824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2925174832344055,
|
|
"step": 1420,
|
|
"valid_targets_mean": 5314.0,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 2.367109634551495,
|
|
"grad_norm": 0.44763966739219957,
|
|
"learning_rate": 3.349550740932717e-05,
|
|
"loss": 0.2688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23355399072170258,
|
|
"step": 1425,
|
|
"valid_targets_mean": 4276.5,
|
|
"valid_targets_min": 666
|
|
},
|
|
{
|
|
"epoch": 2.3754152823920265,
|
|
"grad_norm": 0.43338408902314346,
|
|
"learning_rate": 3.343424812134498e-05,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25443848967552185,
|
|
"step": 1430,
|
|
"valid_targets_mean": 5041.5,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 2.383720930232558,
|
|
"grad_norm": 0.4557302225262526,
|
|
"learning_rate": 3.337275830952515e-05,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3013466000556946,
|
|
"step": 1435,
|
|
"valid_targets_mean": 5353.8,
|
|
"valid_targets_min": 851
|
|
},
|
|
{
|
|
"epoch": 2.39202657807309,
|
|
"grad_norm": 0.4638406025118101,
|
|
"learning_rate": 3.3311039028996886e-05,
|
|
"loss": 0.2488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2358340620994568,
|
|
"step": 1440,
|
|
"valid_targets_mean": 4046.4,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 2.4003322259136213,
|
|
"grad_norm": 0.40328844830201044,
|
|
"learning_rate": 3.324909133882692e-05,
|
|
"loss": 0.2722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24890413880348206,
|
|
"step": 1445,
|
|
"valid_targets_mean": 5336.2,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 2.4086378737541527,
|
|
"grad_norm": 0.46179321067589824,
|
|
"learning_rate": 3.318691630200138e-05,
|
|
"loss": 0.2547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3097371757030487,
|
|
"step": 1450,
|
|
"valid_targets_mean": 5390.4,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 2.4169435215946846,
|
|
"grad_norm": 0.4419182828075534,
|
|
"learning_rate": 3.312451498540751e-05,
|
|
"loss": 0.2618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2645336985588074,
|
|
"step": 1455,
|
|
"valid_targets_mean": 5290.0,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 2.425249169435216,
|
|
"grad_norm": 0.45291888724142526,
|
|
"learning_rate": 3.306188845981541e-05,
|
|
"loss": 0.2834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26647859811782837,
|
|
"step": 1460,
|
|
"valid_targets_mean": 4978.2,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 2.4335548172757475,
|
|
"grad_norm": 0.5566196972407679,
|
|
"learning_rate": 3.2999037799859604e-05,
|
|
"loss": 0.2919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2677317261695862,
|
|
"step": 1465,
|
|
"valid_targets_mean": 3071.0,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 2.441860465116279,
|
|
"grad_norm": 0.48537805790452504,
|
|
"learning_rate": 3.2935964084020646e-05,
|
|
"loss": 0.253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24786819517612457,
|
|
"step": 1470,
|
|
"valid_targets_mean": 4419.4,
|
|
"valid_targets_min": 984
|
|
},
|
|
{
|
|
"epoch": 2.4501661129568104,
|
|
"grad_norm": 0.44024232879781666,
|
|
"learning_rate": 3.2872668394606596e-05,
|
|
"loss": 0.2562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23396417498588562,
|
|
"step": 1475,
|
|
"valid_targets_mean": 4355.9,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 2.4584717607973423,
|
|
"grad_norm": 0.3984989803393188,
|
|
"learning_rate": 3.280915181773445e-05,
|
|
"loss": 0.2574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2838985323905945,
|
|
"step": 1480,
|
|
"valid_targets_mean": 5822.4,
|
|
"valid_targets_min": 878
|
|
},
|
|
{
|
|
"epoch": 2.4667774086378738,
|
|
"grad_norm": 0.443168070314262,
|
|
"learning_rate": 3.274541544331151e-05,
|
|
"loss": 0.2725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2694739103317261,
|
|
"step": 1485,
|
|
"valid_targets_mean": 5391.1,
|
|
"valid_targets_min": 527
|
|
},
|
|
{
|
|
"epoch": 2.475083056478405,
|
|
"grad_norm": 0.4103632732950197,
|
|
"learning_rate": 3.2681460365016664e-05,
|
|
"loss": 0.2768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3245636224746704,
|
|
"step": 1490,
|
|
"valid_targets_mean": 6737.3,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 2.483388704318937,
|
|
"grad_norm": 0.45946365834404296,
|
|
"learning_rate": 3.261728768028166e-05,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28621888160705566,
|
|
"step": 1495,
|
|
"valid_targets_mean": 4737.0,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 2.4916943521594686,
|
|
"grad_norm": 0.5488184560267589,
|
|
"learning_rate": 3.255289849027218e-05,
|
|
"loss": 0.2609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2997356355190277,
|
|
"step": 1500,
|
|
"valid_targets_mean": 4845.4,
|
|
"valid_targets_min": 453
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"grad_norm": 0.43142268964154834,
|
|
"learning_rate": 3.248829389986908e-05,
|
|
"loss": 0.2583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25421881675720215,
|
|
"step": 1505,
|
|
"valid_targets_mean": 4949.2,
|
|
"valid_targets_min": 3058
|
|
},
|
|
{
|
|
"epoch": 2.5083056478405314,
|
|
"grad_norm": 0.44274248570760794,
|
|
"learning_rate": 3.2423475017649324e-05,
|
|
"loss": 0.2612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2921995222568512,
|
|
"step": 1510,
|
|
"valid_targets_mean": 5899.9,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 2.516611295681063,
|
|
"grad_norm": 0.4098390396712543,
|
|
"learning_rate": 3.235844295586702e-05,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28043097257614136,
|
|
"step": 1515,
|
|
"valid_targets_mean": 6987.8,
|
|
"valid_targets_min": 631
|
|
},
|
|
{
|
|
"epoch": 2.524916943521595,
|
|
"grad_norm": 0.47544423362460914,
|
|
"learning_rate": 3.229319883043428e-05,
|
|
"loss": 0.2635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27716541290283203,
|
|
"step": 1520,
|
|
"valid_targets_mean": 4321.4,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 2.5332225913621262,
|
|
"grad_norm": 0.5648934394271622,
|
|
"learning_rate": 3.2227743760902125e-05,
|
|
"loss": 0.2823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3022196292877197,
|
|
"step": 1525,
|
|
"valid_targets_mean": 5043.6,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 2.5415282392026577,
|
|
"grad_norm": 0.4505318673413579,
|
|
"learning_rate": 3.2162078870441273e-05,
|
|
"loss": 0.2766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3018271327018738,
|
|
"step": 1530,
|
|
"valid_targets_mean": 4957.6,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 2.5498338870431896,
|
|
"grad_norm": 0.4729152560726153,
|
|
"learning_rate": 3.20962052858228e-05,
|
|
"loss": 0.2451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27208223938941956,
|
|
"step": 1535,
|
|
"valid_targets_mean": 5751.6,
|
|
"valid_targets_min": 501
|
|
},
|
|
{
|
|
"epoch": 2.558139534883721,
|
|
"grad_norm": 0.46608933498524324,
|
|
"learning_rate": 3.203012413739889e-05,
|
|
"loss": 0.2571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24637091159820557,
|
|
"step": 1540,
|
|
"valid_targets_mean": 4847.2,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 2.5664451827242525,
|
|
"grad_norm": 0.4304901316466138,
|
|
"learning_rate": 3.196383655908338e-05,
|
|
"loss": 0.2464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24189898371696472,
|
|
"step": 1545,
|
|
"valid_targets_mean": 5632.8,
|
|
"valid_targets_min": 2939
|
|
},
|
|
{
|
|
"epoch": 2.574750830564784,
|
|
"grad_norm": 0.44260932917901147,
|
|
"learning_rate": 3.1897343688332334e-05,
|
|
"loss": 0.2434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2684367299079895,
|
|
"step": 1550,
|
|
"valid_targets_mean": 5640.1,
|
|
"valid_targets_min": 2848
|
|
},
|
|
{
|
|
"epoch": 2.5830564784053154,
|
|
"grad_norm": 0.4621921573945727,
|
|
"learning_rate": 3.183064666612451e-05,
|
|
"loss": 0.2404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.227268785238266,
|
|
"step": 1555,
|
|
"valid_targets_mean": 4492.8,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 2.5913621262458473,
|
|
"grad_norm": 0.4232751033672106,
|
|
"learning_rate": 3.176374663694179e-05,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25456172227859497,
|
|
"step": 1560,
|
|
"valid_targets_mean": 5725.9,
|
|
"valid_targets_min": 1331
|
|
},
|
|
{
|
|
"epoch": 2.5996677740863787,
|
|
"grad_norm": 0.43637689608641045,
|
|
"learning_rate": 3.169664474874953e-05,
|
|
"loss": 0.2615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27117669582366943,
|
|
"step": 1565,
|
|
"valid_targets_mean": 5533.2,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 2.60797342192691,
|
|
"grad_norm": 0.47315607951872657,
|
|
"learning_rate": 3.162934215297687e-05,
|
|
"loss": 0.2451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23831763863563538,
|
|
"step": 1570,
|
|
"valid_targets_mean": 4507.9,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 2.616279069767442,
|
|
"grad_norm": 0.43517652617132113,
|
|
"learning_rate": 3.156184000449697e-05,
|
|
"loss": 0.2659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27723631262779236,
|
|
"step": 1575,
|
|
"valid_targets_mean": 5730.8,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 2.6245847176079735,
|
|
"grad_norm": 0.45376740307626157,
|
|
"learning_rate": 3.1494139461607214e-05,
|
|
"loss": 0.2766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3351346552371979,
|
|
"step": 1580,
|
|
"valid_targets_mean": 6202.2,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 2.632890365448505,
|
|
"grad_norm": 0.458011854514846,
|
|
"learning_rate": 3.142624168600931e-05,
|
|
"loss": 0.2802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2508060038089752,
|
|
"step": 1585,
|
|
"valid_targets_mean": 4493.3,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 2.6411960132890364,
|
|
"grad_norm": 0.5582302346376129,
|
|
"learning_rate": 3.135814784278934e-05,
|
|
"loss": 0.2816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3375672698020935,
|
|
"step": 1590,
|
|
"valid_targets_mean": 4123.5,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 2.649501661129568,
|
|
"grad_norm": 0.4642457784185591,
|
|
"learning_rate": 3.128985910039784e-05,
|
|
"loss": 0.2581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26262742280960083,
|
|
"step": 1595,
|
|
"valid_targets_mean": 4733.5,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 2.6578073089700998,
|
|
"grad_norm": 0.4337293181321357,
|
|
"learning_rate": 3.122137663062966e-05,
|
|
"loss": 0.2515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2720309793949127,
|
|
"step": 1600,
|
|
"valid_targets_mean": 5775.5,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 2.666112956810631,
|
|
"grad_norm": 0.43528300374610834,
|
|
"learning_rate": 3.115270160860392e-05,
|
|
"loss": 0.2586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24604466557502747,
|
|
"step": 1605,
|
|
"valid_targets_mean": 4725.1,
|
|
"valid_targets_min": 891
|
|
},
|
|
{
|
|
"epoch": 2.6744186046511627,
|
|
"grad_norm": 0.40561450659900333,
|
|
"learning_rate": 3.108383521274381e-05,
|
|
"loss": 0.255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27057740092277527,
|
|
"step": 1610,
|
|
"valid_targets_mean": 7070.9,
|
|
"valid_targets_min": 520
|
|
},
|
|
{
|
|
"epoch": 2.6827242524916945,
|
|
"grad_norm": 0.4037184942550861,
|
|
"learning_rate": 3.10147786247564e-05,
|
|
"loss": 0.2514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22859853506088257,
|
|
"step": 1615,
|
|
"valid_targets_mean": 5642.7,
|
|
"valid_targets_min": 2972
|
|
},
|
|
{
|
|
"epoch": 2.691029900332226,
|
|
"grad_norm": 0.3852507216084121,
|
|
"learning_rate": 3.094553302961232e-05,
|
|
"loss": 0.2508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2409987449645996,
|
|
"step": 1620,
|
|
"valid_targets_mean": 5629.8,
|
|
"valid_targets_min": 498
|
|
},
|
|
{
|
|
"epoch": 2.6993355481727574,
|
|
"grad_norm": 0.46431961667373245,
|
|
"learning_rate": 3.087609961552549e-05,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2416231334209442,
|
|
"step": 1625,
|
|
"valid_targets_mean": 4440.2,
|
|
"valid_targets_min": 924
|
|
},
|
|
{
|
|
"epoch": 2.707641196013289,
|
|
"grad_norm": 0.44803720294906446,
|
|
"learning_rate": 3.080647957393266e-05,
|
|
"loss": 0.2602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24416396021842957,
|
|
"step": 1630,
|
|
"valid_targets_mean": 4486.6,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 2.7159468438538203,
|
|
"grad_norm": 0.47345955057268935,
|
|
"learning_rate": 3.073667409947301e-05,
|
|
"loss": 0.2632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2600746154785156,
|
|
"step": 1635,
|
|
"valid_targets_mean": 4554.4,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 2.7242524916943522,
|
|
"grad_norm": 0.45684559871189584,
|
|
"learning_rate": 3.066668438996764e-05,
|
|
"loss": 0.2625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24669149518013,
|
|
"step": 1640,
|
|
"valid_targets_mean": 5084.7,
|
|
"valid_targets_min": 861
|
|
},
|
|
{
|
|
"epoch": 2.7325581395348837,
|
|
"grad_norm": 0.4537092792413825,
|
|
"learning_rate": 3.0596511646399005e-05,
|
|
"loss": 0.2565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27298837900161743,
|
|
"step": 1645,
|
|
"valid_targets_mean": 4619.2,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 2.740863787375415,
|
|
"grad_norm": 0.427057616610351,
|
|
"learning_rate": 3.052615707289035e-05,
|
|
"loss": 0.2562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24815677106380463,
|
|
"step": 1650,
|
|
"valid_targets_mean": 4739.0,
|
|
"valid_targets_min": 2580
|
|
},
|
|
{
|
|
"epoch": 2.749169435215947,
|
|
"grad_norm": 0.49147050879910953,
|
|
"learning_rate": 3.0455621876684987e-05,
|
|
"loss": 0.2744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30739930272102356,
|
|
"step": 1655,
|
|
"valid_targets_mean": 6203.4,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 2.7574750830564785,
|
|
"grad_norm": 0.4477012303808159,
|
|
"learning_rate": 3.03849072681256e-05,
|
|
"loss": 0.2315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24040789902210236,
|
|
"step": 1660,
|
|
"valid_targets_mean": 5248.2,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 2.76578073089701,
|
|
"grad_norm": 0.42465897093156435,
|
|
"learning_rate": 3.0314014460633515e-05,
|
|
"loss": 0.2484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2396584451198578,
|
|
"step": 1665,
|
|
"valid_targets_mean": 5058.1,
|
|
"valid_targets_min": 942
|
|
},
|
|
{
|
|
"epoch": 2.7740863787375414,
|
|
"grad_norm": 0.472786563376718,
|
|
"learning_rate": 3.0242944670687825e-05,
|
|
"loss": 0.2632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2946327328681946,
|
|
"step": 1670,
|
|
"valid_targets_mean": 5392.8,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 2.782392026578073,
|
|
"grad_norm": 0.4491425687173453,
|
|
"learning_rate": 3.017169911780453e-05,
|
|
"loss": 0.2604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2656497359275818,
|
|
"step": 1675,
|
|
"valid_targets_mean": 4919.6,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 2.7906976744186047,
|
|
"grad_norm": 0.39259584150854826,
|
|
"learning_rate": 3.010027902451566e-05,
|
|
"loss": 0.2446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.255082368850708,
|
|
"step": 1680,
|
|
"valid_targets_mean": 6282.9,
|
|
"valid_targets_min": 2206
|
|
},
|
|
{
|
|
"epoch": 2.799003322259136,
|
|
"grad_norm": 0.45853740567940243,
|
|
"learning_rate": 3.0028685616348205e-05,
|
|
"loss": 0.2693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24521677196025848,
|
|
"step": 1685,
|
|
"valid_targets_mean": 4771.7,
|
|
"valid_targets_min": 843
|
|
},
|
|
{
|
|
"epoch": 2.8073089700996676,
|
|
"grad_norm": 0.4468898326321928,
|
|
"learning_rate": 2.9956920121803174e-05,
|
|
"loss": 0.2499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20200932025909424,
|
|
"step": 1690,
|
|
"valid_targets_mean": 4403.9,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 2.8156146179401995,
|
|
"grad_norm": 0.5189266455987487,
|
|
"learning_rate": 2.988498377233446e-05,
|
|
"loss": 0.2508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2545081377029419,
|
|
"step": 1695,
|
|
"valid_targets_mean": 3768.5,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 2.823920265780731,
|
|
"grad_norm": 0.4386468683499696,
|
|
"learning_rate": 2.981287780232774e-05,
|
|
"loss": 0.2549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30795300006866455,
|
|
"step": 1700,
|
|
"valid_targets_mean": 5660.0,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 2.8322259136212624,
|
|
"grad_norm": 0.4726895982691763,
|
|
"learning_rate": 2.9740603449079253e-05,
|
|
"loss": 0.2436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2410452514886856,
|
|
"step": 1705,
|
|
"valid_targets_mean": 4373.4,
|
|
"valid_targets_min": 655
|
|
},
|
|
{
|
|
"epoch": 2.840531561461794,
|
|
"grad_norm": 0.41842458190369447,
|
|
"learning_rate": 2.966816195277463e-05,
|
|
"loss": 0.2523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2549525499343872,
|
|
"step": 1710,
|
|
"valid_targets_mean": 5729.3,
|
|
"valid_targets_min": 725
|
|
},
|
|
{
|
|
"epoch": 2.8488372093023253,
|
|
"grad_norm": 0.46760283695373217,
|
|
"learning_rate": 2.9595554556467557e-05,
|
|
"loss": 0.2562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2544372081756592,
|
|
"step": 1715,
|
|
"valid_targets_mean": 4336.4,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 2.857142857142857,
|
|
"grad_norm": 0.44349434482304506,
|
|
"learning_rate": 2.952278250605847e-05,
|
|
"loss": 0.2503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27876293659210205,
|
|
"step": 1720,
|
|
"valid_targets_mean": 4730.6,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 2.8654485049833887,
|
|
"grad_norm": 0.4004366991765892,
|
|
"learning_rate": 2.9449847050273193e-05,
|
|
"loss": 0.262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20823951065540314,
|
|
"step": 1725,
|
|
"valid_targets_mean": 4909.4,
|
|
"valid_targets_min": 886
|
|
},
|
|
{
|
|
"epoch": 2.87375415282392,
|
|
"grad_norm": 0.4441308619263069,
|
|
"learning_rate": 2.9376749440641457e-05,
|
|
"loss": 0.2555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2311842143535614,
|
|
"step": 1730,
|
|
"valid_targets_mean": 4354.6,
|
|
"valid_targets_min": 541
|
|
},
|
|
{
|
|
"epoch": 2.882059800664452,
|
|
"grad_norm": 0.4190493909520609,
|
|
"learning_rate": 2.930349093147548e-05,
|
|
"loss": 0.2493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21783331036567688,
|
|
"step": 1735,
|
|
"valid_targets_mean": 5214.2,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 2.8903654485049834,
|
|
"grad_norm": 0.4450097945712441,
|
|
"learning_rate": 2.9230072779848418e-05,
|
|
"loss": 0.2558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2393600046634674,
|
|
"step": 1740,
|
|
"valid_targets_mean": 4743.1,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 2.898671096345515,
|
|
"grad_norm": 0.49652098610784756,
|
|
"learning_rate": 2.9156496245572802e-05,
|
|
"loss": 0.2634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24928247928619385,
|
|
"step": 1745,
|
|
"valid_targets_mean": 4249.3,
|
|
"valid_targets_min": 525
|
|
},
|
|
{
|
|
"epoch": 2.9069767441860463,
|
|
"grad_norm": 0.39986795119095947,
|
|
"learning_rate": 2.90827625911789e-05,
|
|
"loss": 0.266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2713518738746643,
|
|
"step": 1750,
|
|
"valid_targets_mean": 7038.9,
|
|
"valid_targets_min": 3470
|
|
},
|
|
{
|
|
"epoch": 2.915282392026578,
|
|
"grad_norm": 0.4308032635208157,
|
|
"learning_rate": 2.9008873081893096e-05,
|
|
"loss": 0.2537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25884345173835754,
|
|
"step": 1755,
|
|
"valid_targets_mean": 5234.0,
|
|
"valid_targets_min": 508
|
|
},
|
|
{
|
|
"epoch": 2.9235880398671097,
|
|
"grad_norm": 0.45938596524355774,
|
|
"learning_rate": 2.8934828985616122e-05,
|
|
"loss": 0.2478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25125858187675476,
|
|
"step": 1760,
|
|
"valid_targets_mean": 4817.2,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 2.931893687707641,
|
|
"grad_norm": 0.4006714620456045,
|
|
"learning_rate": 2.8860631572901367e-05,
|
|
"loss": 0.2539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22639954090118408,
|
|
"step": 1765,
|
|
"valid_targets_mean": 5933.6,
|
|
"valid_targets_min": 565
|
|
},
|
|
{
|
|
"epoch": 2.940199335548173,
|
|
"grad_norm": 0.5083518794237236,
|
|
"learning_rate": 2.8786282116933003e-05,
|
|
"loss": 0.2523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2655089199542999,
|
|
"step": 1770,
|
|
"valid_targets_mean": 3783.5,
|
|
"valid_targets_min": 653
|
|
},
|
|
{
|
|
"epoch": 2.9485049833887045,
|
|
"grad_norm": 0.4850203717002092,
|
|
"learning_rate": 2.8711781893504204e-05,
|
|
"loss": 0.2697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23813170194625854,
|
|
"step": 1775,
|
|
"valid_targets_mean": 4349.9,
|
|
"valid_targets_min": 757
|
|
},
|
|
{
|
|
"epoch": 2.956810631229236,
|
|
"grad_norm": 0.4204152414711097,
|
|
"learning_rate": 2.8637132180995215e-05,
|
|
"loss": 0.2688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2421441227197647,
|
|
"step": 1780,
|
|
"valid_targets_mean": 5999.4,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 2.9651162790697674,
|
|
"grad_norm": 0.41391998671704194,
|
|
"learning_rate": 2.8562334260351425e-05,
|
|
"loss": 0.2649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2790008783340454,
|
|
"step": 1785,
|
|
"valid_targets_mean": 6181.3,
|
|
"valid_targets_min": 724
|
|
},
|
|
{
|
|
"epoch": 2.973421926910299,
|
|
"grad_norm": 0.47354799327355124,
|
|
"learning_rate": 2.8487389415061392e-05,
|
|
"loss": 0.2467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26098933815956116,
|
|
"step": 1790,
|
|
"valid_targets_mean": 4501.9,
|
|
"valid_targets_min": 856
|
|
},
|
|
{
|
|
"epoch": 2.9817275747508307,
|
|
"grad_norm": 0.447616877588483,
|
|
"learning_rate": 2.8412298931134805e-05,
|
|
"loss": 0.2707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3207198977470398,
|
|
"step": 1795,
|
|
"valid_targets_mean": 6386.1,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 2.990033222591362,
|
|
"grad_norm": 0.43009881647596004,
|
|
"learning_rate": 2.833706409708045e-05,
|
|
"loss": 0.255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26786935329437256,
|
|
"step": 1800,
|
|
"valid_targets_mean": 5866.3,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 2.9983388704318936,
|
|
"grad_norm": 0.45806672078655813,
|
|
"learning_rate": 2.8261686203884035e-05,
|
|
"loss": 0.2685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2657933831214905,
|
|
"step": 1805,
|
|
"valid_targets_mean": 4870.6,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 3.006644518272425,
|
|
"grad_norm": 0.48756213898029266,
|
|
"learning_rate": 2.8186166544986123e-05,
|
|
"loss": 0.2552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3178192377090454,
|
|
"step": 1810,
|
|
"valid_targets_mean": 5223.4,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 3.014950166112957,
|
|
"grad_norm": 0.4584894883551614,
|
|
"learning_rate": 2.811050641625988e-05,
|
|
"loss": 0.2578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3101058900356293,
|
|
"step": 1815,
|
|
"valid_targets_mean": 5944.3,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 3.0232558139534884,
|
|
"grad_norm": 0.41986696347059055,
|
|
"learning_rate": 2.803470711598884e-05,
|
|
"loss": 0.244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24282768368721008,
|
|
"step": 1820,
|
|
"valid_targets_mean": 5541.8,
|
|
"valid_targets_min": 911
|
|
},
|
|
{
|
|
"epoch": 3.03156146179402,
|
|
"grad_norm": 0.4444898516597791,
|
|
"learning_rate": 2.795876994484464e-05,
|
|
"loss": 0.2673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26929450035095215,
|
|
"step": 1825,
|
|
"valid_targets_mean": 5625.6,
|
|
"valid_targets_min": 334
|
|
},
|
|
{
|
|
"epoch": 3.0398671096345513,
|
|
"grad_norm": 0.4287603075001185,
|
|
"learning_rate": 2.788269620586472e-05,
|
|
"loss": 0.2572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2730964124202728,
|
|
"step": 1830,
|
|
"valid_targets_mean": 5612.4,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 3.048172757475083,
|
|
"grad_norm": 0.4214914142712943,
|
|
"learning_rate": 2.780648720442992e-05,
|
|
"loss": 0.2642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22813311219215393,
|
|
"step": 1835,
|
|
"valid_targets_mean": 5131.8,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 3.0564784053156147,
|
|
"grad_norm": 0.41979303931802014,
|
|
"learning_rate": 2.7730144248242132e-05,
|
|
"loss": 0.2557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2521560490131378,
|
|
"step": 1840,
|
|
"valid_targets_mean": 5431.8,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 3.064784053156146,
|
|
"grad_norm": 0.5034728616666446,
|
|
"learning_rate": 2.7653668647301797e-05,
|
|
"loss": 0.2416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23169758915901184,
|
|
"step": 1845,
|
|
"valid_targets_mean": 5721.2,
|
|
"valid_targets_min": 807
|
|
},
|
|
{
|
|
"epoch": 3.0730897009966776,
|
|
"grad_norm": 0.4618737009643096,
|
|
"learning_rate": 2.7577061713885494e-05,
|
|
"loss": 0.287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33649104833602905,
|
|
"step": 1850,
|
|
"valid_targets_mean": 6305.4,
|
|
"valid_targets_min": 521
|
|
},
|
|
{
|
|
"epoch": 3.0813953488372094,
|
|
"grad_norm": 0.46274400893973416,
|
|
"learning_rate": 2.7500324762523387e-05,
|
|
"loss": 0.2625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2859732508659363,
|
|
"step": 1855,
|
|
"valid_targets_mean": 4995.1,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 3.089700996677741,
|
|
"grad_norm": 0.41809575893574563,
|
|
"learning_rate": 2.7423459109976648e-05,
|
|
"loss": 0.2692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2579067647457123,
|
|
"step": 1860,
|
|
"valid_targets_mean": 5603.8,
|
|
"valid_targets_min": 846
|
|
},
|
|
{
|
|
"epoch": 3.0980066445182723,
|
|
"grad_norm": 0.4095547195593541,
|
|
"learning_rate": 2.7346466075214917e-05,
|
|
"loss": 0.2351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2252475470304489,
|
|
"step": 1865,
|
|
"valid_targets_mean": 5397.6,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 3.106312292358804,
|
|
"grad_norm": 0.4778646184642439,
|
|
"learning_rate": 2.7269346979393608e-05,
|
|
"loss": 0.2642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23879939317703247,
|
|
"step": 1870,
|
|
"valid_targets_mean": 4554.9,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 3.1146179401993357,
|
|
"grad_norm": 0.41051214825536586,
|
|
"learning_rate": 2.71921031458313e-05,
|
|
"loss": 0.2618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30977991223335266,
|
|
"step": 1875,
|
|
"valid_targets_mean": 6526.5,
|
|
"valid_targets_min": 2521
|
|
},
|
|
{
|
|
"epoch": 3.122923588039867,
|
|
"grad_norm": 0.39997578532716327,
|
|
"learning_rate": 2.7114735899986966e-05,
|
|
"loss": 0.2478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25830668210983276,
|
|
"step": 1880,
|
|
"valid_targets_mean": 6155.6,
|
|
"valid_targets_min": 946
|
|
},
|
|
{
|
|
"epoch": 3.1312292358803986,
|
|
"grad_norm": 0.4640813543918904,
|
|
"learning_rate": 2.7037246569437287e-05,
|
|
"loss": 0.2646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26280367374420166,
|
|
"step": 1885,
|
|
"valid_targets_mean": 4656.5,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 3.13953488372093,
|
|
"grad_norm": 0.46567134046555464,
|
|
"learning_rate": 2.6959636483853843e-05,
|
|
"loss": 0.2459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30310046672821045,
|
|
"step": 1890,
|
|
"valid_targets_mean": 5675.4,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 3.147840531561462,
|
|
"grad_norm": 0.495062450594163,
|
|
"learning_rate": 2.688190697498029e-05,
|
|
"loss": 0.2435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.217759370803833,
|
|
"step": 1895,
|
|
"valid_targets_mean": 3972.2,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 3.1561461794019934,
|
|
"grad_norm": 0.4335877114664538,
|
|
"learning_rate": 2.680405937660952e-05,
|
|
"loss": 0.2512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27937451004981995,
|
|
"step": 1900,
|
|
"valid_targets_mean": 5689.4,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 3.164451827242525,
|
|
"grad_norm": 0.4293140553478189,
|
|
"learning_rate": 2.6726095024560782e-05,
|
|
"loss": 0.2559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26496586203575134,
|
|
"step": 1905,
|
|
"valid_targets_mean": 5370.5,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 3.1727574750830563,
|
|
"grad_norm": 0.494319783975607,
|
|
"learning_rate": 2.6648015256656727e-05,
|
|
"loss": 0.2422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2480391263961792,
|
|
"step": 1910,
|
|
"valid_targets_mean": 3761.9,
|
|
"valid_targets_min": 581
|
|
},
|
|
{
|
|
"epoch": 3.181063122923588,
|
|
"grad_norm": 0.4352128494130891,
|
|
"learning_rate": 2.6569821412700498e-05,
|
|
"loss": 0.2506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24893486499786377,
|
|
"step": 1915,
|
|
"valid_targets_mean": 4847.8,
|
|
"valid_targets_min": 514
|
|
},
|
|
{
|
|
"epoch": 3.1893687707641196,
|
|
"grad_norm": 0.4548896200368843,
|
|
"learning_rate": 2.6491514834452713e-05,
|
|
"loss": 0.2498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22502918541431427,
|
|
"step": 1920,
|
|
"valid_targets_mean": 4866.4,
|
|
"valid_targets_min": 2547
|
|
},
|
|
{
|
|
"epoch": 3.197674418604651,
|
|
"grad_norm": 0.44443118056607456,
|
|
"learning_rate": 2.6413096865608426e-05,
|
|
"loss": 0.242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2815675437450409,
|
|
"step": 1925,
|
|
"valid_targets_mean": 5384.9,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 3.2059800664451825,
|
|
"grad_norm": 0.4525167641958294,
|
|
"learning_rate": 2.633456885177412e-05,
|
|
"loss": 0.2546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22947941720485687,
|
|
"step": 1930,
|
|
"valid_targets_mean": 4632.4,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 3.2142857142857144,
|
|
"grad_norm": 0.45742510482187737,
|
|
"learning_rate": 2.6255932140444546e-05,
|
|
"loss": 0.2497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2661646008491516,
|
|
"step": 1935,
|
|
"valid_targets_mean": 4918.9,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 3.222591362126246,
|
|
"grad_norm": 0.479542216256229,
|
|
"learning_rate": 2.6177188080979675e-05,
|
|
"loss": 0.2471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25829941034317017,
|
|
"step": 1940,
|
|
"valid_targets_mean": 4850.4,
|
|
"valid_targets_min": 515
|
|
},
|
|
{
|
|
"epoch": 3.2308970099667773,
|
|
"grad_norm": 0.4590312253075504,
|
|
"learning_rate": 2.609833802458149e-05,
|
|
"loss": 0.2231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22883078455924988,
|
|
"step": 1945,
|
|
"valid_targets_mean": 4768.6,
|
|
"valid_targets_min": 498
|
|
},
|
|
{
|
|
"epoch": 3.2392026578073088,
|
|
"grad_norm": 0.43247461832359885,
|
|
"learning_rate": 2.601938332427083e-05,
|
|
"loss": 0.2534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27305081486701965,
|
|
"step": 1950,
|
|
"valid_targets_mean": 6330.2,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 3.2475083056478407,
|
|
"grad_norm": 0.4334679729519335,
|
|
"learning_rate": 2.594032533486415e-05,
|
|
"loss": 0.2651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2502228915691376,
|
|
"step": 1955,
|
|
"valid_targets_mean": 4989.4,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 3.255813953488372,
|
|
"grad_norm": 0.3957591876363261,
|
|
"learning_rate": 2.58611654129503e-05,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2222408652305603,
|
|
"step": 1960,
|
|
"valid_targets_mean": 5987.6,
|
|
"valid_targets_min": 2174
|
|
},
|
|
{
|
|
"epoch": 3.2641196013289036,
|
|
"grad_norm": 0.47435128849399544,
|
|
"learning_rate": 2.578190491686722e-05,
|
|
"loss": 0.2426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2528674602508545,
|
|
"step": 1965,
|
|
"valid_targets_mean": 4466.9,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 3.2724252491694354,
|
|
"grad_norm": 0.4359827559506875,
|
|
"learning_rate": 2.5702545206678658e-05,
|
|
"loss": 0.2687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23913633823394775,
|
|
"step": 1970,
|
|
"valid_targets_mean": 4937.3,
|
|
"valid_targets_min": 711
|
|
},
|
|
{
|
|
"epoch": 3.280730897009967,
|
|
"grad_norm": 0.44645335217374366,
|
|
"learning_rate": 2.5623087644150792e-05,
|
|
"loss": 0.2612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25028175115585327,
|
|
"step": 1975,
|
|
"valid_targets_mean": 4789.1,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 3.2890365448504983,
|
|
"grad_norm": 0.5039758941343888,
|
|
"learning_rate": 2.5543533592728924e-05,
|
|
"loss": 0.265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2616705596446991,
|
|
"step": 1980,
|
|
"valid_targets_mean": 4289.5,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 3.29734219269103,
|
|
"grad_norm": 0.40967825115682727,
|
|
"learning_rate": 2.5463884417514024e-05,
|
|
"loss": 0.2617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22789421677589417,
|
|
"step": 1985,
|
|
"valid_targets_mean": 5516.2,
|
|
"valid_targets_min": 836
|
|
},
|
|
{
|
|
"epoch": 3.3056478405315612,
|
|
"grad_norm": 0.4206255946984235,
|
|
"learning_rate": 2.538414148523933e-05,
|
|
"loss": 0.2506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24894721806049347,
|
|
"step": 1990,
|
|
"valid_targets_mean": 5561.6,
|
|
"valid_targets_min": 2853
|
|
},
|
|
{
|
|
"epoch": 3.313953488372093,
|
|
"grad_norm": 0.46388655537695356,
|
|
"learning_rate": 2.5304306164246916e-05,
|
|
"loss": 0.2647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24674175679683685,
|
|
"step": 1995,
|
|
"valid_targets_mean": 5036.0,
|
|
"valid_targets_min": 1890
|
|
},
|
|
{
|
|
"epoch": 3.3222591362126246,
|
|
"grad_norm": 0.45175133899982206,
|
|
"learning_rate": 2.522437982446417e-05,
|
|
"loss": 0.2579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2735620141029358,
|
|
"step": 2000,
|
|
"valid_targets_mean": 5053.1,
|
|
"valid_targets_min": 793
|
|
},
|
|
{
|
|
"epoch": 3.330564784053156,
|
|
"grad_norm": 0.42013561492453866,
|
|
"learning_rate": 2.5144363837380324e-05,
|
|
"loss": 0.2537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2491709589958191,
|
|
"step": 2005,
|
|
"valid_targets_mean": 5402.2,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 3.338870431893688,
|
|
"grad_norm": 0.48859569096522687,
|
|
"learning_rate": 2.5064259576022904e-05,
|
|
"loss": 0.2549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2755495309829712,
|
|
"step": 2010,
|
|
"valid_targets_mean": 4419.6,
|
|
"valid_targets_min": 833
|
|
},
|
|
{
|
|
"epoch": 3.3471760797342194,
|
|
"grad_norm": 0.4629645810789966,
|
|
"learning_rate": 2.4984068414934162e-05,
|
|
"loss": 0.266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28467848896980286,
|
|
"step": 2015,
|
|
"valid_targets_mean": 4924.9,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 3.355481727574751,
|
|
"grad_norm": 0.4541867685229341,
|
|
"learning_rate": 2.4903791730147502e-05,
|
|
"loss": 0.2599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24856264889240265,
|
|
"step": 2020,
|
|
"valid_targets_mean": 5438.4,
|
|
"valid_targets_min": 3053
|
|
},
|
|
{
|
|
"epoch": 3.3637873754152823,
|
|
"grad_norm": 0.46440158047390057,
|
|
"learning_rate": 2.4823430899163877e-05,
|
|
"loss": 0.2624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2464272528886795,
|
|
"step": 2025,
|
|
"valid_targets_mean": 4789.6,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 3.3720930232558137,
|
|
"grad_norm": 0.4532666544869348,
|
|
"learning_rate": 2.4742987300928114e-05,
|
|
"loss": 0.2546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2597007751464844,
|
|
"step": 2030,
|
|
"valid_targets_mean": 4619.6,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 3.3803986710963456,
|
|
"grad_norm": 0.5658505888856132,
|
|
"learning_rate": 2.46624623158053e-05,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25855278968811035,
|
|
"step": 2035,
|
|
"valid_targets_mean": 3548.6,
|
|
"valid_targets_min": 603
|
|
},
|
|
{
|
|
"epoch": 3.388704318936877,
|
|
"grad_norm": 0.5298802027475776,
|
|
"learning_rate": 2.458185732555707e-05,
|
|
"loss": 0.2618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24313034117221832,
|
|
"step": 2040,
|
|
"valid_targets_mean": 3203.6,
|
|
"valid_targets_min": 504
|
|
},
|
|
{
|
|
"epoch": 3.3970099667774085,
|
|
"grad_norm": 0.4520375656237862,
|
|
"learning_rate": 2.450117371331789e-05,
|
|
"loss": 0.2363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2573382258415222,
|
|
"step": 2045,
|
|
"valid_targets_mean": 4790.2,
|
|
"valid_targets_min": 828
|
|
},
|
|
{
|
|
"epoch": 3.4053156146179404,
|
|
"grad_norm": 0.47287393626888513,
|
|
"learning_rate": 2.4420412863571347e-05,
|
|
"loss": 0.2657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24323126673698425,
|
|
"step": 2050,
|
|
"valid_targets_mean": 4615.9,
|
|
"valid_targets_min": 550
|
|
},
|
|
{
|
|
"epoch": 3.413621262458472,
|
|
"grad_norm": 0.436425617603904,
|
|
"learning_rate": 2.4339576162126362e-05,
|
|
"loss": 0.2446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24149653315544128,
|
|
"step": 2055,
|
|
"valid_targets_mean": 4753.5,
|
|
"valid_targets_min": 631
|
|
},
|
|
{
|
|
"epoch": 3.4219269102990033,
|
|
"grad_norm": 0.49889705115610605,
|
|
"learning_rate": 2.4258664996093443e-05,
|
|
"loss": 0.2524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2260328084230423,
|
|
"step": 2060,
|
|
"valid_targets_mean": 3393.1,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 3.4302325581395348,
|
|
"grad_norm": 0.43226134852990633,
|
|
"learning_rate": 2.4177680753860857e-05,
|
|
"loss": 0.2793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3022438585758209,
|
|
"step": 2065,
|
|
"valid_targets_mean": 5449.9,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 3.438538205980066,
|
|
"grad_norm": 0.4121409672501697,
|
|
"learning_rate": 2.4096624825070814e-05,
|
|
"loss": 0.2618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2625482380390167,
|
|
"step": 2070,
|
|
"valid_targets_mean": 5543.4,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 3.446843853820598,
|
|
"grad_norm": 0.4226531648464987,
|
|
"learning_rate": 2.4015498600595635e-05,
|
|
"loss": 0.2438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25384968519210815,
|
|
"step": 2075,
|
|
"valid_targets_mean": 5894.9,
|
|
"valid_targets_min": 438
|
|
},
|
|
{
|
|
"epoch": 3.4551495016611296,
|
|
"grad_norm": 0.4334536045976772,
|
|
"learning_rate": 2.3934303472513864e-05,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2791216969490051,
|
|
"step": 2080,
|
|
"valid_targets_mean": 6731.2,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 3.463455149501661,
|
|
"grad_norm": 0.45346751328358836,
|
|
"learning_rate": 2.385304083408639e-05,
|
|
"loss": 0.2566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29424208402633667,
|
|
"step": 2085,
|
|
"valid_targets_mean": 6420.8,
|
|
"valid_targets_min": 493
|
|
},
|
|
{
|
|
"epoch": 3.471760797342193,
|
|
"grad_norm": 0.4285141275640956,
|
|
"learning_rate": 2.377171207973255e-05,
|
|
"loss": 0.2462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2264001965522766,
|
|
"step": 2090,
|
|
"valid_targets_mean": 5329.9,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 3.4800664451827243,
|
|
"grad_norm": 0.41844803062505703,
|
|
"learning_rate": 2.3690318605006173e-05,
|
|
"loss": 0.2533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.259234219789505,
|
|
"step": 2095,
|
|
"valid_targets_mean": 5186.1,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 3.488372093023256,
|
|
"grad_norm": 0.5020072486528517,
|
|
"learning_rate": 2.3608861806571674e-05,
|
|
"loss": 0.2693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2347356081008911,
|
|
"step": 2100,
|
|
"valid_targets_mean": 3694.1,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 3.4966777408637872,
|
|
"grad_norm": 0.421382518216441,
|
|
"learning_rate": 2.3527343082180053e-05,
|
|
"loss": 0.2368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2615286111831665,
|
|
"step": 2105,
|
|
"valid_targets_mean": 5689.5,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 3.5049833887043187,
|
|
"grad_norm": 0.4045700221459861,
|
|
"learning_rate": 2.3445763830644924e-05,
|
|
"loss": 0.2538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27072131633758545,
|
|
"step": 2110,
|
|
"valid_targets_mean": 6004.7,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 3.5132890365448506,
|
|
"grad_norm": 0.537968450903384,
|
|
"learning_rate": 2.3364125451818515e-05,
|
|
"loss": 0.2483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23445047438144684,
|
|
"step": 2115,
|
|
"valid_targets_mean": 3921.8,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 3.521594684385382,
|
|
"grad_norm": 0.4363420533017999,
|
|
"learning_rate": 2.3282429346567635e-05,
|
|
"loss": 0.2496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2956826090812683,
|
|
"step": 2120,
|
|
"valid_targets_mean": 6480.6,
|
|
"valid_targets_min": 2987
|
|
},
|
|
{
|
|
"epoch": 3.5299003322259135,
|
|
"grad_norm": 0.3877193994285806,
|
|
"learning_rate": 2.3200676916749658e-05,
|
|
"loss": 0.2599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2337704449892044,
|
|
"step": 2125,
|
|
"valid_targets_mean": 5798.6,
|
|
"valid_targets_min": 377
|
|
},
|
|
{
|
|
"epoch": 3.5382059800664454,
|
|
"grad_norm": 0.40830512433352983,
|
|
"learning_rate": 2.3118869565188448e-05,
|
|
"loss": 0.2786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29106971621513367,
|
|
"step": 2130,
|
|
"valid_targets_mean": 6556.8,
|
|
"valid_targets_min": 2106
|
|
},
|
|
{
|
|
"epoch": 3.546511627906977,
|
|
"grad_norm": 0.4892205874918003,
|
|
"learning_rate": 2.3037008695650292e-05,
|
|
"loss": 0.271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25906628370285034,
|
|
"step": 2135,
|
|
"valid_targets_mean": 4154.1,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 3.5548172757475083,
|
|
"grad_norm": 0.4838813836528523,
|
|
"learning_rate": 2.295509571281981e-05,
|
|
"loss": 0.2557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2609056830406189,
|
|
"step": 2140,
|
|
"valid_targets_mean": 3758.1,
|
|
"valid_targets_min": 653
|
|
},
|
|
{
|
|
"epoch": 3.5631229235880397,
|
|
"grad_norm": 0.41224322612506187,
|
|
"learning_rate": 2.287313202227588e-05,
|
|
"loss": 0.253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23928983509540558,
|
|
"step": 2145,
|
|
"valid_targets_mean": 5345.9,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 3.571428571428571,
|
|
"grad_norm": 0.4475757703374575,
|
|
"learning_rate": 2.279111903046745e-05,
|
|
"loss": 0.2475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2679324746131897,
|
|
"step": 2150,
|
|
"valid_targets_mean": 5610.9,
|
|
"valid_targets_min": 587
|
|
},
|
|
{
|
|
"epoch": 3.579734219269103,
|
|
"grad_norm": 0.5295534546970732,
|
|
"learning_rate": 2.2709058144689498e-05,
|
|
"loss": 0.2388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21889519691467285,
|
|
"step": 2155,
|
|
"valid_targets_mean": 6015.1,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 3.5880398671096345,
|
|
"grad_norm": 0.4703886280593987,
|
|
"learning_rate": 2.26269507730588e-05,
|
|
"loss": 0.2581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30911919474601746,
|
|
"step": 2160,
|
|
"valid_targets_mean": 5465.7,
|
|
"valid_targets_min": 435
|
|
},
|
|
{
|
|
"epoch": 3.596345514950166,
|
|
"grad_norm": 0.42963308319607607,
|
|
"learning_rate": 2.2544798324489814e-05,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2432975172996521,
|
|
"step": 2165,
|
|
"valid_targets_mean": 4981.4,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 3.604651162790698,
|
|
"grad_norm": 0.4319219238363922,
|
|
"learning_rate": 2.2462602208670496e-05,
|
|
"loss": 0.2541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27774038910865784,
|
|
"step": 2170,
|
|
"valid_targets_mean": 5428.0,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 3.6129568106312293,
|
|
"grad_norm": 0.39567739647432826,
|
|
"learning_rate": 2.23803638360381e-05,
|
|
"loss": 0.2677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30346381664276123,
|
|
"step": 2175,
|
|
"valid_targets_mean": 7376.1,
|
|
"valid_targets_min": 926
|
|
},
|
|
{
|
|
"epoch": 3.6212624584717608,
|
|
"grad_norm": 0.42892781462731566,
|
|
"learning_rate": 2.229808461775498e-05,
|
|
"loss": 0.2391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2259696125984192,
|
|
"step": 2180,
|
|
"valid_targets_mean": 4895.6,
|
|
"valid_targets_min": 373
|
|
},
|
|
{
|
|
"epoch": 3.629568106312292,
|
|
"grad_norm": 0.41443480984172965,
|
|
"learning_rate": 2.221576596568439e-05,
|
|
"loss": 0.2525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2932804822921753,
|
|
"step": 2185,
|
|
"valid_targets_mean": 6830.4,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 3.6378737541528237,
|
|
"grad_norm": 0.4905140015993029,
|
|
"learning_rate": 2.2133409292366234e-05,
|
|
"loss": 0.2594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26416388154029846,
|
|
"step": 2190,
|
|
"valid_targets_mean": 6452.2,
|
|
"valid_targets_min": 1655
|
|
},
|
|
{
|
|
"epoch": 3.6461794019933556,
|
|
"grad_norm": 0.4482423305315421,
|
|
"learning_rate": 2.2051016010992834e-05,
|
|
"loss": 0.242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2456321120262146,
|
|
"step": 2195,
|
|
"valid_targets_mean": 4418.6,
|
|
"valid_targets_min": 585
|
|
},
|
|
{
|
|
"epoch": 3.654485049833887,
|
|
"grad_norm": 0.4782699448575384,
|
|
"learning_rate": 2.1968587535384697e-05,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3004859685897827,
|
|
"step": 2200,
|
|
"valid_targets_mean": 5183.2,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 3.6627906976744184,
|
|
"grad_norm": 0.48940080349357473,
|
|
"learning_rate": 2.1886125279966238e-05,
|
|
"loss": 0.252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2421371042728424,
|
|
"step": 2205,
|
|
"valid_targets_mean": 5129.7,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 3.6710963455149503,
|
|
"grad_norm": 0.43270249733102634,
|
|
"learning_rate": 2.1803630659741516e-05,
|
|
"loss": 0.2358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2034202516078949,
|
|
"step": 2210,
|
|
"valid_targets_mean": 4217.0,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 3.679401993355482,
|
|
"grad_norm": 0.4241925468017185,
|
|
"learning_rate": 2.172110509026994e-05,
|
|
"loss": 0.242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21074241399765015,
|
|
"step": 2215,
|
|
"valid_targets_mean": 4703.1,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 3.6877076411960132,
|
|
"grad_norm": 0.45039656471019585,
|
|
"learning_rate": 2.1638549987641996e-05,
|
|
"loss": 0.2298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2449452131986618,
|
|
"step": 2220,
|
|
"valid_targets_mean": 4897.4,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 3.6960132890365447,
|
|
"grad_norm": 0.4531352105698492,
|
|
"learning_rate": 2.155596676845496e-05,
|
|
"loss": 0.2346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20737116038799286,
|
|
"step": 2225,
|
|
"valid_targets_mean": 3827.8,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 3.704318936877076,
|
|
"grad_norm": 0.3986048547950102,
|
|
"learning_rate": 2.1473356849788538e-05,
|
|
"loss": 0.2551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2939234972000122,
|
|
"step": 2230,
|
|
"valid_targets_mean": 6707.2,
|
|
"valid_targets_min": 873
|
|
},
|
|
{
|
|
"epoch": 3.712624584717608,
|
|
"grad_norm": 0.44867595165777796,
|
|
"learning_rate": 2.139072164918061e-05,
|
|
"loss": 0.2622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23817504942417145,
|
|
"step": 2235,
|
|
"valid_targets_mean": 4521.2,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 3.7209302325581395,
|
|
"grad_norm": 0.4956407998568628,
|
|
"learning_rate": 2.1308062584602865e-05,
|
|
"loss": 0.2674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28316932916641235,
|
|
"step": 2240,
|
|
"valid_targets_mean": 4014.8,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 3.729235880398671,
|
|
"grad_norm": 0.4874716052259617,
|
|
"learning_rate": 2.1225381074436493e-05,
|
|
"loss": 0.2674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25550392270088196,
|
|
"step": 2245,
|
|
"valid_targets_mean": 3782.2,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 3.737541528239203,
|
|
"grad_norm": 0.4319032819308933,
|
|
"learning_rate": 2.114267853744783e-05,
|
|
"loss": 0.2556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2293645590543747,
|
|
"step": 2250,
|
|
"valid_targets_mean": 5008.8,
|
|
"valid_targets_min": 587
|
|
},
|
|
{
|
|
"epoch": 3.7458471760797343,
|
|
"grad_norm": 0.3993146019561641,
|
|
"learning_rate": 2.1059956392764022e-05,
|
|
"loss": 0.2435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24126876890659332,
|
|
"step": 2255,
|
|
"valid_targets_mean": 6221.1,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 3.7541528239202657,
|
|
"grad_norm": 0.47376552500763236,
|
|
"learning_rate": 2.097721605984868e-05,
|
|
"loss": 0.2439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2536640763282776,
|
|
"step": 2260,
|
|
"valid_targets_mean": 4881.3,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 3.762458471760797,
|
|
"grad_norm": 0.42602442081261027,
|
|
"learning_rate": 2.08944589584775e-05,
|
|
"loss": 0.2592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2696529030799866,
|
|
"step": 2265,
|
|
"valid_targets_mean": 5539.5,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 3.770764119601329,
|
|
"grad_norm": 0.44591276687822873,
|
|
"learning_rate": 2.0811686508713918e-05,
|
|
"loss": 0.2402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25719332695007324,
|
|
"step": 2270,
|
|
"valid_targets_mean": 5664.1,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 3.7790697674418605,
|
|
"grad_norm": 0.417337359860667,
|
|
"learning_rate": 2.0728900130884755e-05,
|
|
"loss": 0.2263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2368699014186859,
|
|
"step": 2275,
|
|
"valid_targets_mean": 5110.1,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 3.787375415282392,
|
|
"grad_norm": 0.38050116398392286,
|
|
"learning_rate": 2.0646101245555812e-05,
|
|
"loss": 0.2583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23459474742412567,
|
|
"step": 2280,
|
|
"valid_targets_mean": 6279.9,
|
|
"valid_targets_min": 1144
|
|
},
|
|
{
|
|
"epoch": 3.795681063122924,
|
|
"grad_norm": 0.38645743106600666,
|
|
"learning_rate": 2.0563291273507517e-05,
|
|
"loss": 0.2637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2828667163848877,
|
|
"step": 2285,
|
|
"valid_targets_mean": 6649.1,
|
|
"valid_targets_min": 804
|
|
},
|
|
{
|
|
"epoch": 3.8039867109634553,
|
|
"grad_norm": 0.4159061049408354,
|
|
"learning_rate": 2.0480471635710538e-05,
|
|
"loss": 0.2478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2397889941930771,
|
|
"step": 2290,
|
|
"valid_targets_mean": 5745.3,
|
|
"valid_targets_min": 2423
|
|
},
|
|
{
|
|
"epoch": 3.8122923588039868,
|
|
"grad_norm": 0.4779155072639053,
|
|
"learning_rate": 2.0397643753301403e-05,
|
|
"loss": 0.265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2968156337738037,
|
|
"step": 2295,
|
|
"valid_targets_mean": 5370.5,
|
|
"valid_targets_min": 547
|
|
},
|
|
{
|
|
"epoch": 3.820598006644518,
|
|
"grad_norm": 0.4493985435407889,
|
|
"learning_rate": 2.031480904755812e-05,
|
|
"loss": 0.2582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28503671288490295,
|
|
"step": 2300,
|
|
"valid_targets_mean": 4574.9,
|
|
"valid_targets_min": 641
|
|
},
|
|
{
|
|
"epoch": 3.8289036544850497,
|
|
"grad_norm": 0.3917293323758945,
|
|
"learning_rate": 2.0231968939875766e-05,
|
|
"loss": 0.2554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2230997085571289,
|
|
"step": 2305,
|
|
"valid_targets_mean": 5489.4,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 3.8372093023255816,
|
|
"grad_norm": 0.4022290321659444,
|
|
"learning_rate": 2.014912485174213e-05,
|
|
"loss": 0.2305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22509130835533142,
|
|
"step": 2310,
|
|
"valid_targets_mean": 5163.2,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 3.845514950166113,
|
|
"grad_norm": 0.7065381330570263,
|
|
"learning_rate": 2.0066278204713282e-05,
|
|
"loss": 0.259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24468731880187988,
|
|
"step": 2315,
|
|
"valid_targets_mean": 5061.8,
|
|
"valid_targets_min": 331
|
|
},
|
|
{
|
|
"epoch": 3.8538205980066444,
|
|
"grad_norm": 0.41468892541056307,
|
|
"learning_rate": 1.9983430420389222e-05,
|
|
"loss": 0.2644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3021647334098816,
|
|
"step": 2320,
|
|
"valid_targets_mean": 6118.0,
|
|
"valid_targets_min": 891
|
|
},
|
|
{
|
|
"epoch": 3.8621262458471763,
|
|
"grad_norm": 0.45927932466072385,
|
|
"learning_rate": 1.9900582920389458e-05,
|
|
"loss": 0.265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23224687576293945,
|
|
"step": 2325,
|
|
"valid_targets_mean": 4284.1,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 3.870431893687708,
|
|
"grad_norm": 0.4447188189719365,
|
|
"learning_rate": 1.981773712632861e-05,
|
|
"loss": 0.2457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23178571462631226,
|
|
"step": 2330,
|
|
"valid_targets_mean": 4638.2,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 3.8787375415282392,
|
|
"grad_norm": 0.4984602156845261,
|
|
"learning_rate": 1.973489445979204e-05,
|
|
"loss": 0.2343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22763225436210632,
|
|
"step": 2335,
|
|
"valid_targets_mean": 4073.9,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 3.8870431893687707,
|
|
"grad_norm": 0.5014247772786626,
|
|
"learning_rate": 1.965205634231142e-05,
|
|
"loss": 0.2455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22031068801879883,
|
|
"step": 2340,
|
|
"valid_targets_mean": 3665.1,
|
|
"valid_targets_min": 666
|
|
},
|
|
{
|
|
"epoch": 3.895348837209302,
|
|
"grad_norm": 0.44022557334458295,
|
|
"learning_rate": 1.95692241953404e-05,
|
|
"loss": 0.2563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24869675934314728,
|
|
"step": 2345,
|
|
"valid_targets_mean": 4938.2,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 3.903654485049834,
|
|
"grad_norm": 0.4209888156940249,
|
|
"learning_rate": 1.9486399440230142e-05,
|
|
"loss": 0.2643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26511284708976746,
|
|
"step": 2350,
|
|
"valid_targets_mean": 5608.9,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 3.9119601328903655,
|
|
"grad_norm": 0.38656065841748366,
|
|
"learning_rate": 1.940358349820499e-05,
|
|
"loss": 0.2252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23406696319580078,
|
|
"step": 2355,
|
|
"valid_targets_mean": 5472.8,
|
|
"valid_targets_min": 512
|
|
},
|
|
{
|
|
"epoch": 3.920265780730897,
|
|
"grad_norm": 0.4698096091591882,
|
|
"learning_rate": 1.9320777790338068e-05,
|
|
"loss": 0.2562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2602609395980835,
|
|
"step": 2360,
|
|
"valid_targets_mean": 4214.2,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 3.928571428571429,
|
|
"grad_norm": 0.3906371561004404,
|
|
"learning_rate": 1.9237983737526855e-05,
|
|
"loss": 0.2414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21064278483390808,
|
|
"step": 2365,
|
|
"valid_targets_mean": 5736.5,
|
|
"valid_targets_min": 585
|
|
},
|
|
{
|
|
"epoch": 3.9368770764119603,
|
|
"grad_norm": 0.4423562226077725,
|
|
"learning_rate": 1.9155202760468868e-05,
|
|
"loss": 0.2414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25218603014945984,
|
|
"step": 2370,
|
|
"valid_targets_mean": 5165.4,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 3.9451827242524917,
|
|
"grad_norm": 0.4483192176422742,
|
|
"learning_rate": 1.907243627963723e-05,
|
|
"loss": 0.2575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25742965936660767,
|
|
"step": 2375,
|
|
"valid_targets_mean": 4391.7,
|
|
"valid_targets_min": 696
|
|
},
|
|
{
|
|
"epoch": 3.953488372093023,
|
|
"grad_norm": 0.4249235237254368,
|
|
"learning_rate": 1.898968571525634e-05,
|
|
"loss": 0.2966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2637488842010498,
|
|
"step": 2380,
|
|
"valid_targets_mean": 5526.5,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 3.9617940199335546,
|
|
"grad_norm": 0.414317500061836,
|
|
"learning_rate": 1.890695248727746e-05,
|
|
"loss": 0.2584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2686767876148224,
|
|
"step": 2385,
|
|
"valid_targets_mean": 5721.9,
|
|
"valid_targets_min": 2719
|
|
},
|
|
{
|
|
"epoch": 3.9700996677740865,
|
|
"grad_norm": 0.4499211985650845,
|
|
"learning_rate": 1.882423801535438e-05,
|
|
"loss": 0.2479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2531721293926239,
|
|
"step": 2390,
|
|
"valid_targets_mean": 4896.4,
|
|
"valid_targets_min": 609
|
|
},
|
|
{
|
|
"epoch": 3.978405315614618,
|
|
"grad_norm": 0.4716310818499775,
|
|
"learning_rate": 1.8741543718819028e-05,
|
|
"loss": 0.251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24488699436187744,
|
|
"step": 2395,
|
|
"valid_targets_mean": 4684.6,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 3.9867109634551494,
|
|
"grad_norm": 0.427963461477616,
|
|
"learning_rate": 1.8658871016657173e-05,
|
|
"loss": 0.2429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2255929410457611,
|
|
"step": 2400,
|
|
"valid_targets_mean": 5057.8,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 3.9950166112956813,
|
|
"grad_norm": 0.4803493724910627,
|
|
"learning_rate": 1.8576221327484002e-05,
|
|
"loss": 0.2748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3161429166793823,
|
|
"step": 2405,
|
|
"valid_targets_mean": 4781.3,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 4.003322259136213,
|
|
"grad_norm": 0.4043726976115266,
|
|
"learning_rate": 1.8493596069519813e-05,
|
|
"loss": 0.2367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23563313484191895,
|
|
"step": 2410,
|
|
"valid_targets_mean": 5538.6,
|
|
"valid_targets_min": 2050
|
|
},
|
|
{
|
|
"epoch": 4.011627906976744,
|
|
"grad_norm": 0.46903851280698955,
|
|
"learning_rate": 1.8410996660565714e-05,
|
|
"loss": 0.2538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2645820081233978,
|
|
"step": 2415,
|
|
"valid_targets_mean": 5037.4,
|
|
"valid_targets_min": 594
|
|
},
|
|
{
|
|
"epoch": 4.019933554817276,
|
|
"grad_norm": 0.4363323445822905,
|
|
"learning_rate": 1.8328424517979222e-05,
|
|
"loss": 0.2423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24905908107757568,
|
|
"step": 2420,
|
|
"valid_targets_mean": 5222.2,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 4.028239202657807,
|
|
"grad_norm": 0.432621338550339,
|
|
"learning_rate": 1.8245881058649993e-05,
|
|
"loss": 0.2408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23401597142219543,
|
|
"step": 2425,
|
|
"valid_targets_mean": 5745.1,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 4.0365448504983386,
|
|
"grad_norm": 0.4123834992888328,
|
|
"learning_rate": 1.816336769897549e-05,
|
|
"loss": 0.2335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2223263680934906,
|
|
"step": 2430,
|
|
"valid_targets_mean": 5165.8,
|
|
"valid_targets_min": 550
|
|
},
|
|
{
|
|
"epoch": 4.04485049833887,
|
|
"grad_norm": 0.4486254073766335,
|
|
"learning_rate": 1.80808858548367e-05,
|
|
"loss": 0.2614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24337473511695862,
|
|
"step": 2435,
|
|
"valid_targets_mean": 4960.2,
|
|
"valid_targets_min": 827
|
|
},
|
|
{
|
|
"epoch": 4.053156146179402,
|
|
"grad_norm": 0.4783353497962553,
|
|
"learning_rate": 1.79984369415738e-05,
|
|
"loss": 0.2398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24130034446716309,
|
|
"step": 2440,
|
|
"valid_targets_mean": 4411.4,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 4.061461794019934,
|
|
"grad_norm": 0.4457206750175172,
|
|
"learning_rate": 1.7916022373961908e-05,
|
|
"loss": 0.2347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23352915048599243,
|
|
"step": 2445,
|
|
"valid_targets_mean": 5020.6,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 4.069767441860465,
|
|
"grad_norm": 0.4374095756774514,
|
|
"learning_rate": 1.7833643566186772e-05,
|
|
"loss": 0.2678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23286865651607513,
|
|
"step": 2450,
|
|
"valid_targets_mean": 5009.9,
|
|
"valid_targets_min": 501
|
|
},
|
|
{
|
|
"epoch": 4.078073089700997,
|
|
"grad_norm": 0.3891912214666053,
|
|
"learning_rate": 1.775130193182056e-05,
|
|
"loss": 0.2606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27232301235198975,
|
|
"step": 2455,
|
|
"valid_targets_mean": 6913.4,
|
|
"valid_targets_min": 2395
|
|
},
|
|
{
|
|
"epoch": 4.086378737541528,
|
|
"grad_norm": 0.4296913766053193,
|
|
"learning_rate": 1.7668998883797522e-05,
|
|
"loss": 0.2508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25946512818336487,
|
|
"step": 2460,
|
|
"valid_targets_mean": 5246.1,
|
|
"valid_targets_min": 788
|
|
},
|
|
{
|
|
"epoch": 4.09468438538206,
|
|
"grad_norm": 0.42830003200104155,
|
|
"learning_rate": 1.7586735834389813e-05,
|
|
"loss": 0.2549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23951345682144165,
|
|
"step": 2465,
|
|
"valid_targets_mean": 5105.5,
|
|
"valid_targets_min": 373
|
|
},
|
|
{
|
|
"epoch": 4.102990033222591,
|
|
"grad_norm": 0.41806633062608134,
|
|
"learning_rate": 1.750451419518322e-05,
|
|
"loss": 0.2455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21947532892227173,
|
|
"step": 2470,
|
|
"valid_targets_mean": 5720.9,
|
|
"valid_targets_min": 2425
|
|
},
|
|
{
|
|
"epoch": 4.1112956810631225,
|
|
"grad_norm": 0.4365218729270944,
|
|
"learning_rate": 1.742233537705299e-05,
|
|
"loss": 0.2493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21741338074207306,
|
|
"step": 2475,
|
|
"valid_targets_mean": 4444.0,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 4.119601328903655,
|
|
"grad_norm": 0.4256238021019511,
|
|
"learning_rate": 1.734020079013954e-05,
|
|
"loss": 0.2395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22710920870304108,
|
|
"step": 2480,
|
|
"valid_targets_mean": 5094.6,
|
|
"valid_targets_min": 1077
|
|
},
|
|
{
|
|
"epoch": 4.127906976744186,
|
|
"grad_norm": 0.3825235716423473,
|
|
"learning_rate": 1.725811184382433e-05,
|
|
"loss": 0.2673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.252299964427948,
|
|
"step": 2485,
|
|
"valid_targets_mean": 7241.4,
|
|
"valid_targets_min": 2818
|
|
},
|
|
{
|
|
"epoch": 4.136212624584718,
|
|
"grad_norm": 0.40376609953415993,
|
|
"learning_rate": 1.7176069946705654e-05,
|
|
"loss": 0.2426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20457278192043304,
|
|
"step": 2490,
|
|
"valid_targets_mean": 5410.9,
|
|
"valid_targets_min": 2477
|
|
},
|
|
{
|
|
"epoch": 4.144518272425249,
|
|
"grad_norm": 0.45662333379822284,
|
|
"learning_rate": 1.7094076506574465e-05,
|
|
"loss": 0.2249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22901234030723572,
|
|
"step": 2495,
|
|
"valid_targets_mean": 4401.6,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 4.152823920265781,
|
|
"grad_norm": 0.42470840817626565,
|
|
"learning_rate": 1.7012132930390225e-05,
|
|
"loss": 0.2338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23266427218914032,
|
|
"step": 2500,
|
|
"valid_targets_mean": 5592.2,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 4.161129568106312,
|
|
"grad_norm": 0.4704005130722585,
|
|
"learning_rate": 1.6930240624256742e-05,
|
|
"loss": 0.2568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2143336683511734,
|
|
"step": 2505,
|
|
"valid_targets_mean": 4054.2,
|
|
"valid_targets_min": 561
|
|
},
|
|
{
|
|
"epoch": 4.1694352159468435,
|
|
"grad_norm": 0.43099635331398956,
|
|
"learning_rate": 1.68484009933981e-05,
|
|
"loss": 0.2329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2324756681919098,
|
|
"step": 2510,
|
|
"valid_targets_mean": 4954.0,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 4.177740863787376,
|
|
"grad_norm": 0.44412973596759536,
|
|
"learning_rate": 1.6766615442134462e-05,
|
|
"loss": 0.2476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2578473389148712,
|
|
"step": 2515,
|
|
"valid_targets_mean": 5591.6,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 4.186046511627907,
|
|
"grad_norm": 0.42312931598328635,
|
|
"learning_rate": 1.6684885373858048e-05,
|
|
"loss": 0.2342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24505099654197693,
|
|
"step": 2520,
|
|
"valid_targets_mean": 5466.5,
|
|
"valid_targets_min": 762
|
|
},
|
|
{
|
|
"epoch": 4.194352159468439,
|
|
"grad_norm": 0.4464884942681951,
|
|
"learning_rate": 1.6603212191009003e-05,
|
|
"loss": 0.2666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2566518187522888,
|
|
"step": 2525,
|
|
"valid_targets_mean": 6842.6,
|
|
"valid_targets_min": 3289
|
|
},
|
|
{
|
|
"epoch": 4.20265780730897,
|
|
"grad_norm": 0.4626187729596723,
|
|
"learning_rate": 1.6521597295051367e-05,
|
|
"loss": 0.2426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2384897917509079,
|
|
"step": 2530,
|
|
"valid_targets_mean": 4998.8,
|
|
"valid_targets_min": 975
|
|
},
|
|
{
|
|
"epoch": 4.210963455149502,
|
|
"grad_norm": 0.38856933505650343,
|
|
"learning_rate": 1.6440042086449e-05,
|
|
"loss": 0.2458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20458738505840302,
|
|
"step": 2535,
|
|
"valid_targets_mean": 5734.4,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 4.219269102990033,
|
|
"grad_norm": 0.5109252968096182,
|
|
"learning_rate": 1.6358547964641556e-05,
|
|
"loss": 0.2379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27227145433425903,
|
|
"step": 2540,
|
|
"valid_targets_mean": 4560.3,
|
|
"valid_targets_min": 500
|
|
},
|
|
{
|
|
"epoch": 4.2275747508305646,
|
|
"grad_norm": 0.46793374891021955,
|
|
"learning_rate": 1.6277116328020503e-05,
|
|
"loss": 0.2419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2658464312553406,
|
|
"step": 2545,
|
|
"valid_targets_mean": 4810.8,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 4.235880398671096,
|
|
"grad_norm": 0.46210513623098926,
|
|
"learning_rate": 1.619574857390507e-05,
|
|
"loss": 0.2369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2697179317474365,
|
|
"step": 2550,
|
|
"valid_targets_mean": 5058.2,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 4.2441860465116275,
|
|
"grad_norm": 0.40735649509212557,
|
|
"learning_rate": 1.611444609851831e-05,
|
|
"loss": 0.2433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27026858925819397,
|
|
"step": 2555,
|
|
"valid_targets_mean": 6273.9,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 4.25249169435216,
|
|
"grad_norm": 0.408237462680834,
|
|
"learning_rate": 1.603321029696313e-05,
|
|
"loss": 0.2112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21054208278656006,
|
|
"step": 2560,
|
|
"valid_targets_mean": 5098.6,
|
|
"valid_targets_min": 2728
|
|
},
|
|
{
|
|
"epoch": 4.260797342192691,
|
|
"grad_norm": 0.43068538477716656,
|
|
"learning_rate": 1.5952042563198364e-05,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22961243987083435,
|
|
"step": 2565,
|
|
"valid_targets_mean": 5076.9,
|
|
"valid_targets_min": 637
|
|
},
|
|
{
|
|
"epoch": 4.269102990033223,
|
|
"grad_norm": 0.4226261212091136,
|
|
"learning_rate": 1.5870944290014833e-05,
|
|
"loss": 0.2444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25411278009414673,
|
|
"step": 2570,
|
|
"valid_targets_mean": 5722.2,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 4.277408637873754,
|
|
"grad_norm": 0.42997991101971483,
|
|
"learning_rate": 1.578991686901145e-05,
|
|
"loss": 0.2212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24188435077667236,
|
|
"step": 2575,
|
|
"valid_targets_mean": 4988.2,
|
|
"valid_targets_min": 351
|
|
},
|
|
{
|
|
"epoch": 4.285714285714286,
|
|
"grad_norm": 0.44280559040301115,
|
|
"learning_rate": 1.5708961690571344e-05,
|
|
"loss": 0.2168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18886496126651764,
|
|
"step": 2580,
|
|
"valid_targets_mean": 4434.3,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 4.294019933554817,
|
|
"grad_norm": 0.4147804751112452,
|
|
"learning_rate": 1.562808014383803e-05,
|
|
"loss": 0.2501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25827157497406006,
|
|
"step": 2585,
|
|
"valid_targets_mean": 5310.4,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 4.3023255813953485,
|
|
"grad_norm": 0.45362878004717755,
|
|
"learning_rate": 1.5547273616691515e-05,
|
|
"loss": 0.2436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2047729790210724,
|
|
"step": 2590,
|
|
"valid_targets_mean": 4330.2,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 4.310631229235881,
|
|
"grad_norm": 0.41600449873565803,
|
|
"learning_rate": 1.546654349572453e-05,
|
|
"loss": 0.2076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21440035104751587,
|
|
"step": 2595,
|
|
"valid_targets_mean": 5823.1,
|
|
"valid_targets_min": 2582
|
|
},
|
|
{
|
|
"epoch": 4.318936877076412,
|
|
"grad_norm": 0.4102626830630782,
|
|
"learning_rate": 1.5385891166218723e-05,
|
|
"loss": 0.2432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2672971487045288,
|
|
"step": 2600,
|
|
"valid_targets_mean": 6161.4,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 4.327242524916944,
|
|
"grad_norm": 0.4282221897247868,
|
|
"learning_rate": 1.5305318012120872e-05,
|
|
"loss": 0.2439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21236363053321838,
|
|
"step": 2605,
|
|
"valid_targets_mean": 4762.4,
|
|
"valid_targets_min": 536
|
|
},
|
|
{
|
|
"epoch": 4.335548172757475,
|
|
"grad_norm": 0.46786769389738425,
|
|
"learning_rate": 1.5224825416019165e-05,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2682391405105591,
|
|
"step": 2610,
|
|
"valid_targets_mean": 4736.6,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 4.343853820598007,
|
|
"grad_norm": 0.46456884316686503,
|
|
"learning_rate": 1.5144414759119444e-05,
|
|
"loss": 0.2265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24575237929821014,
|
|
"step": 2615,
|
|
"valid_targets_mean": 4666.1,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 4.352159468438538,
|
|
"grad_norm": 0.5154302455549746,
|
|
"learning_rate": 1.5064087421221551e-05,
|
|
"loss": 0.2381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26671287417411804,
|
|
"step": 2620,
|
|
"valid_targets_mean": 3856.9,
|
|
"valid_targets_min": 213
|
|
},
|
|
{
|
|
"epoch": 4.3604651162790695,
|
|
"grad_norm": 0.4894187342503253,
|
|
"learning_rate": 1.4983844780695603e-05,
|
|
"loss": 0.239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24005022644996643,
|
|
"step": 2625,
|
|
"valid_targets_mean": 5104.9,
|
|
"valid_targets_min": 480
|
|
},
|
|
{
|
|
"epoch": 4.368770764119601,
|
|
"grad_norm": 0.4980274648789874,
|
|
"learning_rate": 1.4903688214458356e-05,
|
|
"loss": 0.249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24461548030376434,
|
|
"step": 2630,
|
|
"valid_targets_mean": 4008.5,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 4.377076411960133,
|
|
"grad_norm": 0.4120382340252628,
|
|
"learning_rate": 1.4823619097949584e-05,
|
|
"loss": 0.2382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21901656687259674,
|
|
"step": 2635,
|
|
"valid_targets_mean": 5171.7,
|
|
"valid_targets_min": 2632
|
|
},
|
|
{
|
|
"epoch": 4.385382059800665,
|
|
"grad_norm": 0.458870714146468,
|
|
"learning_rate": 1.4743638805108495e-05,
|
|
"loss": 0.2392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2437673807144165,
|
|
"step": 2640,
|
|
"valid_targets_mean": 4889.1,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 4.393687707641196,
|
|
"grad_norm": 0.42509221088607435,
|
|
"learning_rate": 1.4663748708350114e-05,
|
|
"loss": 0.2469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23470577597618103,
|
|
"step": 2645,
|
|
"valid_targets_mean": 6057.2,
|
|
"valid_targets_min": 858
|
|
},
|
|
{
|
|
"epoch": 4.401993355481728,
|
|
"grad_norm": 0.4627260734047896,
|
|
"learning_rate": 1.4583950178541755e-05,
|
|
"loss": 0.2577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28487733006477356,
|
|
"step": 2650,
|
|
"valid_targets_mean": 5108.8,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 4.410299003322259,
|
|
"grad_norm": 0.42221680936684725,
|
|
"learning_rate": 1.4504244584979494e-05,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2026389241218567,
|
|
"step": 2655,
|
|
"valid_targets_mean": 4654.8,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 4.4186046511627906,
|
|
"grad_norm": 0.4642232667134029,
|
|
"learning_rate": 1.4424633295364701e-05,
|
|
"loss": 0.2321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26977068185806274,
|
|
"step": 2660,
|
|
"valid_targets_mean": 5551.2,
|
|
"valid_targets_min": 778
|
|
},
|
|
{
|
|
"epoch": 4.426910299003322,
|
|
"grad_norm": 0.43364570855737683,
|
|
"learning_rate": 1.4345117675780511e-05,
|
|
"loss": 0.2515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2662436366081238,
|
|
"step": 2665,
|
|
"valid_targets_mean": 5459.7,
|
|
"valid_targets_min": 711
|
|
},
|
|
{
|
|
"epoch": 4.4352159468438535,
|
|
"grad_norm": 0.450911467541632,
|
|
"learning_rate": 1.4265699090668436e-05,
|
|
"loss": 0.2611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24556297063827515,
|
|
"step": 2670,
|
|
"valid_targets_mean": 5108.9,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 4.443521594684386,
|
|
"grad_norm": 0.4622968892410604,
|
|
"learning_rate": 1.4186378902804939e-05,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2829596698284149,
|
|
"step": 2675,
|
|
"valid_targets_mean": 5202.2,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 4.451827242524917,
|
|
"grad_norm": 0.5022527173026554,
|
|
"learning_rate": 1.4107158473278024e-05,
|
|
"loss": 0.2252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21267284452915192,
|
|
"step": 2680,
|
|
"valid_targets_mean": 3475.3,
|
|
"valid_targets_min": 828
|
|
},
|
|
{
|
|
"epoch": 4.460132890365449,
|
|
"grad_norm": 0.46463815594171176,
|
|
"learning_rate": 1.402803916146392e-05,
|
|
"loss": 0.2532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2095562219619751,
|
|
"step": 2685,
|
|
"valid_targets_mean": 4958.3,
|
|
"valid_targets_min": 554
|
|
},
|
|
{
|
|
"epoch": 4.46843853820598,
|
|
"grad_norm": 0.43026736726911025,
|
|
"learning_rate": 1.3949022325003714e-05,
|
|
"loss": 0.2571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2822760343551636,
|
|
"step": 2690,
|
|
"valid_targets_mean": 5984.4,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 4.476744186046512,
|
|
"grad_norm": 0.44382759388295756,
|
|
"learning_rate": 1.3870109319780105e-05,
|
|
"loss": 0.2232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23378977179527283,
|
|
"step": 2695,
|
|
"valid_targets_mean": 5118.8,
|
|
"valid_targets_min": 953
|
|
},
|
|
{
|
|
"epoch": 4.485049833887043,
|
|
"grad_norm": 0.44583053480779433,
|
|
"learning_rate": 1.3791301499894082e-05,
|
|
"loss": 0.2475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20921167731285095,
|
|
"step": 2700,
|
|
"valid_targets_mean": 4340.8,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 4.4933554817275745,
|
|
"grad_norm": 0.42871996800972517,
|
|
"learning_rate": 1.371260021764173e-05,
|
|
"loss": 0.2627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23499371111392975,
|
|
"step": 2705,
|
|
"valid_targets_mean": 5162.2,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 4.501661129568106,
|
|
"grad_norm": 0.4205139954176755,
|
|
"learning_rate": 1.3634006823490993e-05,
|
|
"loss": 0.2479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2795257568359375,
|
|
"step": 2710,
|
|
"valid_targets_mean": 6579.1,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 4.509966777408637,
|
|
"grad_norm": 0.418249182209601,
|
|
"learning_rate": 1.3555522666058549e-05,
|
|
"loss": 0.2567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2504189610481262,
|
|
"step": 2715,
|
|
"valid_targets_mean": 5548.5,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 4.51827242524917,
|
|
"grad_norm": 0.44856642492490695,
|
|
"learning_rate": 1.3477149092086612e-05,
|
|
"loss": 0.2443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17640933394432068,
|
|
"step": 2720,
|
|
"valid_targets_mean": 4066.4,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 4.526578073089701,
|
|
"grad_norm": 0.46922626835172715,
|
|
"learning_rate": 1.3398887446419849e-05,
|
|
"loss": 0.2493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30128753185272217,
|
|
"step": 2725,
|
|
"valid_targets_mean": 4939.9,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 4.534883720930233,
|
|
"grad_norm": 0.445567471708935,
|
|
"learning_rate": 1.3320739071982325e-05,
|
|
"loss": 0.2604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3059037923812866,
|
|
"step": 2730,
|
|
"valid_targets_mean": 5950.3,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 4.543189368770764,
|
|
"grad_norm": 0.4584725411035024,
|
|
"learning_rate": 1.3242705309754414e-05,
|
|
"loss": 0.2454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19027169048786163,
|
|
"step": 2735,
|
|
"valid_targets_mean": 4423.8,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 4.5514950166112955,
|
|
"grad_norm": 0.5201035622241538,
|
|
"learning_rate": 1.3164787498749822e-05,
|
|
"loss": 0.2435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23454302549362183,
|
|
"step": 2740,
|
|
"valid_targets_mean": 4015.1,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 4.559800664451827,
|
|
"grad_norm": 0.4952030353286917,
|
|
"learning_rate": 1.3086986975992585e-05,
|
|
"loss": 0.2296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21426764130592346,
|
|
"step": 2745,
|
|
"valid_targets_mean": 4702.4,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 4.568106312292358,
|
|
"grad_norm": 0.5286425714380149,
|
|
"learning_rate": 1.3009305076494176e-05,
|
|
"loss": 0.2451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23352576792240143,
|
|
"step": 2750,
|
|
"valid_targets_mean": 3501.7,
|
|
"valid_targets_min": 655
|
|
},
|
|
{
|
|
"epoch": 4.576411960132891,
|
|
"grad_norm": 0.4930408070710576,
|
|
"learning_rate": 1.2931743133230525e-05,
|
|
"loss": 0.2517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2536967098712921,
|
|
"step": 2755,
|
|
"valid_targets_mean": 4060.4,
|
|
"valid_targets_min": 515
|
|
},
|
|
{
|
|
"epoch": 4.584717607973422,
|
|
"grad_norm": 0.445754559537147,
|
|
"learning_rate": 1.2854302477119204e-05,
|
|
"loss": 0.2398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2212081253528595,
|
|
"step": 2760,
|
|
"valid_targets_mean": 4655.0,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 4.593023255813954,
|
|
"grad_norm": 0.40019338376556385,
|
|
"learning_rate": 1.2776984436996553e-05,
|
|
"loss": 0.2385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2631382346153259,
|
|
"step": 2765,
|
|
"valid_targets_mean": 6189.8,
|
|
"valid_targets_min": 2082
|
|
},
|
|
{
|
|
"epoch": 4.601328903654485,
|
|
"grad_norm": 0.4811699745698724,
|
|
"learning_rate": 1.2699790339594923e-05,
|
|
"loss": 0.2507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.270926833152771,
|
|
"step": 2770,
|
|
"valid_targets_mean": 5177.2,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 4.6096345514950166,
|
|
"grad_norm": 0.47293600673351693,
|
|
"learning_rate": 1.2622721509519846e-05,
|
|
"loss": 0.2615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25983530282974243,
|
|
"step": 2775,
|
|
"valid_targets_mean": 5001.9,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 4.617940199335548,
|
|
"grad_norm": 0.4629388243039753,
|
|
"learning_rate": 1.2545779269227358e-05,
|
|
"loss": 0.2388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2623574137687683,
|
|
"step": 2780,
|
|
"valid_targets_mean": 5142.9,
|
|
"valid_targets_min": 587
|
|
},
|
|
{
|
|
"epoch": 4.6262458471760795,
|
|
"grad_norm": 0.4205356140908875,
|
|
"learning_rate": 1.2468964939001278e-05,
|
|
"loss": 0.2393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24877700209617615,
|
|
"step": 2785,
|
|
"valid_targets_mean": 5409.3,
|
|
"valid_targets_min": 870
|
|
},
|
|
{
|
|
"epoch": 4.634551495016611,
|
|
"grad_norm": 0.4640341330753694,
|
|
"learning_rate": 1.2392279836930583e-05,
|
|
"loss": 0.2322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23701894283294678,
|
|
"step": 2790,
|
|
"valid_targets_mean": 5282.2,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 4.642857142857143,
|
|
"grad_norm": 0.43866323566509396,
|
|
"learning_rate": 1.231572527888675e-05,
|
|
"loss": 0.2489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2463322877883911,
|
|
"step": 2795,
|
|
"valid_targets_mean": 5359.2,
|
|
"valid_targets_min": 916
|
|
},
|
|
{
|
|
"epoch": 4.651162790697675,
|
|
"grad_norm": 0.43712164500169537,
|
|
"learning_rate": 1.2239302578501198e-05,
|
|
"loss": 0.2273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26199620962142944,
|
|
"step": 2800,
|
|
"valid_targets_mean": 5335.6,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 4.659468438538206,
|
|
"grad_norm": 0.4354583996387477,
|
|
"learning_rate": 1.2163013047142768e-05,
|
|
"loss": 0.2276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19174820184707642,
|
|
"step": 2805,
|
|
"valid_targets_mean": 4765.3,
|
|
"valid_targets_min": 2701
|
|
},
|
|
{
|
|
"epoch": 4.667774086378738,
|
|
"grad_norm": 0.4422446488773751,
|
|
"learning_rate": 1.2086857993895174e-05,
|
|
"loss": 0.2654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26237350702285767,
|
|
"step": 2810,
|
|
"valid_targets_mean": 5103.8,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 4.676079734219269,
|
|
"grad_norm": 0.4211785527941914,
|
|
"learning_rate": 1.2010838725534573e-05,
|
|
"loss": 0.2324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2117844521999359,
|
|
"step": 2815,
|
|
"valid_targets_mean": 4882.8,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 4.6843853820598005,
|
|
"grad_norm": 0.44956329689993785,
|
|
"learning_rate": 1.1934956546507124e-05,
|
|
"loss": 0.2529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26732513308525085,
|
|
"step": 2820,
|
|
"valid_targets_mean": 4915.7,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 4.692691029900332,
|
|
"grad_norm": 0.4770845972811508,
|
|
"learning_rate": 1.185921275890664e-05,
|
|
"loss": 0.2593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27914947271347046,
|
|
"step": 2825,
|
|
"valid_targets_mean": 5850.4,
|
|
"valid_targets_min": 498
|
|
},
|
|
{
|
|
"epoch": 4.700996677740864,
|
|
"grad_norm": 0.42465863652778474,
|
|
"learning_rate": 1.1783608662452185e-05,
|
|
"loss": 0.2383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21378985047340393,
|
|
"step": 2830,
|
|
"valid_targets_mean": 5471.6,
|
|
"valid_targets_min": 394
|
|
},
|
|
{
|
|
"epoch": 4.709302325581396,
|
|
"grad_norm": 0.42579676524359583,
|
|
"learning_rate": 1.1708145554465828e-05,
|
|
"loss": 0.2521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.279657244682312,
|
|
"step": 2835,
|
|
"valid_targets_mean": 6063.2,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 4.717607973421927,
|
|
"grad_norm": 0.39107739642342926,
|
|
"learning_rate": 1.1632824729850346e-05,
|
|
"loss": 0.2444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24917662143707275,
|
|
"step": 2840,
|
|
"valid_targets_mean": 6590.5,
|
|
"valid_targets_min": 838
|
|
},
|
|
{
|
|
"epoch": 4.725913621262459,
|
|
"grad_norm": 0.43827762548748705,
|
|
"learning_rate": 1.155764748106702e-05,
|
|
"loss": 0.2635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2380344718694687,
|
|
"step": 2845,
|
|
"valid_targets_mean": 4695.4,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 4.73421926910299,
|
|
"grad_norm": 0.3929556881518351,
|
|
"learning_rate": 1.148261509811346e-05,
|
|
"loss": 0.2442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24223166704177856,
|
|
"step": 2850,
|
|
"valid_targets_mean": 6619.3,
|
|
"valid_targets_min": 2730
|
|
},
|
|
{
|
|
"epoch": 4.7425249169435215,
|
|
"grad_norm": 0.4024804609045274,
|
|
"learning_rate": 1.1407728868501442e-05,
|
|
"loss": 0.2377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20801568031311035,
|
|
"step": 2855,
|
|
"valid_targets_mean": 5320.8,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 4.750830564784053,
|
|
"grad_norm": 0.4438220607292315,
|
|
"learning_rate": 1.1332990077234874e-05,
|
|
"loss": 0.223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1819922924041748,
|
|
"step": 2860,
|
|
"valid_targets_mean": 4592.8,
|
|
"valid_targets_min": 1387
|
|
},
|
|
{
|
|
"epoch": 4.759136212624584,
|
|
"grad_norm": 0.45263223781397144,
|
|
"learning_rate": 1.1258400006787675e-05,
|
|
"loss": 0.2331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21405555307865143,
|
|
"step": 2865,
|
|
"valid_targets_mean": 4559.7,
|
|
"valid_targets_min": 514
|
|
},
|
|
{
|
|
"epoch": 4.767441860465116,
|
|
"grad_norm": 0.43529563690180556,
|
|
"learning_rate": 1.1183959937081804e-05,
|
|
"loss": 0.2325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21780461072921753,
|
|
"step": 2870,
|
|
"valid_targets_mean": 4702.9,
|
|
"valid_targets_min": 755
|
|
},
|
|
{
|
|
"epoch": 4.775747508305648,
|
|
"grad_norm": 0.4566403900941337,
|
|
"learning_rate": 1.1109671145465315e-05,
|
|
"loss": 0.2508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28616863489151,
|
|
"step": 2875,
|
|
"valid_targets_mean": 5726.6,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 4.78405315614618,
|
|
"grad_norm": 0.40786517031038944,
|
|
"learning_rate": 1.1035534906690399e-05,
|
|
"loss": 0.2468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19040827453136444,
|
|
"step": 2880,
|
|
"valid_targets_mean": 4789.8,
|
|
"valid_targets_min": 901
|
|
},
|
|
{
|
|
"epoch": 4.792358803986711,
|
|
"grad_norm": 0.4424889190312134,
|
|
"learning_rate": 1.0961552492891552e-05,
|
|
"loss": 0.2311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2325223684310913,
|
|
"step": 2885,
|
|
"valid_targets_mean": 5279.2,
|
|
"valid_targets_min": 924
|
|
},
|
|
{
|
|
"epoch": 4.8006644518272426,
|
|
"grad_norm": 0.4711833809141337,
|
|
"learning_rate": 1.08877251735637e-05,
|
|
"loss": 0.2291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2181568741798401,
|
|
"step": 2890,
|
|
"valid_targets_mean": 4408.6,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 4.808970099667774,
|
|
"grad_norm": 0.44787229852398797,
|
|
"learning_rate": 1.081405421554044e-05,
|
|
"loss": 0.2435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24087963998317719,
|
|
"step": 2895,
|
|
"valid_targets_mean": 4897.9,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 4.8172757475083055,
|
|
"grad_norm": 0.39469385505234555,
|
|
"learning_rate": 1.074054088297232e-05,
|
|
"loss": 0.2292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22448964416980743,
|
|
"step": 2900,
|
|
"valid_targets_mean": 5780.6,
|
|
"valid_targets_min": 429
|
|
},
|
|
{
|
|
"epoch": 4.825581395348837,
|
|
"grad_norm": 0.36685088716682995,
|
|
"learning_rate": 1.0667186437305115e-05,
|
|
"loss": 0.2391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21279945969581604,
|
|
"step": 2905,
|
|
"valid_targets_mean": 6585.0,
|
|
"valid_targets_min": 1796
|
|
},
|
|
{
|
|
"epoch": 4.833887043189369,
|
|
"grad_norm": 0.40468277115660617,
|
|
"learning_rate": 1.059399213725818e-05,
|
|
"loss": 0.2334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23895129561424255,
|
|
"step": 2910,
|
|
"valid_targets_mean": 5439.4,
|
|
"valid_targets_min": 1552
|
|
},
|
|
{
|
|
"epoch": 4.842192691029901,
|
|
"grad_norm": 0.40034985521426697,
|
|
"learning_rate": 1.0520959238802902e-05,
|
|
"loss": 0.2422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25104984641075134,
|
|
"step": 2915,
|
|
"valid_targets_mean": 6120.8,
|
|
"valid_targets_min": 585
|
|
},
|
|
{
|
|
"epoch": 4.850498338870432,
|
|
"grad_norm": 0.45109975357020304,
|
|
"learning_rate": 1.044808899514107e-05,
|
|
"loss": 0.2563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23319268226623535,
|
|
"step": 2920,
|
|
"valid_targets_mean": 5040.9,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 4.858803986710964,
|
|
"grad_norm": 0.41709716706896544,
|
|
"learning_rate": 1.0375382656683439e-05,
|
|
"loss": 0.241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21863149106502533,
|
|
"step": 2925,
|
|
"valid_targets_mean": 5712.4,
|
|
"valid_targets_min": 3174
|
|
},
|
|
{
|
|
"epoch": 4.867109634551495,
|
|
"grad_norm": 0.49094326202917477,
|
|
"learning_rate": 1.0302841471028228e-05,
|
|
"loss": 0.2522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2769823372364044,
|
|
"step": 2930,
|
|
"valid_targets_mean": 4314.4,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 4.8754152823920265,
|
|
"grad_norm": 0.43299483244109344,
|
|
"learning_rate": 1.0230466682939755e-05,
|
|
"loss": 0.238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2155153453350067,
|
|
"step": 2935,
|
|
"valid_targets_mean": 4583.7,
|
|
"valid_targets_min": 587
|
|
},
|
|
{
|
|
"epoch": 4.883720930232558,
|
|
"grad_norm": 0.40836362511856855,
|
|
"learning_rate": 1.0158259534327032e-05,
|
|
"loss": 0.2388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22418805956840515,
|
|
"step": 2940,
|
|
"valid_targets_mean": 5170.1,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 4.892026578073089,
|
|
"grad_norm": 0.4082420665051288,
|
|
"learning_rate": 1.0086221264222478e-05,
|
|
"loss": 0.2415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21972543001174927,
|
|
"step": 2945,
|
|
"valid_targets_mean": 5136.8,
|
|
"valid_targets_min": 2878
|
|
},
|
|
{
|
|
"epoch": 4.900332225913621,
|
|
"grad_norm": 0.4103756546294053,
|
|
"learning_rate": 1.0014353108760657e-05,
|
|
"loss": 0.2864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.233613520860672,
|
|
"step": 2950,
|
|
"valid_targets_mean": 5763.4,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 4.908637873754153,
|
|
"grad_norm": 0.4284217963680313,
|
|
"learning_rate": 9.942656301157081e-06,
|
|
"loss": 0.2537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2713060975074768,
|
|
"step": 2955,
|
|
"valid_targets_mean": 5246.0,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 4.916943521594685,
|
|
"grad_norm": 0.4791487728236056,
|
|
"learning_rate": 9.871132071687013e-06,
|
|
"loss": 0.2488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22866952419281006,
|
|
"step": 2960,
|
|
"valid_targets_mean": 3940.4,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 4.925249169435216,
|
|
"grad_norm": 0.4575891507300313,
|
|
"learning_rate": 9.799781647664382e-06,
|
|
"loss": 0.2333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24666227400302887,
|
|
"step": 2965,
|
|
"valid_targets_mean": 4648.8,
|
|
"valid_targets_min": 571
|
|
},
|
|
{
|
|
"epoch": 4.9335548172757475,
|
|
"grad_norm": 0.4690138342894829,
|
|
"learning_rate": 9.728606253420711e-06,
|
|
"loss": 0.265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2449820339679718,
|
|
"step": 2970,
|
|
"valid_targets_mean": 4452.7,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 4.941860465116279,
|
|
"grad_norm": 0.39028349048704136,
|
|
"learning_rate": 9.65760711028414e-06,
|
|
"loss": 0.2408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25587061047554016,
|
|
"step": 2975,
|
|
"valid_targets_mean": 6048.2,
|
|
"valid_targets_min": 2720
|
|
},
|
|
{
|
|
"epoch": 4.95016611295681,
|
|
"grad_norm": 0.3977858351903464,
|
|
"learning_rate": 9.586785436558414e-06,
|
|
"loss": 0.2512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23577530682086945,
|
|
"step": 2980,
|
|
"valid_targets_mean": 5585.4,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 4.958471760797342,
|
|
"grad_norm": 0.44031297851483064,
|
|
"learning_rate": 9.516142447502012e-06,
|
|
"loss": 0.2322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2357667088508606,
|
|
"step": 2985,
|
|
"valid_targets_mean": 5253.5,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 4.966777408637874,
|
|
"grad_norm": 0.44870574789025425,
|
|
"learning_rate": 9.445679355307308e-06,
|
|
"loss": 0.221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21055975556373596,
|
|
"step": 2990,
|
|
"valid_targets_mean": 4307.1,
|
|
"valid_targets_min": 812
|
|
},
|
|
{
|
|
"epoch": 4.975083056478406,
|
|
"grad_norm": 0.495041695062572,
|
|
"learning_rate": 9.375397369079733e-06,
|
|
"loss": 0.2475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29626789689064026,
|
|
"step": 2995,
|
|
"valid_targets_mean": 4853.4,
|
|
"valid_targets_min": 346
|
|
},
|
|
{
|
|
"epoch": 4.983388704318937,
|
|
"grad_norm": 0.504693069211371,
|
|
"learning_rate": 9.305297694817046e-06,
|
|
"loss": 0.2345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2178444266319275,
|
|
"step": 3000,
|
|
"valid_targets_mean": 3305.1,
|
|
"valid_targets_min": 666
|
|
},
|
|
{
|
|
"epoch": 4.9916943521594686,
|
|
"grad_norm": 0.42621100951121194,
|
|
"learning_rate": 9.235381535388638e-06,
|
|
"loss": 0.2251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27390047907829285,
|
|
"step": 3005,
|
|
"valid_targets_mean": 5750.5,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.4171316413932259,
|
|
"learning_rate": 9.16565009051491e-06,
|
|
"loss": 0.2315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22777780890464783,
|
|
"step": 3010,
|
|
"valid_targets_mean": 5378.8,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 5.0083056478405314,
|
|
"grad_norm": 0.4304216286524967,
|
|
"learning_rate": 9.096104556746654e-06,
|
|
"loss": 0.2281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21669331192970276,
|
|
"step": 3015,
|
|
"valid_targets_mean": 5033.2,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 5.016611295681063,
|
|
"grad_norm": 0.4604261563109503,
|
|
"learning_rate": 9.026746127444532e-06,
|
|
"loss": 0.2289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27611351013183594,
|
|
"step": 3020,
|
|
"valid_targets_mean": 5751.4,
|
|
"valid_targets_min": 2741
|
|
},
|
|
{
|
|
"epoch": 5.024916943521594,
|
|
"grad_norm": 0.46120419942918134,
|
|
"learning_rate": 8.957575992758622e-06,
|
|
"loss": 0.2229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20502465963363647,
|
|
"step": 3025,
|
|
"valid_targets_mean": 4110.6,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 5.033222591362127,
|
|
"grad_norm": 0.44517467055180354,
|
|
"learning_rate": 8.888595339607961e-06,
|
|
"loss": 0.2318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24288401007652283,
|
|
"step": 3030,
|
|
"valid_targets_mean": 4679.4,
|
|
"valid_targets_min": 480
|
|
},
|
|
{
|
|
"epoch": 5.041528239202658,
|
|
"grad_norm": 0.46953572454623016,
|
|
"learning_rate": 8.819805351660189e-06,
|
|
"loss": 0.2395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2158963680267334,
|
|
"step": 3035,
|
|
"valid_targets_mean": 4201.8,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 5.04983388704319,
|
|
"grad_norm": 0.45606815195401024,
|
|
"learning_rate": 8.751207209311268e-06,
|
|
"loss": 0.2174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24551312625408173,
|
|
"step": 3040,
|
|
"valid_targets_mean": 5986.5,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 5.058139534883721,
|
|
"grad_norm": 0.48652102920560825,
|
|
"learning_rate": 8.682802089665166e-06,
|
|
"loss": 0.2037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2140728086233139,
|
|
"step": 3045,
|
|
"valid_targets_mean": 4784.5,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 5.0664451827242525,
|
|
"grad_norm": 0.45309160508969026,
|
|
"learning_rate": 8.614591166513732e-06,
|
|
"loss": 0.2231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21999751031398773,
|
|
"step": 3050,
|
|
"valid_targets_mean": 4526.1,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 5.074750830564784,
|
|
"grad_norm": 0.49277275536473486,
|
|
"learning_rate": 8.54657561031649e-06,
|
|
"loss": 0.2445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2860904932022095,
|
|
"step": 3055,
|
|
"valid_targets_mean": 5334.8,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 5.083056478405315,
|
|
"grad_norm": 0.4201819534787798,
|
|
"learning_rate": 8.478756588180584e-06,
|
|
"loss": 0.2279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24805369973182678,
|
|
"step": 3060,
|
|
"valid_targets_mean": 5407.9,
|
|
"valid_targets_min": 521
|
|
},
|
|
{
|
|
"epoch": 5.091362126245847,
|
|
"grad_norm": 0.5008199524621624,
|
|
"learning_rate": 8.411135263840767e-06,
|
|
"loss": 0.2422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2562616467475891,
|
|
"step": 3065,
|
|
"valid_targets_mean": 4334.9,
|
|
"valid_targets_min": 827
|
|
},
|
|
{
|
|
"epoch": 5.099667774086379,
|
|
"grad_norm": 0.4439245857142909,
|
|
"learning_rate": 8.343712797639392e-06,
|
|
"loss": 0.2077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21692700684070587,
|
|
"step": 3070,
|
|
"valid_targets_mean": 4765.7,
|
|
"valid_targets_min": 901
|
|
},
|
|
{
|
|
"epoch": 5.107973421926911,
|
|
"grad_norm": 0.44447115488621913,
|
|
"learning_rate": 8.276490346506534e-06,
|
|
"loss": 0.2174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23482851684093475,
|
|
"step": 3075,
|
|
"valid_targets_mean": 4945.3,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 5.116279069767442,
|
|
"grad_norm": 0.45580520011205805,
|
|
"learning_rate": 8.209469063940117e-06,
|
|
"loss": 0.2351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2659309506416321,
|
|
"step": 3080,
|
|
"valid_targets_mean": 5147.2,
|
|
"valid_targets_min": 725
|
|
},
|
|
{
|
|
"epoch": 5.1245847176079735,
|
|
"grad_norm": 0.49126035761429754,
|
|
"learning_rate": 8.142650099986147e-06,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21253114938735962,
|
|
"step": 3085,
|
|
"valid_targets_mean": 3621.6,
|
|
"valid_targets_min": 571
|
|
},
|
|
{
|
|
"epoch": 5.132890365448505,
|
|
"grad_norm": 0.45143997418246035,
|
|
"learning_rate": 8.07603460121895e-06,
|
|
"loss": 0.2048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22359229624271393,
|
|
"step": 3090,
|
|
"valid_targets_mean": 4491.1,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 5.141196013289036,
|
|
"grad_norm": 0.5308566295698516,
|
|
"learning_rate": 8.009623710721497e-06,
|
|
"loss": 0.2278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22379395365715027,
|
|
"step": 3095,
|
|
"valid_targets_mean": 3485.9,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 5.149501661129568,
|
|
"grad_norm": 0.4594992607181394,
|
|
"learning_rate": 7.943418568065831e-06,
|
|
"loss": 0.24,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27382892370224,
|
|
"step": 3100,
|
|
"valid_targets_mean": 5114.1,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 5.157807308970099,
|
|
"grad_norm": 0.46910853748245257,
|
|
"learning_rate": 7.877420309293454e-06,
|
|
"loss": 0.2227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22792881727218628,
|
|
"step": 3105,
|
|
"valid_targets_mean": 4528.2,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 5.166112956810632,
|
|
"grad_norm": 0.4114580162979351,
|
|
"learning_rate": 7.811630066895874e-06,
|
|
"loss": 0.2431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20216363668441772,
|
|
"step": 3110,
|
|
"valid_targets_mean": 5314.4,
|
|
"valid_targets_min": 755
|
|
},
|
|
{
|
|
"epoch": 5.174418604651163,
|
|
"grad_norm": 0.4897127230386928,
|
|
"learning_rate": 7.746048969795153e-06,
|
|
"loss": 0.2382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2738298177719116,
|
|
"step": 3115,
|
|
"valid_targets_mean": 5019.1,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 5.1827242524916945,
|
|
"grad_norm": 0.4883036007287513,
|
|
"learning_rate": 7.680678143324558e-06,
|
|
"loss": 0.221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2650836110115051,
|
|
"step": 3120,
|
|
"valid_targets_mean": 5325.4,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 5.191029900332226,
|
|
"grad_norm": 0.4930383413161589,
|
|
"learning_rate": 7.615518709209217e-06,
|
|
"loss": 0.2365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25024664402008057,
|
|
"step": 3125,
|
|
"valid_targets_mean": 4891.3,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 5.1993355481727574,
|
|
"grad_norm": 0.485506455217791,
|
|
"learning_rate": 7.5505717855468985e-06,
|
|
"loss": 0.2303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22848017513751984,
|
|
"step": 3130,
|
|
"valid_targets_mean": 4230.0,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 5.207641196013289,
|
|
"grad_norm": 0.4444149600591392,
|
|
"learning_rate": 7.485838486788803e-06,
|
|
"loss": 0.2547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24224744737148285,
|
|
"step": 3135,
|
|
"valid_targets_mean": 5409.6,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 5.21594684385382,
|
|
"grad_norm": 0.42958041138579917,
|
|
"learning_rate": 7.421319923720478e-06,
|
|
"loss": 0.2305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2498837262392044,
|
|
"step": 3140,
|
|
"valid_targets_mean": 5720.4,
|
|
"valid_targets_min": 587
|
|
},
|
|
{
|
|
"epoch": 5.224252491694352,
|
|
"grad_norm": 0.4361054500702985,
|
|
"learning_rate": 7.357017203442711e-06,
|
|
"loss": 0.2414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23597608506679535,
|
|
"step": 3145,
|
|
"valid_targets_mean": 5465.1,
|
|
"valid_targets_min": 2655
|
|
},
|
|
{
|
|
"epoch": 5.232558139534884,
|
|
"grad_norm": 0.4105703695276094,
|
|
"learning_rate": 7.292931429352561e-06,
|
|
"loss": 0.2318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18645566701889038,
|
|
"step": 3150,
|
|
"valid_targets_mean": 4998.8,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 5.240863787375416,
|
|
"grad_norm": 0.462294143247149,
|
|
"learning_rate": 7.229063701124406e-06,
|
|
"loss": 0.2337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19593483209609985,
|
|
"step": 3155,
|
|
"valid_targets_mean": 4250.3,
|
|
"valid_targets_min": 683
|
|
},
|
|
{
|
|
"epoch": 5.249169435215947,
|
|
"grad_norm": 0.4933020178708707,
|
|
"learning_rate": 7.16541511469111e-06,
|
|
"loss": 0.2483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2854885458946228,
|
|
"step": 3160,
|
|
"valid_targets_mean": 4863.6,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 5.2574750830564785,
|
|
"grad_norm": 0.47100539613922365,
|
|
"learning_rate": 7.10198676222517e-06,
|
|
"loss": 0.2311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2627774477005005,
|
|
"step": 3165,
|
|
"valid_targets_mean": 5504.2,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 5.26578073089701,
|
|
"grad_norm": 0.4409598821007224,
|
|
"learning_rate": 7.038779732119996e-06,
|
|
"loss": 0.23,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2099049985408783,
|
|
"step": 3170,
|
|
"valid_targets_mean": 4666.0,
|
|
"valid_targets_min": 518
|
|
},
|
|
{
|
|
"epoch": 5.274086378737541,
|
|
"grad_norm": 0.3977269961448043,
|
|
"learning_rate": 6.975795108971257e-06,
|
|
"loss": 0.2415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26759013533592224,
|
|
"step": 3175,
|
|
"valid_targets_mean": 6431.2,
|
|
"valid_targets_min": 3098
|
|
},
|
|
{
|
|
"epoch": 5.282392026578073,
|
|
"grad_norm": 0.5009812542334566,
|
|
"learning_rate": 6.9130339735582295e-06,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23460696637630463,
|
|
"step": 3180,
|
|
"valid_targets_mean": 4274.6,
|
|
"valid_targets_min": 385
|
|
},
|
|
{
|
|
"epoch": 5.290697674418604,
|
|
"grad_norm": 0.5003835373429322,
|
|
"learning_rate": 6.850497402825267e-06,
|
|
"loss": 0.2289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2229413390159607,
|
|
"step": 3185,
|
|
"valid_targets_mean": 4435.6,
|
|
"valid_targets_min": 988
|
|
},
|
|
{
|
|
"epoch": 5.299003322259137,
|
|
"grad_norm": 0.48174450863156393,
|
|
"learning_rate": 6.7881864698633515e-06,
|
|
"loss": 0.227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2120680809020996,
|
|
"step": 3190,
|
|
"valid_targets_mean": 4338.5,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 5.307308970099668,
|
|
"grad_norm": 0.40733294034477013,
|
|
"learning_rate": 6.726102243891619e-06,
|
|
"loss": 0.2222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20171259343624115,
|
|
"step": 3195,
|
|
"valid_targets_mean": 5710.9,
|
|
"valid_targets_min": 2599
|
|
},
|
|
{
|
|
"epoch": 5.3156146179401995,
|
|
"grad_norm": 0.42125696493601356,
|
|
"learning_rate": 6.664245790239079e-06,
|
|
"loss": 0.2322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21834102272987366,
|
|
"step": 3200,
|
|
"valid_targets_mean": 5556.6,
|
|
"valid_targets_min": 653
|
|
},
|
|
{
|
|
"epoch": 5.323920265780731,
|
|
"grad_norm": 0.46148017663717666,
|
|
"learning_rate": 6.6026181703262785e-06,
|
|
"loss": 0.2509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22677063941955566,
|
|
"step": 3205,
|
|
"valid_targets_mean": 5116.8,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 5.332225913621262,
|
|
"grad_norm": 0.4763983971140536,
|
|
"learning_rate": 6.541220441647107e-06,
|
|
"loss": 0.2437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23294970393180847,
|
|
"step": 3210,
|
|
"valid_targets_mean": 4675.9,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 5.340531561461794,
|
|
"grad_norm": 0.6454267309376256,
|
|
"learning_rate": 6.4800536577506846e-06,
|
|
"loss": 0.2325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2350747436285019,
|
|
"step": 3215,
|
|
"valid_targets_mean": 3165.4,
|
|
"valid_targets_min": 508
|
|
},
|
|
{
|
|
"epoch": 5.348837209302325,
|
|
"grad_norm": 0.4375383656138322,
|
|
"learning_rate": 6.4191188682232266e-06,
|
|
"loss": 0.2412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2368248552083969,
|
|
"step": 3220,
|
|
"valid_targets_mean": 5322.1,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 5.357142857142857,
|
|
"grad_norm": 0.44806023917067106,
|
|
"learning_rate": 6.358417118670062e-06,
|
|
"loss": 0.2355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22272992134094238,
|
|
"step": 3225,
|
|
"valid_targets_mean": 5169.2,
|
|
"valid_targets_min": 2630
|
|
},
|
|
{
|
|
"epoch": 5.365448504983389,
|
|
"grad_norm": 0.5824433006408986,
|
|
"learning_rate": 6.297949450697711e-06,
|
|
"loss": 0.2358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25263458490371704,
|
|
"step": 3230,
|
|
"valid_targets_mean": 5802.6,
|
|
"valid_targets_min": 1242
|
|
},
|
|
{
|
|
"epoch": 5.3737541528239205,
|
|
"grad_norm": 0.41264474318161565,
|
|
"learning_rate": 6.2377169018959735e-06,
|
|
"loss": 0.2435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24440588057041168,
|
|
"step": 3235,
|
|
"valid_targets_mean": 5901.7,
|
|
"valid_targets_min": 937
|
|
},
|
|
{
|
|
"epoch": 5.382059800664452,
|
|
"grad_norm": 0.46322964818009077,
|
|
"learning_rate": 6.177720505820142e-06,
|
|
"loss": 0.244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2705880403518677,
|
|
"step": 3240,
|
|
"valid_targets_mean": 5227.4,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 5.3903654485049834,
|
|
"grad_norm": 0.4787286875195174,
|
|
"learning_rate": 6.117961291973267e-06,
|
|
"loss": 0.2352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21100547909736633,
|
|
"step": 3245,
|
|
"valid_targets_mean": 4280.3,
|
|
"valid_targets_min": 547
|
|
},
|
|
{
|
|
"epoch": 5.398671096345515,
|
|
"grad_norm": 0.49344823648177194,
|
|
"learning_rate": 6.058440285788507e-06,
|
|
"loss": 0.2445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22111806273460388,
|
|
"step": 3250,
|
|
"valid_targets_mean": 4295.1,
|
|
"valid_targets_min": 833
|
|
},
|
|
{
|
|
"epoch": 5.406976744186046,
|
|
"grad_norm": 0.4856367513253345,
|
|
"learning_rate": 5.999158508611496e-06,
|
|
"loss": 0.221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23416313529014587,
|
|
"step": 3255,
|
|
"valid_targets_mean": 4542.7,
|
|
"valid_targets_min": 382
|
|
},
|
|
{
|
|
"epoch": 5.415282392026578,
|
|
"grad_norm": 0.4696054079376104,
|
|
"learning_rate": 5.940116977682843e-06,
|
|
"loss": 0.2281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22714456915855408,
|
|
"step": 3260,
|
|
"valid_targets_mean": 4542.2,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 5.423588039867109,
|
|
"grad_norm": 0.38706458896597096,
|
|
"learning_rate": 5.881316706120674e-06,
|
|
"loss": 0.2644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2579371929168701,
|
|
"step": 3265,
|
|
"valid_targets_mean": 6672.3,
|
|
"valid_targets_min": 2425
|
|
},
|
|
{
|
|
"epoch": 5.431893687707642,
|
|
"grad_norm": 0.457792223877133,
|
|
"learning_rate": 5.8227587029032555e-06,
|
|
"loss": 0.2214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20852676033973694,
|
|
"step": 3270,
|
|
"valid_targets_mean": 4799.7,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 5.440199335548173,
|
|
"grad_norm": 0.4995434111564123,
|
|
"learning_rate": 5.7644439728516544e-06,
|
|
"loss": 0.2443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23312224447727203,
|
|
"step": 3275,
|
|
"valid_targets_mean": 3830.2,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 5.4485049833887045,
|
|
"grad_norm": 0.4684147891811133,
|
|
"learning_rate": 5.7063735166125134e-06,
|
|
"loss": 0.2318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20380036532878876,
|
|
"step": 3280,
|
|
"valid_targets_mean": 4138.5,
|
|
"valid_targets_min": 711
|
|
},
|
|
{
|
|
"epoch": 5.456810631229236,
|
|
"grad_norm": 0.44043864069550953,
|
|
"learning_rate": 5.6485483306409015e-06,
|
|
"loss": 0.2251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23983287811279297,
|
|
"step": 3285,
|
|
"valid_targets_mean": 5807.4,
|
|
"valid_targets_min": 810
|
|
},
|
|
{
|
|
"epoch": 5.465116279069767,
|
|
"grad_norm": 0.47851786971713395,
|
|
"learning_rate": 5.590969407183169e-06,
|
|
"loss": 0.229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20528429746627808,
|
|
"step": 3290,
|
|
"valid_targets_mean": 4414.1,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 5.473421926910299,
|
|
"grad_norm": 0.5462441338148109,
|
|
"learning_rate": 5.533637734259952e-06,
|
|
"loss": 0.2459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2961854040622711,
|
|
"step": 3295,
|
|
"valid_targets_mean": 3918.4,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 5.48172757475083,
|
|
"grad_norm": 0.4730556317294938,
|
|
"learning_rate": 5.47655429564921e-06,
|
|
"loss": 0.2334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2205132097005844,
|
|
"step": 3300,
|
|
"valid_targets_mean": 4233.9,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 5.490033222591362,
|
|
"grad_norm": 0.47551450783433413,
|
|
"learning_rate": 5.419720070869365e-06,
|
|
"loss": 0.2696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2287798970937729,
|
|
"step": 3305,
|
|
"valid_targets_mean": 4495.1,
|
|
"valid_targets_min": 959
|
|
},
|
|
{
|
|
"epoch": 5.498338870431894,
|
|
"grad_norm": 0.4850642793262456,
|
|
"learning_rate": 5.363136035162453e-06,
|
|
"loss": 0.2369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26675939559936523,
|
|
"step": 3310,
|
|
"valid_targets_mean": 4500.1,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 5.5066445182724255,
|
|
"grad_norm": 0.4738407374148669,
|
|
"learning_rate": 5.306803159477421e-06,
|
|
"loss": 0.2312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20603547990322113,
|
|
"step": 3315,
|
|
"valid_targets_mean": 4192.2,
|
|
"valid_targets_min": 666
|
|
},
|
|
{
|
|
"epoch": 5.514950166112957,
|
|
"grad_norm": 0.47541721928219915,
|
|
"learning_rate": 5.250722410453451e-06,
|
|
"loss": 0.2436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2448899894952774,
|
|
"step": 3320,
|
|
"valid_targets_mean": 4626.8,
|
|
"valid_targets_min": 571
|
|
},
|
|
{
|
|
"epoch": 5.523255813953488,
|
|
"grad_norm": 0.4917094746261498,
|
|
"learning_rate": 5.194894750403397e-06,
|
|
"loss": 0.2324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2396548092365265,
|
|
"step": 3325,
|
|
"valid_targets_mean": 3924.8,
|
|
"valid_targets_min": 609
|
|
},
|
|
{
|
|
"epoch": 5.53156146179402,
|
|
"grad_norm": 0.4853623741293223,
|
|
"learning_rate": 5.139321137297233e-06,
|
|
"loss": 0.2378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23406252264976501,
|
|
"step": 3330,
|
|
"valid_targets_mean": 4061.1,
|
|
"valid_targets_min": 304
|
|
},
|
|
{
|
|
"epoch": 5.539867109634551,
|
|
"grad_norm": 0.4287415584824058,
|
|
"learning_rate": 5.084002524745642e-06,
|
|
"loss": 0.2374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2444635033607483,
|
|
"step": 3335,
|
|
"valid_targets_mean": 6130.2,
|
|
"valid_targets_min": 377
|
|
},
|
|
{
|
|
"epoch": 5.548172757475083,
|
|
"grad_norm": 0.4399669491813924,
|
|
"learning_rate": 5.028939861983659e-06,
|
|
"loss": 0.2414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26081007719039917,
|
|
"step": 3340,
|
|
"valid_targets_mean": 5801.7,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 5.556478405315614,
|
|
"grad_norm": 0.4152765899453414,
|
|
"learning_rate": 4.974134093854357e-06,
|
|
"loss": 0.2267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2272413671016693,
|
|
"step": 3345,
|
|
"valid_targets_mean": 5334.8,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 5.5647840531561465,
|
|
"grad_norm": 0.4257996270391498,
|
|
"learning_rate": 4.919586160792644e-06,
|
|
"loss": 0.2348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20807236433029175,
|
|
"step": 3350,
|
|
"valid_targets_mean": 5194.5,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 5.573089700996678,
|
|
"grad_norm": 0.44138534979080535,
|
|
"learning_rate": 4.86529699880915e-06,
|
|
"loss": 0.2422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.232402965426445,
|
|
"step": 3355,
|
|
"valid_targets_mean": 5197.9,
|
|
"valid_targets_min": 493
|
|
},
|
|
{
|
|
"epoch": 5.5813953488372094,
|
|
"grad_norm": 0.3796509098931709,
|
|
"learning_rate": 4.811267539474118e-06,
|
|
"loss": 0.2384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22227956354618073,
|
|
"step": 3360,
|
|
"valid_targets_mean": 6966.8,
|
|
"valid_targets_min": 2783
|
|
},
|
|
{
|
|
"epoch": 5.589700996677741,
|
|
"grad_norm": 0.446752830620699,
|
|
"learning_rate": 4.757498709901469e-06,
|
|
"loss": 0.2273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22506795823574066,
|
|
"step": 3365,
|
|
"valid_targets_mean": 4925.7,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 5.598006644518272,
|
|
"grad_norm": 0.4912315000380674,
|
|
"learning_rate": 4.7039914327328525e-06,
|
|
"loss": 0.2313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21359623968601227,
|
|
"step": 3370,
|
|
"valid_targets_mean": 4188.2,
|
|
"valid_targets_min": 711
|
|
},
|
|
{
|
|
"epoch": 5.606312292358804,
|
|
"grad_norm": 0.46814229870842355,
|
|
"learning_rate": 4.650746626121838e-06,
|
|
"loss": 0.2265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25967177748680115,
|
|
"step": 3375,
|
|
"valid_targets_mean": 5315.4,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 5.614617940199335,
|
|
"grad_norm": 0.3978403748395847,
|
|
"learning_rate": 4.597765203718163e-06,
|
|
"loss": 0.2137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22414571046829224,
|
|
"step": 3380,
|
|
"valid_targets_mean": 6147.3,
|
|
"valid_targets_min": 891
|
|
},
|
|
{
|
|
"epoch": 5.622923588039868,
|
|
"grad_norm": 0.44005754548787746,
|
|
"learning_rate": 4.54504807465203e-06,
|
|
"loss": 0.2478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23546786606311798,
|
|
"step": 3385,
|
|
"valid_targets_mean": 5165.6,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 5.631229235880399,
|
|
"grad_norm": 0.532008762710549,
|
|
"learning_rate": 4.4925961435185325e-06,
|
|
"loss": 0.2485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2866167426109314,
|
|
"step": 3390,
|
|
"valid_targets_mean": 6012.0,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 5.6395348837209305,
|
|
"grad_norm": 0.438504725032332,
|
|
"learning_rate": 4.4404103103621136e-06,
|
|
"loss": 0.2185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22224926948547363,
|
|
"step": 3395,
|
|
"valid_targets_mean": 4584.6,
|
|
"valid_targets_min": 603
|
|
},
|
|
{
|
|
"epoch": 5.647840531561462,
|
|
"grad_norm": 0.43474486938117457,
|
|
"learning_rate": 4.388491470661149e-06,
|
|
"loss": 0.2117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1911046802997589,
|
|
"step": 3400,
|
|
"valid_targets_mean": 4708.2,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 5.656146179401993,
|
|
"grad_norm": 0.38977631994239326,
|
|
"learning_rate": 4.3368405153125435e-06,
|
|
"loss": 0.2331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25848114490509033,
|
|
"step": 3405,
|
|
"valid_targets_mean": 7304.8,
|
|
"valid_targets_min": 3549
|
|
},
|
|
{
|
|
"epoch": 5.664451827242525,
|
|
"grad_norm": 0.4421169042139646,
|
|
"learning_rate": 4.285458330616467e-06,
|
|
"loss": 0.2541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26357150077819824,
|
|
"step": 3410,
|
|
"valid_targets_mean": 5615.2,
|
|
"valid_targets_min": 1427
|
|
},
|
|
{
|
|
"epoch": 5.672757475083056,
|
|
"grad_norm": 0.4443208885163003,
|
|
"learning_rate": 4.2343457982611595e-06,
|
|
"loss": 0.2574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26102685928344727,
|
|
"step": 3415,
|
|
"valid_targets_mean": 5600.4,
|
|
"valid_targets_min": 960
|
|
},
|
|
{
|
|
"epoch": 5.681063122923588,
|
|
"grad_norm": 0.4505495102445253,
|
|
"learning_rate": 4.183503795307762e-06,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2571350932121277,
|
|
"step": 3420,
|
|
"valid_targets_mean": 5339.7,
|
|
"valid_targets_min": 917
|
|
},
|
|
{
|
|
"epoch": 5.689368770764119,
|
|
"grad_norm": 0.4555742022652996,
|
|
"learning_rate": 4.132933194175299e-06,
|
|
"loss": 0.2533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29356956481933594,
|
|
"step": 3425,
|
|
"valid_targets_mean": 5608.4,
|
|
"valid_targets_min": 793
|
|
},
|
|
{
|
|
"epoch": 5.6976744186046515,
|
|
"grad_norm": 0.4001579148134935,
|
|
"learning_rate": 4.082634862625697e-06,
|
|
"loss": 0.2432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27087894082069397,
|
|
"step": 3430,
|
|
"valid_targets_mean": 6153.1,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 5.705980066445183,
|
|
"grad_norm": 0.4095628360710088,
|
|
"learning_rate": 4.032609663748898e-06,
|
|
"loss": 0.2222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19861236214637756,
|
|
"step": 3435,
|
|
"valid_targets_mean": 5550.6,
|
|
"valid_targets_min": 3196
|
|
},
|
|
{
|
|
"epoch": 5.714285714285714,
|
|
"grad_norm": 0.5261516186695756,
|
|
"learning_rate": 3.982858455948051e-06,
|
|
"loss": 0.2483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2835808992385864,
|
|
"step": 3440,
|
|
"valid_targets_mean": 5236.3,
|
|
"valid_targets_min": 631
|
|
},
|
|
{
|
|
"epoch": 5.722591362126246,
|
|
"grad_norm": 0.4183207662986473,
|
|
"learning_rate": 3.933382092924769e-06,
|
|
"loss": 0.2282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21700972318649292,
|
|
"step": 3445,
|
|
"valid_targets_mean": 5654.6,
|
|
"valid_targets_min": 2777
|
|
},
|
|
{
|
|
"epoch": 5.730897009966777,
|
|
"grad_norm": 0.46629041238833296,
|
|
"learning_rate": 3.8841814236644856e-06,
|
|
"loss": 0.2506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17355819046497345,
|
|
"step": 3450,
|
|
"valid_targets_mean": 3970.5,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 5.739202657807309,
|
|
"grad_norm": 0.4841699154836914,
|
|
"learning_rate": 3.835257292421915e-06,
|
|
"loss": 0.2487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2557890713214874,
|
|
"step": 3455,
|
|
"valid_targets_mean": 5023.1,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 5.74750830564784,
|
|
"grad_norm": 0.4218808064629392,
|
|
"learning_rate": 3.786610538706514e-06,
|
|
"loss": 0.2492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2607504427433014,
|
|
"step": 3460,
|
|
"valid_targets_mean": 5998.1,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 5.7558139534883725,
|
|
"grad_norm": 0.4420450190541701,
|
|
"learning_rate": 3.7382419972681196e-06,
|
|
"loss": 0.2334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2407008856534958,
|
|
"step": 3465,
|
|
"valid_targets_mean": 5490.1,
|
|
"valid_targets_min": 2433
|
|
},
|
|
{
|
|
"epoch": 5.764119601328904,
|
|
"grad_norm": 0.4540154634711204,
|
|
"learning_rate": 3.690152498082593e-06,
|
|
"loss": 0.2365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24033673107624054,
|
|
"step": 3470,
|
|
"valid_targets_mean": 5359.9,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 5.7724252491694354,
|
|
"grad_norm": 0.4483031244836369,
|
|
"learning_rate": 3.642342866337618e-06,
|
|
"loss": 0.2466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22894030809402466,
|
|
"step": 3475,
|
|
"valid_targets_mean": 4689.1,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 5.780730897009967,
|
|
"grad_norm": 0.4434838616508586,
|
|
"learning_rate": 3.5948139224184963e-06,
|
|
"loss": 0.2426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21478234231472015,
|
|
"step": 3480,
|
|
"valid_targets_mean": 4750.2,
|
|
"valid_targets_min": 778
|
|
},
|
|
{
|
|
"epoch": 5.789036544850498,
|
|
"grad_norm": 0.45404626311984925,
|
|
"learning_rate": 3.5475664818940915e-06,
|
|
"loss": 0.217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18791207671165466,
|
|
"step": 3485,
|
|
"valid_targets_mean": 4209.2,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 5.79734219269103,
|
|
"grad_norm": 0.49260684291055373,
|
|
"learning_rate": 3.5006013555028508e-06,
|
|
"loss": 0.2341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25759583711624146,
|
|
"step": 3490,
|
|
"valid_targets_mean": 4916.9,
|
|
"valid_targets_min": 483
|
|
},
|
|
{
|
|
"epoch": 5.805647840531561,
|
|
"grad_norm": 0.4107225439742698,
|
|
"learning_rate": 3.453919349138859e-06,
|
|
"loss": 0.2362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22329118847846985,
|
|
"step": 3495,
|
|
"valid_targets_mean": 6269.4,
|
|
"valid_targets_min": 3371
|
|
},
|
|
{
|
|
"epoch": 5.813953488372093,
|
|
"grad_norm": 0.4175230550237731,
|
|
"learning_rate": 3.4075212638380274e-06,
|
|
"loss": 0.2427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21239593625068665,
|
|
"step": 3500,
|
|
"valid_targets_mean": 5465.2,
|
|
"valid_targets_min": 2182
|
|
},
|
|
{
|
|
"epoch": 5.822259136212624,
|
|
"grad_norm": 0.40653209530974055,
|
|
"learning_rate": 3.361407895764364e-06,
|
|
"loss": 0.2332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24708881974220276,
|
|
"step": 3505,
|
|
"valid_targets_mean": 6670.7,
|
|
"valid_targets_min": 3204
|
|
},
|
|
{
|
|
"epoch": 5.8305647840531565,
|
|
"grad_norm": 0.48805279786035993,
|
|
"learning_rate": 3.3155800361962733e-06,
|
|
"loss": 0.2459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24383389949798584,
|
|
"step": 3510,
|
|
"valid_targets_mean": 4484.7,
|
|
"valid_targets_min": 728
|
|
},
|
|
{
|
|
"epoch": 5.838870431893688,
|
|
"grad_norm": 0.4292492557994935,
|
|
"learning_rate": 3.270038471513022e-06,
|
|
"loss": 0.2357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24906840920448303,
|
|
"step": 3515,
|
|
"valid_targets_mean": 6060.6,
|
|
"valid_targets_min": 581
|
|
},
|
|
{
|
|
"epoch": 5.847176079734219,
|
|
"grad_norm": 0.502442885949651,
|
|
"learning_rate": 3.224783983181212e-06,
|
|
"loss": 0.2432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23381106555461884,
|
|
"step": 3520,
|
|
"valid_targets_mean": 5117.4,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 5.855481727574751,
|
|
"grad_norm": 0.4892455076488496,
|
|
"learning_rate": 3.179817347741381e-06,
|
|
"loss": 0.2349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2526337802410126,
|
|
"step": 3525,
|
|
"valid_targets_mean": 4235.6,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 5.863787375415282,
|
|
"grad_norm": 0.47359435978595765,
|
|
"learning_rate": 3.1351393367946904e-06,
|
|
"loss": 0.2469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2487560212612152,
|
|
"step": 3530,
|
|
"valid_targets_mean": 4572.2,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 5.872093023255814,
|
|
"grad_norm": 0.46021738820867125,
|
|
"learning_rate": 3.0907507169896657e-06,
|
|
"loss": 0.2169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20863378047943115,
|
|
"step": 3535,
|
|
"valid_targets_mean": 4654.3,
|
|
"valid_targets_min": 498
|
|
},
|
|
{
|
|
"epoch": 5.880398671096345,
|
|
"grad_norm": 0.43168878258385085,
|
|
"learning_rate": 3.0466522500090435e-06,
|
|
"loss": 0.2486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26234740018844604,
|
|
"step": 3540,
|
|
"valid_targets_mean": 5694.6,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 5.8887043189368775,
|
|
"grad_norm": 0.537403999126968,
|
|
"learning_rate": 3.00284469255673e-06,
|
|
"loss": 0.2395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21333393454551697,
|
|
"step": 3545,
|
|
"valid_targets_mean": 4123.2,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 5.897009966777409,
|
|
"grad_norm": 0.42603181677170304,
|
|
"learning_rate": 2.959328796344774e-06,
|
|
"loss": 0.2366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2744417190551758,
|
|
"step": 3550,
|
|
"valid_targets_mean": 5970.3,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 5.90531561461794,
|
|
"grad_norm": 0.44624072737224996,
|
|
"learning_rate": 2.9161053080804924e-06,
|
|
"loss": 0.236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2650454640388489,
|
|
"step": 3555,
|
|
"valid_targets_mean": 5331.1,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 5.913621262458472,
|
|
"grad_norm": 0.43297999326233494,
|
|
"learning_rate": 2.8731749694536514e-06,
|
|
"loss": 0.2531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2445681393146515,
|
|
"step": 3560,
|
|
"valid_targets_mean": 5539.5,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 5.921926910299003,
|
|
"grad_norm": 0.4469511684066743,
|
|
"learning_rate": 2.8305385171237533e-06,
|
|
"loss": 0.2355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23026373982429504,
|
|
"step": 3565,
|
|
"valid_targets_mean": 4860.9,
|
|
"valid_targets_min": 655
|
|
},
|
|
{
|
|
"epoch": 5.930232558139535,
|
|
"grad_norm": 0.49331430834165785,
|
|
"learning_rate": 2.7881966827073694e-06,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2263932079076767,
|
|
"step": 3570,
|
|
"valid_targets_mean": 4127.2,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 5.938538205980066,
|
|
"grad_norm": 0.4462762373358431,
|
|
"learning_rate": 2.7461501927656066e-06,
|
|
"loss": 0.259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25776398181915283,
|
|
"step": 3575,
|
|
"valid_targets_mean": 5121.9,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 5.946843853820598,
|
|
"grad_norm": 0.48137999647515634,
|
|
"learning_rate": 2.7043997687916233e-06,
|
|
"loss": 0.2349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20870786905288696,
|
|
"step": 3580,
|
|
"valid_targets_mean": 4762.0,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 5.955149501661129,
|
|
"grad_norm": 0.44492351974263955,
|
|
"learning_rate": 2.6629461271982782e-06,
|
|
"loss": 0.2236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21499571204185486,
|
|
"step": 3585,
|
|
"valid_targets_mean": 5388.1,
|
|
"valid_targets_min": 485
|
|
},
|
|
{
|
|
"epoch": 5.9634551495016614,
|
|
"grad_norm": 0.44430928201290437,
|
|
"learning_rate": 2.6217899793058045e-06,
|
|
"loss": 0.2376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22696585953235626,
|
|
"step": 3590,
|
|
"valid_targets_mean": 4949.3,
|
|
"valid_targets_min": 884
|
|
},
|
|
{
|
|
"epoch": 5.971760797342193,
|
|
"grad_norm": 0.43145539994008186,
|
|
"learning_rate": 2.580932031329615e-06,
|
|
"loss": 0.2228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2558603286743164,
|
|
"step": 3595,
|
|
"valid_targets_mean": 5676.7,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 5.980066445182724,
|
|
"grad_norm": 0.45500897055756284,
|
|
"learning_rate": 2.5403729843681977e-06,
|
|
"loss": 0.2368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20234350860118866,
|
|
"step": 3600,
|
|
"valid_targets_mean": 4266.9,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 5.988372093023256,
|
|
"grad_norm": 0.42916757485297197,
|
|
"learning_rate": 2.500113534391069e-06,
|
|
"loss": 0.2437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21844570338726044,
|
|
"step": 3605,
|
|
"valid_targets_mean": 5508.3,
|
|
"valid_targets_min": 1554
|
|
},
|
|
{
|
|
"epoch": 5.996677740863787,
|
|
"grad_norm": 0.4490427875960071,
|
|
"learning_rate": 2.4601543722268306e-06,
|
|
"loss": 0.209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2069842517375946,
|
|
"step": 3610,
|
|
"valid_targets_mean": 4250.4,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 6.004983388704319,
|
|
"grad_norm": 0.4406027520006221,
|
|
"learning_rate": 2.4204961835513263e-06,
|
|
"loss": 0.238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26036739349365234,
|
|
"step": 3615,
|
|
"valid_targets_mean": 6218.9,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 6.01328903654485,
|
|
"grad_norm": 0.4520647885441624,
|
|
"learning_rate": 2.381139648875874e-06,
|
|
"loss": 0.2388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24916315078735352,
|
|
"step": 3620,
|
|
"valid_targets_mean": 5220.8,
|
|
"valid_targets_min": 1284
|
|
},
|
|
{
|
|
"epoch": 6.0215946843853825,
|
|
"grad_norm": 0.42185220877699714,
|
|
"learning_rate": 2.342085443535582e-06,
|
|
"loss": 0.2345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24784894287586212,
|
|
"step": 3625,
|
|
"valid_targets_mean": 6295.3,
|
|
"valid_targets_min": 2202
|
|
},
|
|
{
|
|
"epoch": 6.029900332225914,
|
|
"grad_norm": 0.5114541372945465,
|
|
"learning_rate": 2.303334237677761e-06,
|
|
"loss": 0.2064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22458359599113464,
|
|
"step": 3630,
|
|
"valid_targets_mean": 3602.7,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 6.038205980066445,
|
|
"grad_norm": 0.4292979044249428,
|
|
"learning_rate": 2.264886696250428e-06,
|
|
"loss": 0.2392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2681623697280884,
|
|
"step": 3635,
|
|
"valid_targets_mean": 5899.6,
|
|
"valid_targets_min": 2685
|
|
},
|
|
{
|
|
"epoch": 6.046511627906977,
|
|
"grad_norm": 0.42970109349265695,
|
|
"learning_rate": 2.226743478990907e-06,
|
|
"loss": 0.2494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19652216136455536,
|
|
"step": 3640,
|
|
"valid_targets_mean": 4847.6,
|
|
"valid_targets_min": 779
|
|
},
|
|
{
|
|
"epoch": 6.054817275747508,
|
|
"grad_norm": 0.46892947994010903,
|
|
"learning_rate": 2.188905240414485e-06,
|
|
"loss": 0.2359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23695795238018036,
|
|
"step": 3645,
|
|
"valid_targets_mean": 4636.9,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 6.06312292358804,
|
|
"grad_norm": 0.47338349438902694,
|
|
"learning_rate": 2.1513726298031943e-06,
|
|
"loss": 0.2362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2575342059135437,
|
|
"step": 3650,
|
|
"valid_targets_mean": 4424.0,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 6.071428571428571,
|
|
"grad_norm": 0.4593067635017082,
|
|
"learning_rate": 2.114146291194683e-06,
|
|
"loss": 0.2238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24932947754859924,
|
|
"step": 3655,
|
|
"valid_targets_mean": 5104.6,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 6.079734219269103,
|
|
"grad_norm": 0.41654252858628765,
|
|
"learning_rate": 2.0772268633711333e-06,
|
|
"loss": 0.2064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1891307532787323,
|
|
"step": 3660,
|
|
"valid_targets_mean": 5264.4,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 6.088039867109635,
|
|
"grad_norm": 0.4912178386839996,
|
|
"learning_rate": 2.040614979848323e-06,
|
|
"loss": 0.2227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18531829118728638,
|
|
"step": 3665,
|
|
"valid_targets_mean": 4130.5,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 6.096345514950166,
|
|
"grad_norm": 0.4376936958458712,
|
|
"learning_rate": 2.0043112688647624e-06,
|
|
"loss": 0.216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2036692202091217,
|
|
"step": 3670,
|
|
"valid_targets_mean": 5368.9,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 6.104651162790698,
|
|
"grad_norm": 0.43913791377980976,
|
|
"learning_rate": 1.9683163533708804e-06,
|
|
"loss": 0.2271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.226030170917511,
|
|
"step": 3675,
|
|
"valid_targets_mean": 4783.4,
|
|
"valid_targets_min": 385
|
|
},
|
|
{
|
|
"epoch": 6.112956810631229,
|
|
"grad_norm": 0.41095901624268805,
|
|
"learning_rate": 1.9326308510183778e-06,
|
|
"loss": 0.2335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24913448095321655,
|
|
"step": 3680,
|
|
"valid_targets_mean": 6074.4,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 6.121262458471761,
|
|
"grad_norm": 0.43719377333240167,
|
|
"learning_rate": 1.8972553741495891e-06,
|
|
"loss": 0.2339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2161942720413208,
|
|
"step": 3685,
|
|
"valid_targets_mean": 5339.2,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 6.129568106312292,
|
|
"grad_norm": 0.42421098780784455,
|
|
"learning_rate": 1.8621905297870001e-06,
|
|
"loss": 0.2366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2526973485946655,
|
|
"step": 3690,
|
|
"valid_targets_mean": 5977.4,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 6.137873754152824,
|
|
"grad_norm": 0.45924480049790634,
|
|
"learning_rate": 1.827436919622827e-06,
|
|
"loss": 0.2207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23790693283081055,
|
|
"step": 3695,
|
|
"valid_targets_mean": 5202.4,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 6.146179401993355,
|
|
"grad_norm": 0.4880599852148839,
|
|
"learning_rate": 1.7929951400086865e-06,
|
|
"loss": 0.2389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22151699662208557,
|
|
"step": 3700,
|
|
"valid_targets_mean": 4151.4,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 6.1544850498338874,
|
|
"grad_norm": 0.40429790713258357,
|
|
"learning_rate": 1.7588657819453647e-06,
|
|
"loss": 0.2399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26707690954208374,
|
|
"step": 3705,
|
|
"valid_targets_mean": 7080.9,
|
|
"valid_targets_min": 901
|
|
},
|
|
{
|
|
"epoch": 6.162790697674419,
|
|
"grad_norm": 0.47297131442022866,
|
|
"learning_rate": 1.7250494310726717e-06,
|
|
"loss": 0.2245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2547183632850647,
|
|
"step": 3710,
|
|
"valid_targets_mean": 5275.9,
|
|
"valid_targets_min": 446
|
|
},
|
|
{
|
|
"epoch": 6.17109634551495,
|
|
"grad_norm": 0.4621023065620131,
|
|
"learning_rate": 1.6915466676594139e-06,
|
|
"loss": 0.2487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20951791107654572,
|
|
"step": 3715,
|
|
"valid_targets_mean": 4462.2,
|
|
"valid_targets_min": 561
|
|
},
|
|
{
|
|
"epoch": 6.179401993355482,
|
|
"grad_norm": 0.487399778127988,
|
|
"learning_rate": 1.6583580665934019e-06,
|
|
"loss": 0.2144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24150486290454865,
|
|
"step": 3720,
|
|
"valid_targets_mean": 4409.5,
|
|
"valid_targets_min": 666
|
|
},
|
|
{
|
|
"epoch": 6.187707641196013,
|
|
"grad_norm": 0.3794131769462949,
|
|
"learning_rate": 1.625484197371614e-06,
|
|
"loss": 0.2385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24992501735687256,
|
|
"step": 3725,
|
|
"valid_targets_mean": 7038.2,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 6.196013289036545,
|
|
"grad_norm": 0.47656183178631484,
|
|
"learning_rate": 1.5929256240904134e-06,
|
|
"loss": 0.2147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19050493836402893,
|
|
"step": 3730,
|
|
"valid_targets_mean": 3908.0,
|
|
"valid_targets_min": 639
|
|
},
|
|
{
|
|
"epoch": 6.204318936877076,
|
|
"grad_norm": 0.4362689584320358,
|
|
"learning_rate": 1.5606829054358686e-06,
|
|
"loss": 0.2111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21063345670700073,
|
|
"step": 3735,
|
|
"valid_targets_mean": 4625.2,
|
|
"valid_targets_min": 2788
|
|
},
|
|
{
|
|
"epoch": 6.212624584717608,
|
|
"grad_norm": 0.42964660492749973,
|
|
"learning_rate": 1.5287565946741634e-06,
|
|
"loss": 0.2454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2985970377922058,
|
|
"step": 3740,
|
|
"valid_targets_mean": 6667.5,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 6.22093023255814,
|
|
"grad_norm": 0.43382987392601186,
|
|
"learning_rate": 1.4971472396421117e-06,
|
|
"loss": 0.218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2014102041721344,
|
|
"step": 3745,
|
|
"valid_targets_mean": 4609.2,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 6.229235880398671,
|
|
"grad_norm": 0.3979195788815418,
|
|
"learning_rate": 1.4658553827377576e-06,
|
|
"loss": 0.2058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1915767788887024,
|
|
"step": 3750,
|
|
"valid_targets_mean": 5585.8,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 6.237541528239203,
|
|
"grad_norm": 0.4244684978312488,
|
|
"learning_rate": 1.4348815609110522e-06,
|
|
"loss": 0.2409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2314644455909729,
|
|
"step": 3755,
|
|
"valid_targets_mean": 5875.2,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 6.245847176079734,
|
|
"grad_norm": 0.4704754562274346,
|
|
"learning_rate": 1.4042263056546568e-06,
|
|
"loss": 0.2151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20254847407341003,
|
|
"step": 3760,
|
|
"valid_targets_mean": 4501.1,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 6.254152823920266,
|
|
"grad_norm": 0.48886860207280436,
|
|
"learning_rate": 1.3738901429948115e-06,
|
|
"loss": 0.2377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28986746072769165,
|
|
"step": 3765,
|
|
"valid_targets_mean": 5035.8,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 6.262458471760797,
|
|
"grad_norm": 0.49440928930241734,
|
|
"learning_rate": 1.3438735934823277e-06,
|
|
"loss": 0.2178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22345708310604095,
|
|
"step": 3770,
|
|
"valid_targets_mean": 4126.3,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 6.270764119601329,
|
|
"grad_norm": 0.4467658950447266,
|
|
"learning_rate": 1.314177172183626e-06,
|
|
"loss": 0.2221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21315069496631622,
|
|
"step": 3775,
|
|
"valid_targets_mean": 4969.0,
|
|
"valid_targets_min": 213
|
|
},
|
|
{
|
|
"epoch": 6.27906976744186,
|
|
"grad_norm": 0.44103654639231465,
|
|
"learning_rate": 1.2848013886719213e-06,
|
|
"loss": 0.2405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23546433448791504,
|
|
"step": 3780,
|
|
"valid_targets_mean": 5226.8,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 6.287375415282392,
|
|
"grad_norm": 0.4380949997806405,
|
|
"learning_rate": 1.2557467470184759e-06,
|
|
"loss": 0.2233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22821277379989624,
|
|
"step": 3785,
|
|
"valid_targets_mean": 5116.6,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 6.295681063122924,
|
|
"grad_norm": 0.44117712466686354,
|
|
"learning_rate": 1.2270137457839382e-06,
|
|
"loss": 0.2196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2234482765197754,
|
|
"step": 3790,
|
|
"valid_targets_mean": 4946.8,
|
|
"valid_targets_min": 755
|
|
},
|
|
{
|
|
"epoch": 6.303986710963455,
|
|
"grad_norm": 0.42338874116610004,
|
|
"learning_rate": 1.198602878009798e-06,
|
|
"loss": 0.2178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2421126663684845,
|
|
"step": 3795,
|
|
"valid_targets_mean": 6022.7,
|
|
"valid_targets_min": 755
|
|
},
|
|
{
|
|
"epoch": 6.312292358803987,
|
|
"grad_norm": 0.40559963127661497,
|
|
"learning_rate": 1.1705146312099203e-06,
|
|
"loss": 0.2232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19582486152648926,
|
|
"step": 3800,
|
|
"valid_targets_mean": 5444.2,
|
|
"valid_targets_min": 2600
|
|
},
|
|
{
|
|
"epoch": 6.320598006644518,
|
|
"grad_norm": 0.43180156150267923,
|
|
"learning_rate": 1.1427494873621935e-06,
|
|
"loss": 0.253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26115962862968445,
|
|
"step": 3805,
|
|
"valid_targets_mean": 5860.6,
|
|
"valid_targets_min": 2540
|
|
},
|
|
{
|
|
"epoch": 6.32890365448505,
|
|
"grad_norm": 0.41684143737124363,
|
|
"learning_rate": 1.1153079229002394e-06,
|
|
"loss": 0.2487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.218938946723938,
|
|
"step": 3810,
|
|
"valid_targets_mean": 6020.2,
|
|
"valid_targets_min": 762
|
|
},
|
|
{
|
|
"epoch": 6.337209302325581,
|
|
"grad_norm": 0.45674312962182084,
|
|
"learning_rate": 1.088190408705243e-06,
|
|
"loss": 0.2378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20486296713352203,
|
|
"step": 3815,
|
|
"valid_targets_mean": 4557.6,
|
|
"valid_targets_min": 630
|
|
},
|
|
{
|
|
"epoch": 6.3455149501661126,
|
|
"grad_norm": 0.4182281087497353,
|
|
"learning_rate": 1.0613974100978885e-06,
|
|
"loss": 0.2331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25584468245506287,
|
|
"step": 3820,
|
|
"valid_targets_mean": 6093.6,
|
|
"valid_targets_min": 1824
|
|
},
|
|
{
|
|
"epoch": 6.353820598006645,
|
|
"grad_norm": 0.43102254418050767,
|
|
"learning_rate": 1.0349293868303501e-06,
|
|
"loss": 0.2418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2189723253250122,
|
|
"step": 3825,
|
|
"valid_targets_mean": 5130.7,
|
|
"valid_targets_min": 373
|
|
},
|
|
{
|
|
"epoch": 6.362126245847176,
|
|
"grad_norm": 0.4251329240830342,
|
|
"learning_rate": 1.0087867930784268e-06,
|
|
"loss": 0.2268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22146539390087128,
|
|
"step": 3830,
|
|
"valid_targets_mean": 5422.0,
|
|
"valid_targets_min": 2590
|
|
},
|
|
{
|
|
"epoch": 6.370431893687708,
|
|
"grad_norm": 0.45817210971295447,
|
|
"learning_rate": 9.829700774337314e-07,
|
|
"loss": 0.231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23364490270614624,
|
|
"step": 3835,
|
|
"valid_targets_mean": 4721.7,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 6.378737541528239,
|
|
"grad_norm": 0.4547993102966444,
|
|
"learning_rate": 9.57479682895992e-07,
|
|
"loss": 0.2441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25395452976226807,
|
|
"step": 3840,
|
|
"valid_targets_mean": 5376.8,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 6.387043189368771,
|
|
"grad_norm": 0.39685239298428626,
|
|
"learning_rate": 9.323160468654757e-07,
|
|
"loss": 0.2286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20812727510929108,
|
|
"step": 3845,
|
|
"valid_targets_mean": 6078.3,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 6.395348837209302,
|
|
"grad_norm": 0.4835685832356498,
|
|
"learning_rate": 9.074796011354481e-07,
|
|
"loss": 0.2341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25480324029922485,
|
|
"step": 3850,
|
|
"valid_targets_mean": 5138.6,
|
|
"valid_targets_min": 540
|
|
},
|
|
{
|
|
"epoch": 6.403654485049834,
|
|
"grad_norm": 0.5644104013016655,
|
|
"learning_rate": 8.829707718847835e-07,
|
|
"loss": 0.2265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22046582400798798,
|
|
"step": 3855,
|
|
"valid_targets_mean": 5587.8,
|
|
"valid_targets_min": 3158
|
|
},
|
|
{
|
|
"epoch": 6.411960132890365,
|
|
"grad_norm": 0.4489557473002171,
|
|
"learning_rate": 8.587899796706578e-07,
|
|
"loss": 0.2314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1978766918182373,
|
|
"step": 3860,
|
|
"valid_targets_mean": 4630.5,
|
|
"valid_targets_min": 429
|
|
},
|
|
{
|
|
"epoch": 6.420265780730897,
|
|
"grad_norm": 0.48161798179923787,
|
|
"learning_rate": 8.34937639421316e-07,
|
|
"loss": 0.2227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2397552728652954,
|
|
"step": 3865,
|
|
"valid_targets_mean": 5019.3,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 6.428571428571429,
|
|
"grad_norm": 0.43506495912370285,
|
|
"learning_rate": 8.114141604289627e-07,
|
|
"loss": 0.2367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25560131669044495,
|
|
"step": 3870,
|
|
"valid_targets_mean": 6158.9,
|
|
"valid_targets_min": 839
|
|
},
|
|
{
|
|
"epoch": 6.43687707641196,
|
|
"grad_norm": 0.4520968237680518,
|
|
"learning_rate": 7.882199463427276e-07,
|
|
"loss": 0.2326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2333422750234604,
|
|
"step": 3875,
|
|
"valid_targets_mean": 4943.4,
|
|
"valid_targets_min": 2685
|
|
},
|
|
{
|
|
"epoch": 6.445182724252492,
|
|
"grad_norm": 0.4767281818177688,
|
|
"learning_rate": 7.653553951617642e-07,
|
|
"loss": 0.2381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2603071630001068,
|
|
"step": 3880,
|
|
"valid_targets_mean": 4971.0,
|
|
"valid_targets_min": 587
|
|
},
|
|
{
|
|
"epoch": 6.453488372093023,
|
|
"grad_norm": 0.5248512589328865,
|
|
"learning_rate": 7.428208992283848e-07,
|
|
"loss": 0.2385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19664353132247925,
|
|
"step": 3885,
|
|
"valid_targets_mean": 3423.9,
|
|
"valid_targets_min": 587
|
|
},
|
|
{
|
|
"epoch": 6.461794019933555,
|
|
"grad_norm": 0.42658483364281186,
|
|
"learning_rate": 7.206168452213602e-07,
|
|
"loss": 0.2408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2485518455505371,
|
|
"step": 3890,
|
|
"valid_targets_mean": 5422.1,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 6.470099667774086,
|
|
"grad_norm": 0.5189571231640231,
|
|
"learning_rate": 6.987436141492598e-07,
|
|
"loss": 0.2122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24648882448673248,
|
|
"step": 3895,
|
|
"valid_targets_mean": 4074.3,
|
|
"valid_targets_min": 565
|
|
},
|
|
{
|
|
"epoch": 6.4784053156146175,
|
|
"grad_norm": 0.42060118697418014,
|
|
"learning_rate": 6.772015813439381e-07,
|
|
"loss": 0.2318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19569548964500427,
|
|
"step": 3900,
|
|
"valid_targets_mean": 5250.7,
|
|
"valid_targets_min": 435
|
|
},
|
|
{
|
|
"epoch": 6.48671096345515,
|
|
"grad_norm": 0.44736985504712007,
|
|
"learning_rate": 6.559911164540711e-07,
|
|
"loss": 0.2468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24899700284004211,
|
|
"step": 3905,
|
|
"valid_targets_mean": 5416.8,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 6.495016611295681,
|
|
"grad_norm": 0.42214113202222237,
|
|
"learning_rate": 6.351125834388239e-07,
|
|
"loss": 0.2183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20858308672904968,
|
|
"step": 3910,
|
|
"valid_targets_mean": 5364.3,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 6.503322259136213,
|
|
"grad_norm": 0.46665780330649215,
|
|
"learning_rate": 6.145663405616131e-07,
|
|
"loss": 0.2495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2358744740486145,
|
|
"step": 3915,
|
|
"valid_targets_mean": 5301.6,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 6.511627906976744,
|
|
"grad_norm": 0.4607453735665324,
|
|
"learning_rate": 5.943527403839433e-07,
|
|
"loss": 0.2195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2516765594482422,
|
|
"step": 3920,
|
|
"valid_targets_mean": 5014.3,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 6.519933554817276,
|
|
"grad_norm": 0.5022510621686569,
|
|
"learning_rate": 5.744721297593647e-07,
|
|
"loss": 0.2321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3127198815345764,
|
|
"step": 3925,
|
|
"valid_targets_mean": 5381.6,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 6.528239202657807,
|
|
"grad_norm": 0.4193895346270615,
|
|
"learning_rate": 5.549248498275228e-07,
|
|
"loss": 0.2327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21291033923625946,
|
|
"step": 3930,
|
|
"valid_targets_mean": 5334.9,
|
|
"valid_targets_min": 1088
|
|
},
|
|
{
|
|
"epoch": 6.5365448504983386,
|
|
"grad_norm": 0.42527822036573054,
|
|
"learning_rate": 5.357112360083094e-07,
|
|
"loss": 0.24,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22638669610023499,
|
|
"step": 3935,
|
|
"valid_targets_mean": 5197.6,
|
|
"valid_targets_min": 510
|
|
},
|
|
{
|
|
"epoch": 6.544850498338871,
|
|
"grad_norm": 0.42710472507003977,
|
|
"learning_rate": 5.168316179960941e-07,
|
|
"loss": 0.2304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24042093753814697,
|
|
"step": 3940,
|
|
"valid_targets_mean": 5650.6,
|
|
"valid_targets_min": 1073
|
|
},
|
|
{
|
|
"epoch": 6.553156146179402,
|
|
"grad_norm": 0.44295001370632714,
|
|
"learning_rate": 4.982863197540733e-07,
|
|
"loss": 0.2184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2232985496520996,
|
|
"step": 3945,
|
|
"valid_targets_mean": 4831.5,
|
|
"valid_targets_min": 2039
|
|
},
|
|
{
|
|
"epoch": 6.561461794019934,
|
|
"grad_norm": 0.5100489637372868,
|
|
"learning_rate": 4.800756595087164e-07,
|
|
"loss": 0.2266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24133378267288208,
|
|
"step": 3950,
|
|
"valid_targets_mean": 4350.4,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 6.569767441860465,
|
|
"grad_norm": 0.5120819717171471,
|
|
"learning_rate": 4.6219994974429973e-07,
|
|
"loss": 0.2375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1988787055015564,
|
|
"step": 3955,
|
|
"valid_targets_mean": 3363.6,
|
|
"valid_targets_min": 493
|
|
},
|
|
{
|
|
"epoch": 6.578073089700997,
|
|
"grad_norm": 0.4988878184595401,
|
|
"learning_rate": 4.446594971975415e-07,
|
|
"loss": 0.2637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26221519708633423,
|
|
"step": 3960,
|
|
"valid_targets_mean": 4402.1,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 6.586378737541528,
|
|
"grad_norm": 0.39618065452618473,
|
|
"learning_rate": 4.274546028523441e-07,
|
|
"loss": 0.2055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2033381164073944,
|
|
"step": 3965,
|
|
"valid_targets_mean": 6464.8,
|
|
"valid_targets_min": 2637
|
|
},
|
|
{
|
|
"epoch": 6.59468438538206,
|
|
"grad_norm": 0.5083484106187065,
|
|
"learning_rate": 4.1058556193463327e-07,
|
|
"loss": 0.2587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20945313572883606,
|
|
"step": 3970,
|
|
"valid_targets_mean": 4034.3,
|
|
"valid_targets_min": 477
|
|
},
|
|
{
|
|
"epoch": 6.602990033222591,
|
|
"grad_norm": 0.43286528707228594,
|
|
"learning_rate": 3.9405266390727836e-07,
|
|
"loss": 0.231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22717797756195068,
|
|
"step": 3975,
|
|
"valid_targets_mean": 5301.2,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 6.6112956810631225,
|
|
"grad_norm": 0.5265775207623888,
|
|
"learning_rate": 3.778561924651336e-07,
|
|
"loss": 0.2412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2677117586135864,
|
|
"step": 3980,
|
|
"valid_targets_mean": 5437.9,
|
|
"valid_targets_min": 845
|
|
},
|
|
{
|
|
"epoch": 6.619601328903655,
|
|
"grad_norm": 0.5114993053364497,
|
|
"learning_rate": 3.6199642553017776e-07,
|
|
"loss": 0.2559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24857084453105927,
|
|
"step": 3985,
|
|
"valid_targets_mean": 4158.9,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 6.627906976744186,
|
|
"grad_norm": 0.5467535277477339,
|
|
"learning_rate": 3.4647363524672906e-07,
|
|
"loss": 0.2445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22004690766334534,
|
|
"step": 3990,
|
|
"valid_targets_mean": 3768.5,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 6.636212624584718,
|
|
"grad_norm": 0.4458245562911476,
|
|
"learning_rate": 3.312880879767888e-07,
|
|
"loss": 0.2357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2320585697889328,
|
|
"step": 3995,
|
|
"valid_targets_mean": 5251.3,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 6.644518272425249,
|
|
"grad_norm": 0.4881370560889157,
|
|
"learning_rate": 3.164400442954607e-07,
|
|
"loss": 0.2443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23322418332099915,
|
|
"step": 4000,
|
|
"valid_targets_mean": 5023.7,
|
|
"valid_targets_min": 518
|
|
},
|
|
{
|
|
"epoch": 6.652823920265781,
|
|
"grad_norm": 0.44068012382787336,
|
|
"learning_rate": 3.0192975898649e-07,
|
|
"loss": 0.2334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26618653535842896,
|
|
"step": 4005,
|
|
"valid_targets_mean": 5556.6,
|
|
"valid_targets_min": 870
|
|
},
|
|
{
|
|
"epoch": 6.661129568106312,
|
|
"grad_norm": 0.41570301729823905,
|
|
"learning_rate": 2.8775748103788026e-07,
|
|
"loss": 0.2489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20967671275138855,
|
|
"step": 4010,
|
|
"valid_targets_mean": 5448.1,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 6.6694352159468435,
|
|
"grad_norm": 0.4351984660551356,
|
|
"learning_rate": 2.739234536376301e-07,
|
|
"loss": 0.2195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2179790735244751,
|
|
"step": 4015,
|
|
"valid_targets_mean": 4860.9,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 6.677740863787376,
|
|
"grad_norm": 0.4503957654825089,
|
|
"learning_rate": 2.604279141695565e-07,
|
|
"loss": 0.2241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.238296240568161,
|
|
"step": 4020,
|
|
"valid_targets_mean": 5071.1,
|
|
"valid_targets_min": 418
|
|
},
|
|
{
|
|
"epoch": 6.686046511627907,
|
|
"grad_norm": 0.4268352556996379,
|
|
"learning_rate": 2.4727109420921826e-07,
|
|
"loss": 0.235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25402626395225525,
|
|
"step": 4025,
|
|
"valid_targets_mean": 5860.2,
|
|
"valid_targets_min": 917
|
|
},
|
|
{
|
|
"epoch": 6.694352159468439,
|
|
"grad_norm": 0.48054122102480573,
|
|
"learning_rate": 2.3445321951995003e-07,
|
|
"loss": 0.2299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2354814112186432,
|
|
"step": 4030,
|
|
"valid_targets_mean": 4603.8,
|
|
"valid_targets_min": 891
|
|
},
|
|
{
|
|
"epoch": 6.70265780730897,
|
|
"grad_norm": 0.4679844836975499,
|
|
"learning_rate": 2.219745100489834e-07,
|
|
"loss": 0.2289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2657753527164459,
|
|
"step": 4035,
|
|
"valid_targets_mean": 5029.9,
|
|
"valid_targets_min": 830
|
|
},
|
|
{
|
|
"epoch": 6.710963455149502,
|
|
"grad_norm": 0.44649428956735077,
|
|
"learning_rate": 2.0983517992366975e-07,
|
|
"loss": 0.2263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18976466357707977,
|
|
"step": 4040,
|
|
"valid_targets_mean": 4670.5,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 6.719269102990033,
|
|
"grad_norm": 0.553020789108746,
|
|
"learning_rate": 1.9803543744780994e-07,
|
|
"loss": 0.2342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2408244013786316,
|
|
"step": 4045,
|
|
"valid_targets_mean": 3428.1,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 6.7275747508305646,
|
|
"grad_norm": 0.4091392385784415,
|
|
"learning_rate": 1.865754850980861e-07,
|
|
"loss": 0.2196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20604223012924194,
|
|
"step": 4050,
|
|
"valid_targets_mean": 5804.9,
|
|
"valid_targets_min": 1788
|
|
},
|
|
{
|
|
"epoch": 6.735880398671096,
|
|
"grad_norm": 0.4530168166935197,
|
|
"learning_rate": 1.7545551952057095e-07,
|
|
"loss": 0.2232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2344244420528412,
|
|
"step": 4055,
|
|
"valid_targets_mean": 5148.4,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 6.7441860465116275,
|
|
"grad_norm": 0.4373942980102208,
|
|
"learning_rate": 1.6467573152736837e-07,
|
|
"loss": 0.2393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22142934799194336,
|
|
"step": 4060,
|
|
"valid_targets_mean": 4895.0,
|
|
"valid_targets_min": 528
|
|
},
|
|
{
|
|
"epoch": 6.75249169435216,
|
|
"grad_norm": 0.43186945816142636,
|
|
"learning_rate": 1.5423630609333607e-07,
|
|
"loss": 0.2124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19730550050735474,
|
|
"step": 4065,
|
|
"valid_targets_mean": 4574.4,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 6.760797342192691,
|
|
"grad_norm": 0.4367488153987472,
|
|
"learning_rate": 1.4413742235290573e-07,
|
|
"loss": 0.2255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2180730551481247,
|
|
"step": 4070,
|
|
"valid_targets_mean": 4928.5,
|
|
"valid_targets_min": 1554
|
|
},
|
|
{
|
|
"epoch": 6.769102990033223,
|
|
"grad_norm": 0.46157545465003963,
|
|
"learning_rate": 1.3437925359701008e-07,
|
|
"loss": 0.2251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21585121750831604,
|
|
"step": 4075,
|
|
"valid_targets_mean": 4875.4,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 6.777408637873754,
|
|
"grad_norm": 0.4182049457053235,
|
|
"learning_rate": 1.2496196727011856e-07,
|
|
"loss": 0.2207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1900090128183365,
|
|
"step": 4080,
|
|
"valid_targets_mean": 5046.8,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 6.785714285714286,
|
|
"grad_norm": 0.45420717755761053,
|
|
"learning_rate": 1.1588572496735506e-07,
|
|
"loss": 0.239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2511643171310425,
|
|
"step": 4085,
|
|
"valid_targets_mean": 5257.6,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 6.794019933554817,
|
|
"grad_norm": 0.4265664585731597,
|
|
"learning_rate": 1.0715068243172699e-07,
|
|
"loss": 0.2302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25119948387145996,
|
|
"step": 4090,
|
|
"valid_targets_mean": 5701.2,
|
|
"valid_targets_min": 1390
|
|
},
|
|
{
|
|
"epoch": 6.8023255813953485,
|
|
"grad_norm": 0.49360088658945495,
|
|
"learning_rate": 9.875698955145174e-08,
|
|
"loss": 0.2306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2218395173549652,
|
|
"step": 4095,
|
|
"valid_targets_mean": 4208.1,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 6.810631229235881,
|
|
"grad_norm": 0.42921566858279875,
|
|
"learning_rate": 9.070479035738988e-08,
|
|
"loss": 0.2297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21878480911254883,
|
|
"step": 4100,
|
|
"valid_targets_mean": 5144.0,
|
|
"valid_targets_min": 2764
|
|
},
|
|
{
|
|
"epoch": 6.818936877076412,
|
|
"grad_norm": 0.397846535988314,
|
|
"learning_rate": 8.299422302056715e-08,
|
|
"loss": 0.2194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2280501127243042,
|
|
"step": 4105,
|
|
"valid_targets_mean": 5825.4,
|
|
"valid_targets_min": 1191
|
|
},
|
|
{
|
|
"epoch": 6.827242524916944,
|
|
"grad_norm": 0.45271541608613003,
|
|
"learning_rate": 7.562541984980964e-08,
|
|
"loss": 0.2324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2214854657649994,
|
|
"step": 4110,
|
|
"valid_targets_mean": 5196.6,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 6.835548172757475,
|
|
"grad_norm": 0.4724043554723174,
|
|
"learning_rate": 6.85985072894657e-08,
|
|
"loss": 0.2316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21336227655410767,
|
|
"step": 4115,
|
|
"valid_targets_mean": 5587.2,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 6.843853820598007,
|
|
"grad_norm": 0.4190159338936968,
|
|
"learning_rate": 6.19136059172476e-08,
|
|
"loss": 0.2426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3053281009197235,
|
|
"step": 4120,
|
|
"valid_targets_mean": 6608.1,
|
|
"valid_targets_min": 2693
|
|
},
|
|
{
|
|
"epoch": 6.852159468438538,
|
|
"grad_norm": 0.45616786218150057,
|
|
"learning_rate": 5.557083044214651e-08,
|
|
"loss": 0.2344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2360706478357315,
|
|
"step": 4125,
|
|
"valid_targets_mean": 5219.1,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 6.8604651162790695,
|
|
"grad_norm": 0.42817836224043204,
|
|
"learning_rate": 4.957028970248301e-08,
|
|
"loss": 0.2351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21489188075065613,
|
|
"step": 4130,
|
|
"valid_targets_mean": 5289.4,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 6.868770764119601,
|
|
"grad_norm": 0.43464394020619423,
|
|
"learning_rate": 4.3912086664021914e-08,
|
|
"loss": 0.2305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22122065722942352,
|
|
"step": 4135,
|
|
"valid_targets_mean": 4856.1,
|
|
"valid_targets_min": 983
|
|
},
|
|
{
|
|
"epoch": 6.877076411960132,
|
|
"grad_norm": 0.4612697389466052,
|
|
"learning_rate": 3.859631841822031e-08,
|
|
"loss": 0.2482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24951687455177307,
|
|
"step": 4140,
|
|
"valid_targets_mean": 4845.0,
|
|
"valid_targets_min": 427
|
|
},
|
|
{
|
|
"epoch": 6.885382059800665,
|
|
"grad_norm": 0.4332335176639186,
|
|
"learning_rate": 3.3623076180548944e-08,
|
|
"loss": 0.2482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24355295300483704,
|
|
"step": 4145,
|
|
"valid_targets_mean": 5416.5,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 6.893687707641196,
|
|
"grad_norm": 0.4579726056882762,
|
|
"learning_rate": 2.8992445288935633e-08,
|
|
"loss": 0.2476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22523343563079834,
|
|
"step": 4150,
|
|
"valid_targets_mean": 5287.2,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 6.901993355481728,
|
|
"grad_norm": 0.3769114494872949,
|
|
"learning_rate": 2.4704505202295393e-08,
|
|
"loss": 0.2241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20762351155281067,
|
|
"step": 4155,
|
|
"valid_targets_mean": 6458.6,
|
|
"valid_targets_min": 550
|
|
},
|
|
{
|
|
"epoch": 6.910299003322259,
|
|
"grad_norm": 0.4394576027913015,
|
|
"learning_rate": 2.0759329499173698e-08,
|
|
"loss": 0.2359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23539859056472778,
|
|
"step": 4160,
|
|
"valid_targets_mean": 5018.5,
|
|
"valid_targets_min": 641
|
|
},
|
|
{
|
|
"epoch": 6.9186046511627906,
|
|
"grad_norm": 0.4302801844014137,
|
|
"learning_rate": 1.7156985876476406e-08,
|
|
"loss": 0.2328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2556449770927429,
|
|
"step": 4165,
|
|
"valid_targets_mean": 5624.2,
|
|
"valid_targets_min": 683
|
|
},
|
|
{
|
|
"epoch": 6.926910299003322,
|
|
"grad_norm": 0.4114782513677338,
|
|
"learning_rate": 1.3897536148310687e-08,
|
|
"loss": 0.225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20943684875965118,
|
|
"step": 4170,
|
|
"valid_targets_mean": 6467.6,
|
|
"valid_targets_min": 3406
|
|
},
|
|
{
|
|
"epoch": 6.9352159468438535,
|
|
"grad_norm": 0.4506559931539549,
|
|
"learning_rate": 1.09810362449303e-08,
|
|
"loss": 0.24,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3210254907608032,
|
|
"step": 4175,
|
|
"valid_targets_mean": 6316.2,
|
|
"valid_targets_min": 683
|
|
},
|
|
{
|
|
"epoch": 6.943521594684386,
|
|
"grad_norm": 0.43280470708522023,
|
|
"learning_rate": 8.407536211765265e-09,
|
|
"loss": 0.226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23143652081489563,
|
|
"step": 4180,
|
|
"valid_targets_mean": 5221.2,
|
|
"valid_targets_min": 903
|
|
},
|
|
{
|
|
"epoch": 6.951827242524917,
|
|
"grad_norm": 0.43952346019541544,
|
|
"learning_rate": 6.1770802085714395e-09,
|
|
"loss": 0.2363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23755702376365662,
|
|
"step": 4185,
|
|
"valid_targets_mean": 5767.0,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 6.960132890365449,
|
|
"grad_norm": 0.42803157370692796,
|
|
"learning_rate": 4.289706508666669e-09,
|
|
"loss": 0.2283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26991814374923706,
|
|
"step": 4190,
|
|
"valid_targets_mean": 6233.2,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 6.96843853820598,
|
|
"grad_norm": 0.38868944381203674,
|
|
"learning_rate": 2.7454474982824276e-09,
|
|
"loss": 0.2446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19987355172634125,
|
|
"step": 4195,
|
|
"valid_targets_mean": 6257.0,
|
|
"valid_targets_min": 3167
|
|
},
|
|
{
|
|
"epoch": 6.976744186046512,
|
|
"grad_norm": 0.5135484098311326,
|
|
"learning_rate": 1.5443296759976022e-09,
|
|
"loss": 0.2271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23415961861610413,
|
|
"step": 4200,
|
|
"valid_targets_mean": 4507.9,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 6.985049833887043,
|
|
"grad_norm": 0.5213530616998626,
|
|
"learning_rate": 6.863736522899623e-10,
|
|
"loss": 0.2326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24501436948776245,
|
|
"step": 4205,
|
|
"valid_targets_mean": 4907.6,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 6.9933554817275745,
|
|
"grad_norm": 0.36803583817726176,
|
|
"learning_rate": 1.715941491853279e-10,
|
|
"loss": 0.2403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2473907768726349,
|
|
"step": 4210,
|
|
"valid_targets_mean": 7456.6,
|
|
"valid_targets_min": 550
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21552443504333496,
|
|
"step": 4214,
|
|
"total_flos": 1717709353779200.0,
|
|
"train_loss": 0.15650485047922919,
|
|
"train_runtime": 20216.2486,
|
|
"train_samples_per_second": 3.333,
|
|
"train_steps_per_second": 0.208,
|
|
"valid_targets_mean": 5110.8,
|
|
"valid_targets_min": 529
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4214,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1717709353779200.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|