Model: laion/exp-uns-r2egym-16_8x_glm_4_7_traces_jupiter_cleaned Source: Original Platform
10223 lines
283 KiB
JSON
10223 lines
283 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4627,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.007564296520423601,
|
|
"grad_norm": 23.264870081576962,
|
|
"learning_rate": 3.455723542116631e-07,
|
|
"loss": 0.8774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5025818347930908,
|
|
"step": 5,
|
|
"valid_targets_mean": 3535.4,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 0.015128593040847202,
|
|
"grad_norm": 23.60347775176598,
|
|
"learning_rate": 7.77537796976242e-07,
|
|
"loss": 0.8933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5544657707214355,
|
|
"step": 10,
|
|
"valid_targets_mean": 4782.8,
|
|
"valid_targets_min": 3999
|
|
},
|
|
{
|
|
"epoch": 0.0226928895612708,
|
|
"grad_norm": 23.12929908110619,
|
|
"learning_rate": 1.209503239740821e-06,
|
|
"loss": 0.8446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47318148612976074,
|
|
"step": 15,
|
|
"valid_targets_mean": 4022.2,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 0.030257186081694403,
|
|
"grad_norm": 15.86142508449032,
|
|
"learning_rate": 1.6414686825053995e-06,
|
|
"loss": 0.8098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.479974627494812,
|
|
"step": 20,
|
|
"valid_targets_mean": 4029.5,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 0.037821482602118005,
|
|
"grad_norm": 8.911890037845783,
|
|
"learning_rate": 2.0734341252699786e-06,
|
|
"loss": 0.7471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37518471479415894,
|
|
"step": 25,
|
|
"valid_targets_mean": 3756.2,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 0.0453857791225416,
|
|
"grad_norm": 4.721122041516266,
|
|
"learning_rate": 2.505399568034557e-06,
|
|
"loss": 0.6957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3397650718688965,
|
|
"step": 30,
|
|
"valid_targets_mean": 3732.1,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 0.0529500756429652,
|
|
"grad_norm": 2.9680748459179305,
|
|
"learning_rate": 2.9373650107991366e-06,
|
|
"loss": 0.6627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3125842809677124,
|
|
"step": 35,
|
|
"valid_targets_mean": 2654.9,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 0.060514372163388806,
|
|
"grad_norm": 1.8650288110198525,
|
|
"learning_rate": 3.369330453563715e-06,
|
|
"loss": 0.5966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35618656873703003,
|
|
"step": 40,
|
|
"valid_targets_mean": 5729.8,
|
|
"valid_targets_min": 4365
|
|
},
|
|
{
|
|
"epoch": 0.0680786686838124,
|
|
"grad_norm": 1.5547857260434315,
|
|
"learning_rate": 3.801295896328294e-06,
|
|
"loss": 0.589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34688353538513184,
|
|
"step": 45,
|
|
"valid_targets_mean": 4405.4,
|
|
"valid_targets_min": 918
|
|
},
|
|
{
|
|
"epoch": 0.07564296520423601,
|
|
"grad_norm": 1.4203959826555785,
|
|
"learning_rate": 4.233261339092873e-06,
|
|
"loss": 0.5655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2816385328769684,
|
|
"step": 50,
|
|
"valid_targets_mean": 3555.8,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 0.0832072617246596,
|
|
"grad_norm": 1.1022227849897155,
|
|
"learning_rate": 4.665226781857452e-06,
|
|
"loss": 0.5613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.250712513923645,
|
|
"step": 55,
|
|
"valid_targets_mean": 3661.1,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 0.0907715582450832,
|
|
"grad_norm": 0.9522848551591522,
|
|
"learning_rate": 5.09719222462203e-06,
|
|
"loss": 0.5287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3109261095523834,
|
|
"step": 60,
|
|
"valid_targets_mean": 4234.8,
|
|
"valid_targets_min": 1185
|
|
},
|
|
{
|
|
"epoch": 0.09833585476550681,
|
|
"grad_norm": 0.8342255037333413,
|
|
"learning_rate": 5.52915766738661e-06,
|
|
"loss": 0.5083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23312246799468994,
|
|
"step": 65,
|
|
"valid_targets_mean": 3770.8,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 0.1059001512859304,
|
|
"grad_norm": 0.9260711445437624,
|
|
"learning_rate": 5.961123110151188e-06,
|
|
"loss": 0.5119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2264266014099121,
|
|
"step": 70,
|
|
"valid_targets_mean": 2632.5,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 0.11346444780635401,
|
|
"grad_norm": 0.8805330353158601,
|
|
"learning_rate": 6.393088552915767e-06,
|
|
"loss": 0.5132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29181522130966187,
|
|
"step": 75,
|
|
"valid_targets_mean": 3743.5,
|
|
"valid_targets_min": 990
|
|
},
|
|
{
|
|
"epoch": 0.12102874432677761,
|
|
"grad_norm": 0.8265110662455508,
|
|
"learning_rate": 6.825053995680346e-06,
|
|
"loss": 0.4977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34186261892318726,
|
|
"step": 80,
|
|
"valid_targets_mean": 4861.5,
|
|
"valid_targets_min": 916
|
|
},
|
|
{
|
|
"epoch": 0.12859304084720122,
|
|
"grad_norm": 0.7001236176631496,
|
|
"learning_rate": 7.257019438444926e-06,
|
|
"loss": 0.4886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28147417306900024,
|
|
"step": 85,
|
|
"valid_targets_mean": 4018.9,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 0.1361573373676248,
|
|
"grad_norm": 0.6456389847615942,
|
|
"learning_rate": 7.688984881209504e-06,
|
|
"loss": 0.4597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22170212864875793,
|
|
"step": 90,
|
|
"valid_targets_mean": 4134.0,
|
|
"valid_targets_min": 569
|
|
},
|
|
{
|
|
"epoch": 0.1437216338880484,
|
|
"grad_norm": 0.6881326295789646,
|
|
"learning_rate": 8.120950323974082e-06,
|
|
"loss": 0.4593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2650897800922394,
|
|
"step": 95,
|
|
"valid_targets_mean": 4331.4,
|
|
"valid_targets_min": 760
|
|
},
|
|
{
|
|
"epoch": 0.15128593040847202,
|
|
"grad_norm": 0.6962988670157563,
|
|
"learning_rate": 8.552915766738662e-06,
|
|
"loss": 0.4547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22169449925422668,
|
|
"step": 100,
|
|
"valid_targets_mean": 3311.9,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 0.1588502269288956,
|
|
"grad_norm": 0.707356868247285,
|
|
"learning_rate": 8.98488120950324e-06,
|
|
"loss": 0.4362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24226444959640503,
|
|
"step": 105,
|
|
"valid_targets_mean": 3727.9,
|
|
"valid_targets_min": 838
|
|
},
|
|
{
|
|
"epoch": 0.1664145234493192,
|
|
"grad_norm": 0.6949250505569831,
|
|
"learning_rate": 9.41684665226782e-06,
|
|
"loss": 0.4523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14988040924072266,
|
|
"step": 110,
|
|
"valid_targets_mean": 2446.8,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 0.17397881996974282,
|
|
"grad_norm": 0.6625324502426455,
|
|
"learning_rate": 9.848812095032398e-06,
|
|
"loss": 0.4136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2043588012456894,
|
|
"step": 115,
|
|
"valid_targets_mean": 3999.4,
|
|
"valid_targets_min": 938
|
|
},
|
|
{
|
|
"epoch": 0.1815431164901664,
|
|
"grad_norm": 0.6610098762662052,
|
|
"learning_rate": 1.0280777537796978e-05,
|
|
"loss": 0.417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20789556205272675,
|
|
"step": 120,
|
|
"valid_targets_mean": 3508.4,
|
|
"valid_targets_min": 631
|
|
},
|
|
{
|
|
"epoch": 0.18910741301059,
|
|
"grad_norm": 0.7345869943074439,
|
|
"learning_rate": 1.0712742980561557e-05,
|
|
"loss": 0.4127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20451787114143372,
|
|
"step": 125,
|
|
"valid_targets_mean": 3092.0,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 0.19667170953101362,
|
|
"grad_norm": 0.6726420323762144,
|
|
"learning_rate": 1.1144708423326134e-05,
|
|
"loss": 0.3954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.179059699177742,
|
|
"step": 130,
|
|
"valid_targets_mean": 3355.1,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 0.2042360060514372,
|
|
"grad_norm": 0.6524997819161236,
|
|
"learning_rate": 1.1576673866090712e-05,
|
|
"loss": 0.3945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.184405118227005,
|
|
"step": 135,
|
|
"valid_targets_mean": 4212.2,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 0.2118003025718608,
|
|
"grad_norm": 0.7624372264100737,
|
|
"learning_rate": 1.2008639308855293e-05,
|
|
"loss": 0.3985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24388937652111053,
|
|
"step": 140,
|
|
"valid_targets_mean": 3863.5,
|
|
"valid_targets_min": 788
|
|
},
|
|
{
|
|
"epoch": 0.21936459909228442,
|
|
"grad_norm": 0.6931652269862953,
|
|
"learning_rate": 1.2440604751619871e-05,
|
|
"loss": 0.4004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2236379086971283,
|
|
"step": 145,
|
|
"valid_targets_mean": 4114.8,
|
|
"valid_targets_min": 1092
|
|
},
|
|
{
|
|
"epoch": 0.22692889561270801,
|
|
"grad_norm": 0.7186947073353663,
|
|
"learning_rate": 1.287257019438445e-05,
|
|
"loss": 0.4157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19788280129432678,
|
|
"step": 150,
|
|
"valid_targets_mean": 3607.9,
|
|
"valid_targets_min": 586
|
|
},
|
|
{
|
|
"epoch": 0.2344931921331316,
|
|
"grad_norm": 0.8201035426475958,
|
|
"learning_rate": 1.330453563714903e-05,
|
|
"loss": 0.3958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22511398792266846,
|
|
"step": 155,
|
|
"valid_targets_mean": 3547.9,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 0.24205748865355523,
|
|
"grad_norm": 0.6702497039705089,
|
|
"learning_rate": 1.3736501079913609e-05,
|
|
"loss": 0.3874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21128690242767334,
|
|
"step": 160,
|
|
"valid_targets_mean": 3969.1,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 0.24962178517397882,
|
|
"grad_norm": 0.6309649454863937,
|
|
"learning_rate": 1.4168466522678186e-05,
|
|
"loss": 0.3685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14940249919891357,
|
|
"step": 165,
|
|
"valid_targets_mean": 3139.2,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 0.25718608169440244,
|
|
"grad_norm": 0.6793232809454998,
|
|
"learning_rate": 1.4600431965442764e-05,
|
|
"loss": 0.3864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18487021327018738,
|
|
"step": 170,
|
|
"valid_targets_mean": 3569.2,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 0.264750378214826,
|
|
"grad_norm": 0.7222270504909548,
|
|
"learning_rate": 1.5032397408207345e-05,
|
|
"loss": 0.3834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1998199075460434,
|
|
"step": 175,
|
|
"valid_targets_mean": 2986.0,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 0.2723146747352496,
|
|
"grad_norm": 0.6120760153731338,
|
|
"learning_rate": 1.5464362850971925e-05,
|
|
"loss": 0.3981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1606714427471161,
|
|
"step": 180,
|
|
"valid_targets_mean": 4282.1,
|
|
"valid_targets_min": 3857
|
|
},
|
|
{
|
|
"epoch": 0.27987897125567324,
|
|
"grad_norm": 0.6474112989820785,
|
|
"learning_rate": 1.5896328293736503e-05,
|
|
"loss": 0.3834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16384312510490417,
|
|
"step": 185,
|
|
"valid_targets_mean": 2939.6,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 0.2874432677760968,
|
|
"grad_norm": 0.6745080597127265,
|
|
"learning_rate": 1.6328293736501082e-05,
|
|
"loss": 0.3773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1786409467458725,
|
|
"step": 190,
|
|
"valid_targets_mean": 3622.0,
|
|
"valid_targets_min": 883
|
|
},
|
|
{
|
|
"epoch": 0.2950075642965204,
|
|
"grad_norm": 0.6419603609963925,
|
|
"learning_rate": 1.676025917926566e-05,
|
|
"loss": 0.3703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18762117624282837,
|
|
"step": 195,
|
|
"valid_targets_mean": 4044.2,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 0.30257186081694404,
|
|
"grad_norm": 0.6736318030773047,
|
|
"learning_rate": 1.719222462203024e-05,
|
|
"loss": 0.3671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16203483939170837,
|
|
"step": 200,
|
|
"valid_targets_mean": 3135.5,
|
|
"valid_targets_min": 666
|
|
},
|
|
{
|
|
"epoch": 0.3101361573373676,
|
|
"grad_norm": 0.6037972553270124,
|
|
"learning_rate": 1.7624190064794818e-05,
|
|
"loss": 0.3505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19808697700500488,
|
|
"step": 205,
|
|
"valid_targets_mean": 5219.6,
|
|
"valid_targets_min": 885
|
|
},
|
|
{
|
|
"epoch": 0.3177004538577912,
|
|
"grad_norm": 0.7024267152288795,
|
|
"learning_rate": 1.8056155507559396e-05,
|
|
"loss": 0.3702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13386081159114838,
|
|
"step": 210,
|
|
"valid_targets_mean": 2898.5,
|
|
"valid_targets_min": 447
|
|
},
|
|
{
|
|
"epoch": 0.32526475037821484,
|
|
"grad_norm": 0.6164104369080964,
|
|
"learning_rate": 1.8488120950323975e-05,
|
|
"loss": 0.3654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1530465930700302,
|
|
"step": 215,
|
|
"valid_targets_mean": 3774.9,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 0.3328290468986384,
|
|
"grad_norm": 0.6335438971016762,
|
|
"learning_rate": 1.8920086393088553e-05,
|
|
"loss": 0.3678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20025449991226196,
|
|
"step": 220,
|
|
"valid_targets_mean": 4278.2,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 0.340393343419062,
|
|
"grad_norm": 0.6875602376558361,
|
|
"learning_rate": 1.9352051835853135e-05,
|
|
"loss": 0.385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2185327410697937,
|
|
"step": 225,
|
|
"valid_targets_mean": 3992.5,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 0.34795763993948564,
|
|
"grad_norm": 0.69524668348569,
|
|
"learning_rate": 1.9784017278617714e-05,
|
|
"loss": 0.3602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17016160488128662,
|
|
"step": 230,
|
|
"valid_targets_mean": 3470.2,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 0.3555219364599092,
|
|
"grad_norm": 0.6478681485741377,
|
|
"learning_rate": 2.021598272138229e-05,
|
|
"loss": 0.3533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1512192189693451,
|
|
"step": 235,
|
|
"valid_targets_mean": 3527.2,
|
|
"valid_targets_min": 829
|
|
},
|
|
{
|
|
"epoch": 0.3630862329803328,
|
|
"grad_norm": 0.7093990957916748,
|
|
"learning_rate": 2.064794816414687e-05,
|
|
"loss": 0.3674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18604549765586853,
|
|
"step": 240,
|
|
"valid_targets_mean": 3504.2,
|
|
"valid_targets_min": 445
|
|
},
|
|
{
|
|
"epoch": 0.37065052950075644,
|
|
"grad_norm": 0.7135969359959522,
|
|
"learning_rate": 2.107991360691145e-05,
|
|
"loss": 0.344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17303326725959778,
|
|
"step": 245,
|
|
"valid_targets_mean": 2969.6,
|
|
"valid_targets_min": 332
|
|
},
|
|
{
|
|
"epoch": 0.37821482602118,
|
|
"grad_norm": 0.5742485926423301,
|
|
"learning_rate": 2.1511879049676025e-05,
|
|
"loss": 0.3479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16884852945804596,
|
|
"step": 250,
|
|
"valid_targets_mean": 4826.1,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 0.3857791225416036,
|
|
"grad_norm": 0.6582888814043136,
|
|
"learning_rate": 2.1943844492440607e-05,
|
|
"loss": 0.3515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20292259752750397,
|
|
"step": 255,
|
|
"valid_targets_mean": 3630.0,
|
|
"valid_targets_min": 858
|
|
},
|
|
{
|
|
"epoch": 0.39334341906202724,
|
|
"grad_norm": 0.7343871776860735,
|
|
"learning_rate": 2.2375809935205186e-05,
|
|
"loss": 0.3504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18831604719161987,
|
|
"step": 260,
|
|
"valid_targets_mean": 3643.0,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 0.4009077155824508,
|
|
"grad_norm": 0.7583905392384729,
|
|
"learning_rate": 2.2807775377969764e-05,
|
|
"loss": 0.3568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2532793879508972,
|
|
"step": 265,
|
|
"valid_targets_mean": 3713.8,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 0.4084720121028744,
|
|
"grad_norm": 0.7018525890469173,
|
|
"learning_rate": 2.3239740820734343e-05,
|
|
"loss": 0.355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14669430255889893,
|
|
"step": 270,
|
|
"valid_targets_mean": 3309.6,
|
|
"valid_targets_min": 888
|
|
},
|
|
{
|
|
"epoch": 0.41603630862329805,
|
|
"grad_norm": 0.7282211146349219,
|
|
"learning_rate": 2.3671706263498925e-05,
|
|
"loss": 0.3569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15649844706058502,
|
|
"step": 275,
|
|
"valid_targets_mean": 2469.9,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 0.4236006051437216,
|
|
"grad_norm": 0.6969562939671974,
|
|
"learning_rate": 2.41036717062635e-05,
|
|
"loss": 0.3536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11529238522052765,
|
|
"step": 280,
|
|
"valid_targets_mean": 1252.9,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 0.43116490166414523,
|
|
"grad_norm": 0.7114486860910316,
|
|
"learning_rate": 2.453563714902808e-05,
|
|
"loss": 0.3505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17446640133857727,
|
|
"step": 285,
|
|
"valid_targets_mean": 4056.8,
|
|
"valid_targets_min": 831
|
|
},
|
|
{
|
|
"epoch": 0.43872919818456885,
|
|
"grad_norm": 1.3279141840021946,
|
|
"learning_rate": 2.496760259179266e-05,
|
|
"loss": 0.3564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20664283633232117,
|
|
"step": 290,
|
|
"valid_targets_mean": 3959.2,
|
|
"valid_targets_min": 594
|
|
},
|
|
{
|
|
"epoch": 0.4462934947049924,
|
|
"grad_norm": 0.6524792805515398,
|
|
"learning_rate": 2.5399568034557236e-05,
|
|
"loss": 0.3346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.168576180934906,
|
|
"step": 295,
|
|
"valid_targets_mean": 4168.5,
|
|
"valid_targets_min": 3256
|
|
},
|
|
{
|
|
"epoch": 0.45385779122541603,
|
|
"grad_norm": 0.634016254652694,
|
|
"learning_rate": 2.5831533477321818e-05,
|
|
"loss": 0.3444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17431049048900604,
|
|
"step": 300,
|
|
"valid_targets_mean": 4252.0,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 0.46142208774583965,
|
|
"grad_norm": 0.6866263794972928,
|
|
"learning_rate": 2.6263498920086393e-05,
|
|
"loss": 0.3426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1459122598171234,
|
|
"step": 305,
|
|
"valid_targets_mean": 2517.9,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 0.4689863842662632,
|
|
"grad_norm": 0.6519020636945615,
|
|
"learning_rate": 2.6695464362850975e-05,
|
|
"loss": 0.3472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18223246932029724,
|
|
"step": 310,
|
|
"valid_targets_mean": 3845.5,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 0.47655068078668683,
|
|
"grad_norm": 0.7100686083637693,
|
|
"learning_rate": 2.7127429805615553e-05,
|
|
"loss": 0.3435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17315217852592468,
|
|
"step": 315,
|
|
"valid_targets_mean": 3531.5,
|
|
"valid_targets_min": 525
|
|
},
|
|
{
|
|
"epoch": 0.48411497730711045,
|
|
"grad_norm": 0.7811795045556823,
|
|
"learning_rate": 2.755939524838013e-05,
|
|
"loss": 0.3358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15351471304893494,
|
|
"step": 320,
|
|
"valid_targets_mean": 3190.1,
|
|
"valid_targets_min": 429
|
|
},
|
|
{
|
|
"epoch": 0.491679273827534,
|
|
"grad_norm": 0.5888978989871853,
|
|
"learning_rate": 2.799136069114471e-05,
|
|
"loss": 0.3484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18713781237602234,
|
|
"step": 325,
|
|
"valid_targets_mean": 4044.2,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 0.49924357034795763,
|
|
"grad_norm": 0.6420999696439788,
|
|
"learning_rate": 2.842332613390929e-05,
|
|
"loss": 0.3366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1547628939151764,
|
|
"step": 330,
|
|
"valid_targets_mean": 3896.6,
|
|
"valid_targets_min": 1064
|
|
},
|
|
{
|
|
"epoch": 0.5068078668683812,
|
|
"grad_norm": 0.6529161178872602,
|
|
"learning_rate": 2.885529157667387e-05,
|
|
"loss": 0.3373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16434447467327118,
|
|
"step": 335,
|
|
"valid_targets_mean": 3358.9,
|
|
"valid_targets_min": 1008
|
|
},
|
|
{
|
|
"epoch": 0.5143721633888049,
|
|
"grad_norm": 0.77090608755777,
|
|
"learning_rate": 2.9287257019438446e-05,
|
|
"loss": 0.317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20925772190093994,
|
|
"step": 340,
|
|
"valid_targets_mean": 3736.1,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 0.5219364599092284,
|
|
"grad_norm": 0.6610827686505704,
|
|
"learning_rate": 2.9719222462203028e-05,
|
|
"loss": 0.3348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16555149853229523,
|
|
"step": 345,
|
|
"valid_targets_mean": 3391.5,
|
|
"valid_targets_min": 655
|
|
},
|
|
{
|
|
"epoch": 0.529500756429652,
|
|
"grad_norm": 0.6823429245069436,
|
|
"learning_rate": 3.0151187904967603e-05,
|
|
"loss": 0.3251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17984899878501892,
|
|
"step": 350,
|
|
"valid_targets_mean": 3385.2,
|
|
"valid_targets_min": 395
|
|
},
|
|
{
|
|
"epoch": 0.5370650529500757,
|
|
"grad_norm": 0.6288939184011408,
|
|
"learning_rate": 3.058315334773218e-05,
|
|
"loss": 0.3256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15081320703029633,
|
|
"step": 355,
|
|
"valid_targets_mean": 3610.8,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 0.5446293494704992,
|
|
"grad_norm": 0.6708091274900803,
|
|
"learning_rate": 3.101511879049676e-05,
|
|
"loss": 0.3426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1862582564353943,
|
|
"step": 360,
|
|
"valid_targets_mean": 4650.1,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 0.5521936459909228,
|
|
"grad_norm": 0.6481435646426396,
|
|
"learning_rate": 3.144708423326134e-05,
|
|
"loss": 0.3365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19228947162628174,
|
|
"step": 365,
|
|
"valid_targets_mean": 3970.6,
|
|
"valid_targets_min": 579
|
|
},
|
|
{
|
|
"epoch": 0.5597579425113465,
|
|
"grad_norm": 0.6486428682614003,
|
|
"learning_rate": 3.1879049676025925e-05,
|
|
"loss": 0.3373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14083923399448395,
|
|
"step": 370,
|
|
"valid_targets_mean": 3391.4,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 0.56732223903177,
|
|
"grad_norm": 0.6939937646151824,
|
|
"learning_rate": 3.23110151187905e-05,
|
|
"loss": 0.3352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1467263102531433,
|
|
"step": 375,
|
|
"valid_targets_mean": 2980.8,
|
|
"valid_targets_min": 745
|
|
},
|
|
{
|
|
"epoch": 0.5748865355521936,
|
|
"grad_norm": 0.6684009912690047,
|
|
"learning_rate": 3.274298056155508e-05,
|
|
"loss": 0.3421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1928020417690277,
|
|
"step": 380,
|
|
"valid_targets_mean": 4745.1,
|
|
"valid_targets_min": 3256
|
|
},
|
|
{
|
|
"epoch": 0.5824508320726173,
|
|
"grad_norm": 0.7436143836333297,
|
|
"learning_rate": 3.317494600431966e-05,
|
|
"loss": 0.3392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17164814472198486,
|
|
"step": 385,
|
|
"valid_targets_mean": 3060.1,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 0.5900151285930408,
|
|
"grad_norm": 1.2284759170022579,
|
|
"learning_rate": 3.360691144708423e-05,
|
|
"loss": 0.3287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1655544638633728,
|
|
"step": 390,
|
|
"valid_targets_mean": 3180.1,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 0.5975794251134644,
|
|
"grad_norm": 0.6626656566131616,
|
|
"learning_rate": 3.4038876889848814e-05,
|
|
"loss": 0.325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12429247796535492,
|
|
"step": 395,
|
|
"valid_targets_mean": 2372.0,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 0.6051437216338881,
|
|
"grad_norm": 0.6744699381801939,
|
|
"learning_rate": 3.447084233261339e-05,
|
|
"loss": 0.3448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14638054370880127,
|
|
"step": 400,
|
|
"valid_targets_mean": 3637.1,
|
|
"valid_targets_min": 547
|
|
},
|
|
{
|
|
"epoch": 0.6127080181543116,
|
|
"grad_norm": 0.6614362518797119,
|
|
"learning_rate": 3.490280777537797e-05,
|
|
"loss": 0.3378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16557490825653076,
|
|
"step": 405,
|
|
"valid_targets_mean": 3212.0,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 0.6202723146747352,
|
|
"grad_norm": 0.6318393689583978,
|
|
"learning_rate": 3.533477321814255e-05,
|
|
"loss": 0.3416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14659368991851807,
|
|
"step": 410,
|
|
"valid_targets_mean": 2996.1,
|
|
"valid_targets_min": 766
|
|
},
|
|
{
|
|
"epoch": 0.6278366111951589,
|
|
"grad_norm": 0.6661238212562043,
|
|
"learning_rate": 3.5766738660907135e-05,
|
|
"loss": 0.3374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19773301482200623,
|
|
"step": 415,
|
|
"valid_targets_mean": 3845.6,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 0.6354009077155824,
|
|
"grad_norm": 0.6573985228434528,
|
|
"learning_rate": 3.619870410367171e-05,
|
|
"loss": 0.3165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1365024447441101,
|
|
"step": 420,
|
|
"valid_targets_mean": 3567.6,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 0.642965204236006,
|
|
"grad_norm": 0.7355987658116938,
|
|
"learning_rate": 3.6630669546436286e-05,
|
|
"loss": 0.3494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16251324117183685,
|
|
"step": 425,
|
|
"valid_targets_mean": 3328.9,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 0.6505295007564297,
|
|
"grad_norm": 0.6856508875205136,
|
|
"learning_rate": 3.706263498920087e-05,
|
|
"loss": 0.3559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21238085627555847,
|
|
"step": 430,
|
|
"valid_targets_mean": 4338.9,
|
|
"valid_targets_min": 239
|
|
},
|
|
{
|
|
"epoch": 0.6580937972768532,
|
|
"grad_norm": 0.7585950868114708,
|
|
"learning_rate": 3.749460043196544e-05,
|
|
"loss": 0.3273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16705036163330078,
|
|
"step": 435,
|
|
"valid_targets_mean": 3008.5,
|
|
"valid_targets_min": 468
|
|
},
|
|
{
|
|
"epoch": 0.6656580937972768,
|
|
"grad_norm": 0.6385883834585737,
|
|
"learning_rate": 3.7926565874730025e-05,
|
|
"loss": 0.3224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1700981855392456,
|
|
"step": 440,
|
|
"valid_targets_mean": 3472.2,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 0.6732223903177005,
|
|
"grad_norm": 0.6908216616457553,
|
|
"learning_rate": 3.83585313174946e-05,
|
|
"loss": 0.3331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17547403275966644,
|
|
"step": 445,
|
|
"valid_targets_mean": 2822.8,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 0.680786686838124,
|
|
"grad_norm": 0.5768981735848281,
|
|
"learning_rate": 3.879049676025918e-05,
|
|
"loss": 0.322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16128405928611755,
|
|
"step": 450,
|
|
"valid_targets_mean": 4008.5,
|
|
"valid_targets_min": 468
|
|
},
|
|
{
|
|
"epoch": 0.6883509833585476,
|
|
"grad_norm": 0.6185299845214693,
|
|
"learning_rate": 3.9222462203023764e-05,
|
|
"loss": 0.3132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13768109679222107,
|
|
"step": 455,
|
|
"valid_targets_mean": 3495.1,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 0.6959152798789713,
|
|
"grad_norm": 0.6095427501852579,
|
|
"learning_rate": 3.965442764578834e-05,
|
|
"loss": 0.3206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14439386129379272,
|
|
"step": 460,
|
|
"valid_targets_mean": 4412.1,
|
|
"valid_targets_min": 3674
|
|
},
|
|
{
|
|
"epoch": 0.7034795763993948,
|
|
"grad_norm": 0.5682553933343372,
|
|
"learning_rate": 3.9999994307824485e-05,
|
|
"loss": 0.3142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1434485912322998,
|
|
"step": 465,
|
|
"valid_targets_mean": 3669.8,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 0.7110438729198184,
|
|
"grad_norm": 0.6032525368704863,
|
|
"learning_rate": 3.9999795082021543e-05,
|
|
"loss": 0.326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14730523526668549,
|
|
"step": 470,
|
|
"valid_targets_mean": 3255.6,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 0.7186081694402421,
|
|
"grad_norm": 0.6497421107706156,
|
|
"learning_rate": 3.999931125068276e-05,
|
|
"loss": 0.3084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15857839584350586,
|
|
"step": 475,
|
|
"valid_targets_mean": 3483.8,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 0.7261724659606656,
|
|
"grad_norm": 0.5498298104543555,
|
|
"learning_rate": 3.9998542820693246e-05,
|
|
"loss": 0.3214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15253199636936188,
|
|
"step": 480,
|
|
"valid_targets_mean": 4467.4,
|
|
"valid_targets_min": 942
|
|
},
|
|
{
|
|
"epoch": 0.7337367624810892,
|
|
"grad_norm": 0.536742222676092,
|
|
"learning_rate": 3.9997489802988096e-05,
|
|
"loss": 0.3109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1675170660018921,
|
|
"step": 485,
|
|
"valid_targets_mean": 4580.9,
|
|
"valid_targets_min": 1509
|
|
},
|
|
{
|
|
"epoch": 0.7413010590015129,
|
|
"grad_norm": 0.5976406077878599,
|
|
"learning_rate": 3.9996152212552195e-05,
|
|
"loss": 0.3372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14526274800300598,
|
|
"step": 490,
|
|
"valid_targets_mean": 3629.5,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 0.7488653555219364,
|
|
"grad_norm": 0.6657479469231001,
|
|
"learning_rate": 3.999453006842002e-05,
|
|
"loss": 0.3196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14529913663864136,
|
|
"step": 495,
|
|
"valid_targets_mean": 2441.9,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 0.75642965204236,
|
|
"grad_norm": 0.7272693226461541,
|
|
"learning_rate": 3.999262339367536e-05,
|
|
"loss": 0.3293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23437121510505676,
|
|
"step": 500,
|
|
"valid_targets_mean": 2625.6,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 0.7639939485627837,
|
|
"grad_norm": 0.624212275980452,
|
|
"learning_rate": 3.9990432215451006e-05,
|
|
"loss": 0.3134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1588977724313736,
|
|
"step": 505,
|
|
"valid_targets_mean": 3707.5,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 0.7715582450832073,
|
|
"grad_norm": 0.6189725381883429,
|
|
"learning_rate": 3.998795656492836e-05,
|
|
"loss": 0.3272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18527361750602722,
|
|
"step": 510,
|
|
"valid_targets_mean": 4471.9,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 0.7791225416036308,
|
|
"grad_norm": 0.686486928196184,
|
|
"learning_rate": 3.998519647733696e-05,
|
|
"loss": 0.3341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27271604537963867,
|
|
"step": 515,
|
|
"valid_targets_mean": 3787.1,
|
|
"valid_targets_min": 527
|
|
},
|
|
{
|
|
"epoch": 0.7866868381240545,
|
|
"grad_norm": 0.6068863789873196,
|
|
"learning_rate": 3.998215199195403e-05,
|
|
"loss": 0.3284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13278502225875854,
|
|
"step": 520,
|
|
"valid_targets_mean": 3521.2,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 0.794251134644478,
|
|
"grad_norm": 0.6306678848425313,
|
|
"learning_rate": 3.997882315210388e-05,
|
|
"loss": 0.31,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16682612895965576,
|
|
"step": 525,
|
|
"valid_targets_mean": 3134.0,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 0.8018154311649016,
|
|
"grad_norm": 0.5838804668738039,
|
|
"learning_rate": 3.997521000515731e-05,
|
|
"loss": 0.3311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22742576897144318,
|
|
"step": 530,
|
|
"valid_targets_mean": 5804.8,
|
|
"valid_targets_min": 3891
|
|
},
|
|
{
|
|
"epoch": 0.8093797276853253,
|
|
"grad_norm": 0.5947358689372905,
|
|
"learning_rate": 3.997131260253092e-05,
|
|
"loss": 0.3119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17321552336215973,
|
|
"step": 535,
|
|
"valid_targets_mean": 4714.2,
|
|
"valid_targets_min": 4077
|
|
},
|
|
{
|
|
"epoch": 0.8169440242057489,
|
|
"grad_norm": 0.5833696174622485,
|
|
"learning_rate": 3.9967130999686405e-05,
|
|
"loss": 0.3018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15022411942481995,
|
|
"step": 540,
|
|
"valid_targets_mean": 3788.6,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 0.8245083207261724,
|
|
"grad_norm": 0.81120921560823,
|
|
"learning_rate": 3.996266525612973e-05,
|
|
"loss": 0.3196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1640307605266571,
|
|
"step": 545,
|
|
"valid_targets_mean": 3354.5,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 0.8320726172465961,
|
|
"grad_norm": 0.5882280424921178,
|
|
"learning_rate": 3.9957915435410334e-05,
|
|
"loss": 0.3324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15542134642601013,
|
|
"step": 550,
|
|
"valid_targets_mean": 4565.9,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 0.8396369137670197,
|
|
"grad_norm": 0.5114261672632754,
|
|
"learning_rate": 3.995288160512015e-05,
|
|
"loss": 0.312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12919434905052185,
|
|
"step": 555,
|
|
"valid_targets_mean": 3702.4,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 0.8472012102874432,
|
|
"grad_norm": 0.71798988141457,
|
|
"learning_rate": 3.9947563836892725e-05,
|
|
"loss": 0.3126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15368886291980743,
|
|
"step": 560,
|
|
"valid_targets_mean": 2106.9,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 0.8547655068078669,
|
|
"grad_norm": 0.4830787856281229,
|
|
"learning_rate": 3.994196220640214e-05,
|
|
"loss": 0.3166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14742343127727509,
|
|
"step": 565,
|
|
"valid_targets_mean": 4509.1,
|
|
"valid_targets_min": 799
|
|
},
|
|
{
|
|
"epoch": 0.8623298033282905,
|
|
"grad_norm": 0.5101157756195189,
|
|
"learning_rate": 3.993607679336197e-05,
|
|
"loss": 0.3153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1466086506843567,
|
|
"step": 570,
|
|
"valid_targets_mean": 4172.4,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 0.869894099848714,
|
|
"grad_norm": 0.6432703866738241,
|
|
"learning_rate": 3.992990768152412e-05,
|
|
"loss": 0.2872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1943899691104889,
|
|
"step": 575,
|
|
"valid_targets_mean": 3474.8,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 0.8774583963691377,
|
|
"grad_norm": 0.5391651752010661,
|
|
"learning_rate": 3.9923454958677676e-05,
|
|
"loss": 0.3153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11259106546640396,
|
|
"step": 580,
|
|
"valid_targets_mean": 3001.9,
|
|
"valid_targets_min": 454
|
|
},
|
|
{
|
|
"epoch": 0.8850226928895613,
|
|
"grad_norm": 0.6166785705495946,
|
|
"learning_rate": 3.991671871664759e-05,
|
|
"loss": 0.3352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15584465861320496,
|
|
"step": 585,
|
|
"valid_targets_mean": 3479.9,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 0.8925869894099848,
|
|
"grad_norm": 0.5433936203435179,
|
|
"learning_rate": 3.9909699051293455e-05,
|
|
"loss": 0.289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1293371319770813,
|
|
"step": 590,
|
|
"valid_targets_mean": 3841.5,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 0.9001512859304085,
|
|
"grad_norm": 0.6267969944242441,
|
|
"learning_rate": 3.990239606250805e-05,
|
|
"loss": 0.3235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.170243039727211,
|
|
"step": 595,
|
|
"valid_targets_mean": 3409.4,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 0.9077155824508321,
|
|
"grad_norm": 0.6399162825236907,
|
|
"learning_rate": 3.989480985421602e-05,
|
|
"loss": 0.305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16275428235530853,
|
|
"step": 600,
|
|
"valid_targets_mean": 3094.6,
|
|
"valid_targets_min": 897
|
|
},
|
|
{
|
|
"epoch": 0.9152798789712556,
|
|
"grad_norm": 0.6379282854969726,
|
|
"learning_rate": 3.988694053437229e-05,
|
|
"loss": 0.3102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1298145055770874,
|
|
"step": 605,
|
|
"valid_targets_mean": 3187.6,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 0.9228441754916793,
|
|
"grad_norm": 0.6755356365043045,
|
|
"learning_rate": 3.987878821496062e-05,
|
|
"loss": 0.2972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1783760040998459,
|
|
"step": 610,
|
|
"valid_targets_mean": 3242.6,
|
|
"valid_targets_min": 238
|
|
},
|
|
{
|
|
"epoch": 0.9304084720121029,
|
|
"grad_norm": 0.5445828924539011,
|
|
"learning_rate": 3.9870353011991955e-05,
|
|
"loss": 0.3247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14102879166603088,
|
|
"step": 615,
|
|
"valid_targets_mean": 3280.8,
|
|
"valid_targets_min": 840
|
|
},
|
|
{
|
|
"epoch": 0.9379727685325264,
|
|
"grad_norm": 0.6905607504197001,
|
|
"learning_rate": 3.986163504550281e-05,
|
|
"loss": 0.319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16626963019371033,
|
|
"step": 620,
|
|
"valid_targets_mean": 2319.5,
|
|
"valid_targets_min": 521
|
|
},
|
|
{
|
|
"epoch": 0.9455370650529501,
|
|
"grad_norm": 0.5914978577275282,
|
|
"learning_rate": 3.985263443955351e-05,
|
|
"loss": 0.2999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1313735842704773,
|
|
"step": 625,
|
|
"valid_targets_mean": 3204.0,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 0.9531013615733737,
|
|
"grad_norm": 0.5094724078921259,
|
|
"learning_rate": 3.9843351322226496e-05,
|
|
"loss": 0.3101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20243233442306519,
|
|
"step": 630,
|
|
"valid_targets_mean": 4687.1,
|
|
"valid_targets_min": 760
|
|
},
|
|
{
|
|
"epoch": 0.9606656580937972,
|
|
"grad_norm": 0.6767216202174653,
|
|
"learning_rate": 3.983378582562446e-05,
|
|
"loss": 0.3104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16598618030548096,
|
|
"step": 635,
|
|
"valid_targets_mean": 2904.8,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 0.9682299546142209,
|
|
"grad_norm": 0.5306229369651405,
|
|
"learning_rate": 3.982393808586843e-05,
|
|
"loss": 0.3075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13321392238140106,
|
|
"step": 640,
|
|
"valid_targets_mean": 3856.1,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 0.9757942511346445,
|
|
"grad_norm": 0.4962370111112916,
|
|
"learning_rate": 3.981380824309594e-05,
|
|
"loss": 0.2999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12012765556573868,
|
|
"step": 645,
|
|
"valid_targets_mean": 3530.8,
|
|
"valid_targets_min": 447
|
|
},
|
|
{
|
|
"epoch": 0.983358547655068,
|
|
"grad_norm": 0.5453410627540449,
|
|
"learning_rate": 3.9803396441458917e-05,
|
|
"loss": 0.3042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15401634573936462,
|
|
"step": 650,
|
|
"valid_targets_mean": 4408.0,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 0.9909228441754917,
|
|
"grad_norm": 0.5758031964619608,
|
|
"learning_rate": 3.979270282912169e-05,
|
|
"loss": 0.3095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14203205704689026,
|
|
"step": 655,
|
|
"valid_targets_mean": 3325.1,
|
|
"valid_targets_min": 586
|
|
},
|
|
{
|
|
"epoch": 0.9984871406959153,
|
|
"grad_norm": 0.5526645542382672,
|
|
"learning_rate": 3.9781727558258896e-05,
|
|
"loss": 0.3081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13224758207798004,
|
|
"step": 660,
|
|
"valid_targets_mean": 3756.0,
|
|
"valid_targets_min": 405
|
|
},
|
|
{
|
|
"epoch": 1.006051437216339,
|
|
"grad_norm": 0.5891879290997246,
|
|
"learning_rate": 3.977047078505327e-05,
|
|
"loss": 0.294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1456419974565506,
|
|
"step": 665,
|
|
"valid_targets_mean": 3990.0,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 1.0136157337367624,
|
|
"grad_norm": 0.575248945798663,
|
|
"learning_rate": 3.975893266969346e-05,
|
|
"loss": 0.2864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1434870958328247,
|
|
"step": 670,
|
|
"valid_targets_mean": 3554.5,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 1.021180030257186,
|
|
"grad_norm": 0.6912198188707248,
|
|
"learning_rate": 3.9747113376371704e-05,
|
|
"loss": 0.2858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17257359623908997,
|
|
"step": 675,
|
|
"valid_targets_mean": 4169.5,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 1.0287443267776097,
|
|
"grad_norm": 0.607993400000161,
|
|
"learning_rate": 3.9735013073281564e-05,
|
|
"loss": 0.2997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1315409392118454,
|
|
"step": 680,
|
|
"valid_targets_mean": 3552.1,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 1.0363086232980332,
|
|
"grad_norm": 0.570904267673167,
|
|
"learning_rate": 3.972263193261545e-05,
|
|
"loss": 0.3037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11718397587537766,
|
|
"step": 685,
|
|
"valid_targets_mean": 4567.6,
|
|
"valid_targets_min": 3576
|
|
},
|
|
{
|
|
"epoch": 1.0438729198184569,
|
|
"grad_norm": 0.5515154186398548,
|
|
"learning_rate": 3.970997013056224e-05,
|
|
"loss": 0.2991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15792353451251984,
|
|
"step": 690,
|
|
"valid_targets_mean": 5060.4,
|
|
"valid_targets_min": 4092
|
|
},
|
|
{
|
|
"epoch": 1.0514372163388805,
|
|
"grad_norm": 0.660283817395426,
|
|
"learning_rate": 3.969702784730471e-05,
|
|
"loss": 0.3058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16387608647346497,
|
|
"step": 695,
|
|
"valid_targets_mean": 3472.9,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 1.059001512859304,
|
|
"grad_norm": 0.6620083958429561,
|
|
"learning_rate": 3.9683805267017035e-05,
|
|
"loss": 0.2826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15107467770576477,
|
|
"step": 700,
|
|
"valid_targets_mean": 3754.9,
|
|
"valid_targets_min": 2022
|
|
},
|
|
{
|
|
"epoch": 1.0665658093797277,
|
|
"grad_norm": 0.6138817165808352,
|
|
"learning_rate": 3.9670302577862124e-05,
|
|
"loss": 0.2869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13340693712234497,
|
|
"step": 705,
|
|
"valid_targets_mean": 2959.4,
|
|
"valid_targets_min": 634
|
|
},
|
|
{
|
|
"epoch": 1.0741301059001513,
|
|
"grad_norm": 0.7876604770860314,
|
|
"learning_rate": 3.965651997198893e-05,
|
|
"loss": 0.2988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2033415138721466,
|
|
"step": 710,
|
|
"valid_targets_mean": 3335.8,
|
|
"valid_targets_min": 364
|
|
},
|
|
{
|
|
"epoch": 1.0816944024205748,
|
|
"grad_norm": 0.6159737190020937,
|
|
"learning_rate": 3.964245764552978e-05,
|
|
"loss": 0.3197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1460169553756714,
|
|
"step": 715,
|
|
"valid_targets_mean": 3525.1,
|
|
"valid_targets_min": 586
|
|
},
|
|
{
|
|
"epoch": 1.0892586989409985,
|
|
"grad_norm": 0.5889828546910099,
|
|
"learning_rate": 3.9628115798597505e-05,
|
|
"loss": 0.316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1535705178976059,
|
|
"step": 720,
|
|
"valid_targets_mean": 3852.1,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 1.0968229954614221,
|
|
"grad_norm": 0.5491030975200422,
|
|
"learning_rate": 3.961349463528266e-05,
|
|
"loss": 0.2826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13425220549106598,
|
|
"step": 725,
|
|
"valid_targets_mean": 3479.8,
|
|
"valid_targets_min": 845
|
|
},
|
|
{
|
|
"epoch": 1.1043872919818456,
|
|
"grad_norm": 0.7826966429303139,
|
|
"learning_rate": 3.959859436365057e-05,
|
|
"loss": 0.2952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15150192379951477,
|
|
"step": 730,
|
|
"valid_targets_mean": 2795.4,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 1.1119515885022693,
|
|
"grad_norm": 0.538346921663493,
|
|
"learning_rate": 3.95834151957384e-05,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14208990335464478,
|
|
"step": 735,
|
|
"valid_targets_mean": 4193.1,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 1.119515885022693,
|
|
"grad_norm": 0.5916924950144173,
|
|
"learning_rate": 3.956795734755213e-05,
|
|
"loss": 0.2885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10365230590105057,
|
|
"step": 740,
|
|
"valid_targets_mean": 2171.0,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 1.1270801815431164,
|
|
"grad_norm": 0.4940751710256907,
|
|
"learning_rate": 3.955222103906346e-05,
|
|
"loss": 0.2927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17642667889595032,
|
|
"step": 745,
|
|
"valid_targets_mean": 4838.8,
|
|
"valid_targets_min": 2570
|
|
},
|
|
{
|
|
"epoch": 1.13464447806354,
|
|
"grad_norm": 0.5613307296045817,
|
|
"learning_rate": 3.953620649420672e-05,
|
|
"loss": 0.2937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13829761743545532,
|
|
"step": 750,
|
|
"valid_targets_mean": 3901.1,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 1.1422087745839637,
|
|
"grad_norm": 0.6687267939494371,
|
|
"learning_rate": 3.951991394087565e-05,
|
|
"loss": 0.2943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10808304697275162,
|
|
"step": 755,
|
|
"valid_targets_mean": 2543.5,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 1.1497730711043872,
|
|
"grad_norm": 0.6479787855172182,
|
|
"learning_rate": 3.950334361092016e-05,
|
|
"loss": 0.2887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1535249501466751,
|
|
"step": 760,
|
|
"valid_targets_mean": 4048.1,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 1.1573373676248109,
|
|
"grad_norm": 0.5400224121868525,
|
|
"learning_rate": 3.948649574014306e-05,
|
|
"loss": 0.2936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09261585772037506,
|
|
"step": 765,
|
|
"valid_targets_mean": 2304.5,
|
|
"valid_targets_min": 810
|
|
},
|
|
{
|
|
"epoch": 1.1649016641452345,
|
|
"grad_norm": 0.6128330349700599,
|
|
"learning_rate": 3.946937056829666e-05,
|
|
"loss": 0.3027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19956746697425842,
|
|
"step": 770,
|
|
"valid_targets_mean": 4002.0,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 1.172465960665658,
|
|
"grad_norm": 0.8101914278410806,
|
|
"learning_rate": 3.9451968339079405e-05,
|
|
"loss": 0.3037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13488063216209412,
|
|
"step": 775,
|
|
"valid_targets_mean": 2670.8,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 1.1800302571860817,
|
|
"grad_norm": 0.5898677864060057,
|
|
"learning_rate": 3.9434289300132355e-05,
|
|
"loss": 0.2862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12725120782852173,
|
|
"step": 780,
|
|
"valid_targets_mean": 2991.0,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 1.1875945537065054,
|
|
"grad_norm": 0.6549465769078141,
|
|
"learning_rate": 3.941633370303572e-05,
|
|
"loss": 0.2843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14529456198215485,
|
|
"step": 785,
|
|
"valid_targets_mean": 2386.8,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 1.1951588502269288,
|
|
"grad_norm": 0.570676838970218,
|
|
"learning_rate": 3.939810180330523e-05,
|
|
"loss": 0.2979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15319663286209106,
|
|
"step": 790,
|
|
"valid_targets_mean": 4168.8,
|
|
"valid_targets_min": 887
|
|
},
|
|
{
|
|
"epoch": 1.2027231467473525,
|
|
"grad_norm": 0.5143944174558078,
|
|
"learning_rate": 3.9379593860388515e-05,
|
|
"loss": 0.3121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1717936098575592,
|
|
"step": 795,
|
|
"valid_targets_mean": 5347.1,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 1.2102874432677762,
|
|
"grad_norm": 0.6158927498069106,
|
|
"learning_rate": 3.936081013766143e-05,
|
|
"loss": 0.2918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15087324380874634,
|
|
"step": 800,
|
|
"valid_targets_mean": 3492.9,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 1.2178517397881996,
|
|
"grad_norm": 0.5476568233816599,
|
|
"learning_rate": 3.9341750902424294e-05,
|
|
"loss": 0.2857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12351877987384796,
|
|
"step": 805,
|
|
"valid_targets_mean": 3530.4,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 1.2254160363086233,
|
|
"grad_norm": 0.578185363099471,
|
|
"learning_rate": 3.932241642589807e-05,
|
|
"loss": 0.2899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1382569968700409,
|
|
"step": 810,
|
|
"valid_targets_mean": 3259.5,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 1.232980332829047,
|
|
"grad_norm": 0.5337890815610976,
|
|
"learning_rate": 3.930280698322053e-05,
|
|
"loss": 0.2955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17424482107162476,
|
|
"step": 815,
|
|
"valid_targets_mean": 4801.9,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 1.2405446293494704,
|
|
"grad_norm": 0.5234772133788091,
|
|
"learning_rate": 3.928292285344234e-05,
|
|
"loss": 0.2855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14880666136741638,
|
|
"step": 820,
|
|
"valid_targets_mean": 3875.2,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 1.248108925869894,
|
|
"grad_norm": 0.5449545769700447,
|
|
"learning_rate": 3.926276431952306e-05,
|
|
"loss": 0.2938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11567376554012299,
|
|
"step": 825,
|
|
"valid_targets_mean": 2764.0,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 1.2556732223903178,
|
|
"grad_norm": 0.5623706203037284,
|
|
"learning_rate": 3.924233166832714e-05,
|
|
"loss": 0.2902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12986469268798828,
|
|
"step": 830,
|
|
"valid_targets_mean": 2924.0,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 1.2632375189107412,
|
|
"grad_norm": 0.6543013147085076,
|
|
"learning_rate": 3.922162519061986e-05,
|
|
"loss": 0.2906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11933982372283936,
|
|
"step": 835,
|
|
"valid_targets_mean": 1984.6,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 1.2708018154311649,
|
|
"grad_norm": 0.5441109941227669,
|
|
"learning_rate": 3.920064518106313e-05,
|
|
"loss": 0.3059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20975258946418762,
|
|
"step": 840,
|
|
"valid_targets_mean": 4732.0,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 1.2783661119515886,
|
|
"grad_norm": 0.631089443820177,
|
|
"learning_rate": 3.917939193821136e-05,
|
|
"loss": 0.3051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13542315363883972,
|
|
"step": 845,
|
|
"valid_targets_mean": 2594.6,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 1.2859304084720122,
|
|
"grad_norm": 0.747332912304098,
|
|
"learning_rate": 3.915786576450719e-05,
|
|
"loss": 0.2883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17319586873054504,
|
|
"step": 850,
|
|
"valid_targets_mean": 5178.8,
|
|
"valid_targets_min": 3690
|
|
},
|
|
{
|
|
"epoch": 1.2934947049924357,
|
|
"grad_norm": 0.596022853889303,
|
|
"learning_rate": 3.913606696627715e-05,
|
|
"loss": 0.3122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1681569367647171,
|
|
"step": 855,
|
|
"valid_targets_mean": 3705.4,
|
|
"valid_targets_min": 520
|
|
},
|
|
{
|
|
"epoch": 1.3010590015128594,
|
|
"grad_norm": 0.5886243818936712,
|
|
"learning_rate": 3.911399585372735e-05,
|
|
"loss": 0.2917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.153789222240448,
|
|
"step": 860,
|
|
"valid_targets_mean": 3687.8,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 1.3086232980332828,
|
|
"grad_norm": 0.5639493978697893,
|
|
"learning_rate": 3.909165274093906e-05,
|
|
"loss": 0.2964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18702542781829834,
|
|
"step": 865,
|
|
"valid_targets_mean": 4203.6,
|
|
"valid_targets_min": 1006
|
|
},
|
|
{
|
|
"epoch": 1.3161875945537065,
|
|
"grad_norm": 0.5887285260038629,
|
|
"learning_rate": 3.906903794586422e-05,
|
|
"loss": 0.2957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12023086845874786,
|
|
"step": 870,
|
|
"valid_targets_mean": 3486.6,
|
|
"valid_targets_min": 713
|
|
},
|
|
{
|
|
"epoch": 1.3237518910741302,
|
|
"grad_norm": 0.5880542108938073,
|
|
"learning_rate": 3.9046151790320905e-05,
|
|
"loss": 0.3085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1508350968360901,
|
|
"step": 875,
|
|
"valid_targets_mean": 3015.9,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 1.3313161875945538,
|
|
"grad_norm": 0.5617870323150751,
|
|
"learning_rate": 3.902299459998879e-05,
|
|
"loss": 0.2857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1621008813381195,
|
|
"step": 880,
|
|
"valid_targets_mean": 4165.9,
|
|
"valid_targets_min": 579
|
|
},
|
|
{
|
|
"epoch": 1.3388804841149773,
|
|
"grad_norm": 0.7140421949606348,
|
|
"learning_rate": 3.8999566704404476e-05,
|
|
"loss": 0.2972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12724804878234863,
|
|
"step": 885,
|
|
"valid_targets_mean": 1878.4,
|
|
"valid_targets_min": 586
|
|
},
|
|
{
|
|
"epoch": 1.346444780635401,
|
|
"grad_norm": 0.5102704073906293,
|
|
"learning_rate": 3.8975868436956826e-05,
|
|
"loss": 0.2858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15302634239196777,
|
|
"step": 890,
|
|
"valid_targets_mean": 4110.9,
|
|
"valid_targets_min": 757
|
|
},
|
|
{
|
|
"epoch": 1.3540090771558244,
|
|
"grad_norm": 0.5951250209197047,
|
|
"learning_rate": 3.895190013488219e-05,
|
|
"loss": 0.2937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1477861851453781,
|
|
"step": 895,
|
|
"valid_targets_mean": 3516.6,
|
|
"valid_targets_min": 666
|
|
},
|
|
{
|
|
"epoch": 1.361573373676248,
|
|
"grad_norm": 0.4873431709719797,
|
|
"learning_rate": 3.892766213925965e-05,
|
|
"loss": 0.2867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1555219292640686,
|
|
"step": 900,
|
|
"valid_targets_mean": 4305.0,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 1.3691376701966718,
|
|
"grad_norm": 0.5718613164277347,
|
|
"learning_rate": 3.890315479500611e-05,
|
|
"loss": 0.2816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14315059781074524,
|
|
"step": 905,
|
|
"valid_targets_mean": 3152.4,
|
|
"valid_targets_min": 594
|
|
},
|
|
{
|
|
"epoch": 1.3767019667170954,
|
|
"grad_norm": 0.5134742216384334,
|
|
"learning_rate": 3.887837845087144e-05,
|
|
"loss": 0.2878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1446826457977295,
|
|
"step": 910,
|
|
"valid_targets_mean": 3131.2,
|
|
"valid_targets_min": 760
|
|
},
|
|
{
|
|
"epoch": 1.384266263237519,
|
|
"grad_norm": 0.5364553106117432,
|
|
"learning_rate": 3.885333345943349e-05,
|
|
"loss": 0.2937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16806858777999878,
|
|
"step": 915,
|
|
"valid_targets_mean": 4608.6,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 1.3918305597579426,
|
|
"grad_norm": 0.49898626476384894,
|
|
"learning_rate": 3.882802017709307e-05,
|
|
"loss": 0.2908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14404705166816711,
|
|
"step": 920,
|
|
"valid_targets_mean": 3518.8,
|
|
"valid_targets_min": 910
|
|
},
|
|
{
|
|
"epoch": 1.399394856278366,
|
|
"grad_norm": 0.5842051540893295,
|
|
"learning_rate": 3.880243896406889e-05,
|
|
"loss": 0.2927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10397547483444214,
|
|
"step": 925,
|
|
"valid_targets_mean": 1962.6,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 1.4069591527987897,
|
|
"grad_norm": 0.6497526428348704,
|
|
"learning_rate": 3.877659018439242e-05,
|
|
"loss": 0.2883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1490338295698166,
|
|
"step": 930,
|
|
"valid_targets_mean": 3190.9,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 1.4145234493192134,
|
|
"grad_norm": 0.4897193120055762,
|
|
"learning_rate": 3.8750474205902715e-05,
|
|
"loss": 0.2966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11491852253675461,
|
|
"step": 935,
|
|
"valid_targets_mean": 3197.0,
|
|
"valid_targets_min": 391
|
|
},
|
|
{
|
|
"epoch": 1.422087745839637,
|
|
"grad_norm": 0.5368393468330136,
|
|
"learning_rate": 3.872409140024119e-05,
|
|
"loss": 0.2925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11498255282640457,
|
|
"step": 940,
|
|
"valid_targets_mean": 2878.6,
|
|
"valid_targets_min": 418
|
|
},
|
|
{
|
|
"epoch": 1.4296520423600605,
|
|
"grad_norm": 0.46716120884926055,
|
|
"learning_rate": 3.8697442142846314e-05,
|
|
"loss": 0.2867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13152192533016205,
|
|
"step": 945,
|
|
"valid_targets_mean": 4738.4,
|
|
"valid_targets_min": 3637
|
|
},
|
|
{
|
|
"epoch": 1.4372163388804842,
|
|
"grad_norm": 0.5902151648137144,
|
|
"learning_rate": 3.867052681294828e-05,
|
|
"loss": 0.307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21390095353126526,
|
|
"step": 950,
|
|
"valid_targets_mean": 3895.5,
|
|
"valid_targets_min": 469
|
|
},
|
|
{
|
|
"epoch": 1.4447806354009076,
|
|
"grad_norm": 0.5479543096718771,
|
|
"learning_rate": 3.8643345793563606e-05,
|
|
"loss": 0.286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13981355726718903,
|
|
"step": 955,
|
|
"valid_targets_mean": 3557.8,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 1.4523449319213313,
|
|
"grad_norm": 0.52354714979117,
|
|
"learning_rate": 3.86158994714897e-05,
|
|
"loss": 0.2801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1581888645887375,
|
|
"step": 960,
|
|
"valid_targets_mean": 4369.6,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 1.459909228441755,
|
|
"grad_norm": 0.48680997266292647,
|
|
"learning_rate": 3.858818823729931e-05,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12068180739879608,
|
|
"step": 965,
|
|
"valid_targets_mean": 3856.6,
|
|
"valid_targets_min": 745
|
|
},
|
|
{
|
|
"epoch": 1.4674735249621786,
|
|
"grad_norm": 0.48440595009727067,
|
|
"learning_rate": 3.856021248533501e-05,
|
|
"loss": 0.2939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15099260210990906,
|
|
"step": 970,
|
|
"valid_targets_mean": 4886.6,
|
|
"valid_targets_min": 3564
|
|
},
|
|
{
|
|
"epoch": 1.475037821482602,
|
|
"grad_norm": 0.4902069041661967,
|
|
"learning_rate": 3.853197261370357e-05,
|
|
"loss": 0.2881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15750305354595184,
|
|
"step": 975,
|
|
"valid_targets_mean": 5005.1,
|
|
"valid_targets_min": 4245
|
|
},
|
|
{
|
|
"epoch": 1.4826021180030258,
|
|
"grad_norm": 0.5942892683477582,
|
|
"learning_rate": 3.850346902427031e-05,
|
|
"loss": 0.2938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16730457544326782,
|
|
"step": 980,
|
|
"valid_targets_mean": 4301.4,
|
|
"valid_targets_min": 536
|
|
},
|
|
{
|
|
"epoch": 1.4901664145234492,
|
|
"grad_norm": 0.5708113265162581,
|
|
"learning_rate": 3.847470212265334e-05,
|
|
"loss": 0.2874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14457716047763824,
|
|
"step": 985,
|
|
"valid_targets_mean": 3069.5,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 1.497730711043873,
|
|
"grad_norm": 0.4740537032364332,
|
|
"learning_rate": 3.844567231821784e-05,
|
|
"loss": 0.3003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12447857856750488,
|
|
"step": 990,
|
|
"valid_targets_mean": 4196.0,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 1.5052950075642966,
|
|
"grad_norm": 0.5812585696328405,
|
|
"learning_rate": 3.8416380024070175e-05,
|
|
"loss": 0.3153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15900781750679016,
|
|
"step": 995,
|
|
"valid_targets_mean": 3672.1,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 1.5128593040847202,
|
|
"grad_norm": 0.6129097479279535,
|
|
"learning_rate": 3.838682565705209e-05,
|
|
"loss": 0.2821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14210692048072815,
|
|
"step": 1000,
|
|
"valid_targets_mean": 2188.0,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 1.5204236006051437,
|
|
"grad_norm": 1.0467956719616998,
|
|
"learning_rate": 3.83570096377347e-05,
|
|
"loss": 0.2976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14025017619132996,
|
|
"step": 1005,
|
|
"valid_targets_mean": 3319.6,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 1.5279878971255674,
|
|
"grad_norm": 0.5318918964315936,
|
|
"learning_rate": 3.8326932390412584e-05,
|
|
"loss": 0.2797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1322619467973709,
|
|
"step": 1010,
|
|
"valid_targets_mean": 3064.1,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 1.5355521936459908,
|
|
"grad_norm": 0.6276027433954832,
|
|
"learning_rate": 3.829659434309765e-05,
|
|
"loss": 0.3175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21672981977462769,
|
|
"step": 1015,
|
|
"valid_targets_mean": 4366.0,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 1.5431164901664145,
|
|
"grad_norm": 0.6212520571592671,
|
|
"learning_rate": 3.8265995927513155e-05,
|
|
"loss": 0.2727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1367231011390686,
|
|
"step": 1020,
|
|
"valid_targets_mean": 2756.5,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 1.5506807866868382,
|
|
"grad_norm": 0.4916371996190509,
|
|
"learning_rate": 3.823513757908748e-05,
|
|
"loss": 0.2834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14618805050849915,
|
|
"step": 1025,
|
|
"valid_targets_mean": 4781.6,
|
|
"valid_targets_min": 901
|
|
},
|
|
{
|
|
"epoch": 1.5582450832072618,
|
|
"grad_norm": 0.6111669889680833,
|
|
"learning_rate": 3.820401973694796e-05,
|
|
"loss": 0.288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15221001207828522,
|
|
"step": 1030,
|
|
"valid_targets_mean": 3315.1,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 1.5658093797276853,
|
|
"grad_norm": 0.5736030840087121,
|
|
"learning_rate": 3.817264284391464e-05,
|
|
"loss": 0.2865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1306094527244568,
|
|
"step": 1035,
|
|
"valid_targets_mean": 3062.1,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 1.573373676248109,
|
|
"grad_norm": 0.491660647633266,
|
|
"learning_rate": 3.8141007346493964e-05,
|
|
"loss": 0.2741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11430300772190094,
|
|
"step": 1040,
|
|
"valid_targets_mean": 3601.4,
|
|
"valid_targets_min": 879
|
|
},
|
|
{
|
|
"epoch": 1.5809379727685324,
|
|
"grad_norm": 0.4766038085571225,
|
|
"learning_rate": 3.8109113694872436e-05,
|
|
"loss": 0.2826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13275161385536194,
|
|
"step": 1045,
|
|
"valid_targets_mean": 3896.1,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 1.588502269288956,
|
|
"grad_norm": 0.5048728598702531,
|
|
"learning_rate": 3.80769623429102e-05,
|
|
"loss": 0.2851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12037082016468048,
|
|
"step": 1050,
|
|
"valid_targets_mean": 3743.4,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 1.5960665658093798,
|
|
"grad_norm": 0.5174450601649762,
|
|
"learning_rate": 3.804455374813456e-05,
|
|
"loss": 0.2884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10486027598381042,
|
|
"step": 1055,
|
|
"valid_targets_mean": 3328.8,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 1.6036308623298035,
|
|
"grad_norm": 0.507478940328461,
|
|
"learning_rate": 3.8011888371733536e-05,
|
|
"loss": 0.2952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10818983614444733,
|
|
"step": 1060,
|
|
"valid_targets_mean": 2585.8,
|
|
"valid_targets_min": 431
|
|
},
|
|
{
|
|
"epoch": 1.611195158850227,
|
|
"grad_norm": 0.5842112883454711,
|
|
"learning_rate": 3.797896667854924e-05,
|
|
"loss": 0.2953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13165339827537537,
|
|
"step": 1065,
|
|
"valid_targets_mean": 3046.5,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 1.6187594553706506,
|
|
"grad_norm": 0.792558436109088,
|
|
"learning_rate": 3.7945789137071264e-05,
|
|
"loss": 0.2819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12533298134803772,
|
|
"step": 1070,
|
|
"valid_targets_mean": 3904.6,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 1.626323751891074,
|
|
"grad_norm": 0.5665661593211339,
|
|
"learning_rate": 3.791235621943005e-05,
|
|
"loss": 0.3005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1565142273902893,
|
|
"step": 1075,
|
|
"valid_targets_mean": 4011.0,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 1.6338880484114977,
|
|
"grad_norm": 0.5513575320946164,
|
|
"learning_rate": 3.7878668401390157e-05,
|
|
"loss": 0.2922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1655036062002182,
|
|
"step": 1080,
|
|
"valid_targets_mean": 3326.4,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 1.6414523449319214,
|
|
"grad_norm": 0.5237714288880337,
|
|
"learning_rate": 3.784472616234345e-05,
|
|
"loss": 0.2849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16192521154880524,
|
|
"step": 1085,
|
|
"valid_targets_mean": 4234.4,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 1.649016641452345,
|
|
"grad_norm": 0.5723168454021307,
|
|
"learning_rate": 3.7810529985302354e-05,
|
|
"loss": 0.2746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14287251234054565,
|
|
"step": 1090,
|
|
"valid_targets_mean": 4324.6,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 1.6565809379727685,
|
|
"grad_norm": 0.6162374207812991,
|
|
"learning_rate": 3.77760803568929e-05,
|
|
"loss": 0.3009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26747292280197144,
|
|
"step": 1095,
|
|
"valid_targets_mean": 4191.6,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 1.6641452344931922,
|
|
"grad_norm": 0.4658962048853896,
|
|
"learning_rate": 3.774137776734788e-05,
|
|
"loss": 0.28,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12173908948898315,
|
|
"step": 1100,
|
|
"valid_targets_mean": 4201.1,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 1.6717095310136156,
|
|
"grad_norm": 0.5225282345315402,
|
|
"learning_rate": 3.770642271049979e-05,
|
|
"loss": 0.2865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1403307318687439,
|
|
"step": 1105,
|
|
"valid_targets_mean": 3602.6,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 1.6792738275340393,
|
|
"grad_norm": 0.5152786681401058,
|
|
"learning_rate": 3.767121568377387e-05,
|
|
"loss": 0.2776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16044282913208008,
|
|
"step": 1110,
|
|
"valid_targets_mean": 4193.2,
|
|
"valid_targets_min": 837
|
|
},
|
|
{
|
|
"epoch": 1.686838124054463,
|
|
"grad_norm": 0.4928850246858111,
|
|
"learning_rate": 3.763575718818099e-05,
|
|
"loss": 0.2763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12836617231369019,
|
|
"step": 1115,
|
|
"valid_targets_mean": 3792.9,
|
|
"valid_targets_min": 417
|
|
},
|
|
{
|
|
"epoch": 1.6944024205748867,
|
|
"grad_norm": 0.49708820145822324,
|
|
"learning_rate": 3.760004772831052e-05,
|
|
"loss": 0.2966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14837561547756195,
|
|
"step": 1120,
|
|
"valid_targets_mean": 4082.1,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 1.70196671709531,
|
|
"grad_norm": 0.6849561836625447,
|
|
"learning_rate": 3.7564087812323176e-05,
|
|
"loss": 0.2994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16422055661678314,
|
|
"step": 1125,
|
|
"valid_targets_mean": 3121.6,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 1.7095310136157338,
|
|
"grad_norm": 0.5237380145331534,
|
|
"learning_rate": 3.7527877951943745e-05,
|
|
"loss": 0.2911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14432930946350098,
|
|
"step": 1130,
|
|
"valid_targets_mean": 3510.9,
|
|
"valid_targets_min": 594
|
|
},
|
|
{
|
|
"epoch": 1.7170953101361572,
|
|
"grad_norm": 0.46733858307173304,
|
|
"learning_rate": 3.749141866245385e-05,
|
|
"loss": 0.2878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1338130533695221,
|
|
"step": 1135,
|
|
"valid_targets_mean": 3974.0,
|
|
"valid_targets_min": 483
|
|
},
|
|
{
|
|
"epoch": 1.724659606656581,
|
|
"grad_norm": 0.49553175919022546,
|
|
"learning_rate": 3.745471046268459e-05,
|
|
"loss": 0.2987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13524213433265686,
|
|
"step": 1140,
|
|
"valid_targets_mean": 3547.9,
|
|
"valid_targets_min": 552
|
|
},
|
|
{
|
|
"epoch": 1.7322239031770046,
|
|
"grad_norm": 0.5382267547746539,
|
|
"learning_rate": 3.7417753875009156e-05,
|
|
"loss": 0.2919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1173422783613205,
|
|
"step": 1145,
|
|
"valid_targets_mean": 3090.9,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 1.7397881996974283,
|
|
"grad_norm": 0.500622919048972,
|
|
"learning_rate": 3.738054942533541e-05,
|
|
"loss": 0.2775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13919296860694885,
|
|
"step": 1150,
|
|
"valid_targets_mean": 3813.8,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 1.7473524962178517,
|
|
"grad_norm": 0.557737787694099,
|
|
"learning_rate": 3.734309764309839e-05,
|
|
"loss": 0.2845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1690666377544403,
|
|
"step": 1155,
|
|
"valid_targets_mean": 3099.4,
|
|
"valid_targets_min": 219
|
|
},
|
|
{
|
|
"epoch": 1.7549167927382754,
|
|
"grad_norm": 0.507366970910663,
|
|
"learning_rate": 3.7305399061252795e-05,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12403805553913116,
|
|
"step": 1160,
|
|
"valid_targets_mean": 3125.6,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 1.7624810892586988,
|
|
"grad_norm": 0.5189012437594942,
|
|
"learning_rate": 3.726745421626537e-05,
|
|
"loss": 0.2841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12479718029499054,
|
|
"step": 1165,
|
|
"valid_targets_mean": 2891.2,
|
|
"valid_targets_min": 369
|
|
},
|
|
{
|
|
"epoch": 1.7700453857791225,
|
|
"grad_norm": 0.4952079985685294,
|
|
"learning_rate": 3.7229263648107285e-05,
|
|
"loss": 0.2899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10737426578998566,
|
|
"step": 1170,
|
|
"valid_targets_mean": 3150.9,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 1.7776096822995462,
|
|
"grad_norm": 0.5851336797807382,
|
|
"learning_rate": 3.7190827900246474e-05,
|
|
"loss": 0.2863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.175589457154274,
|
|
"step": 1175,
|
|
"valid_targets_mean": 3453.5,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 1.7851739788199699,
|
|
"grad_norm": 0.5782162701950824,
|
|
"learning_rate": 3.715214751963987e-05,
|
|
"loss": 0.293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20500090718269348,
|
|
"step": 1180,
|
|
"valid_targets_mean": 3863.1,
|
|
"valid_targets_min": 864
|
|
},
|
|
{
|
|
"epoch": 1.7927382753403933,
|
|
"grad_norm": 0.5594480378041363,
|
|
"learning_rate": 3.711322305672563e-05,
|
|
"loss": 0.2762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16368158161640167,
|
|
"step": 1185,
|
|
"valid_targets_mean": 3809.4,
|
|
"valid_targets_min": 954
|
|
},
|
|
{
|
|
"epoch": 1.800302571860817,
|
|
"grad_norm": 0.5185511495452741,
|
|
"learning_rate": 3.707405506541532e-05,
|
|
"loss": 0.2961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16460925340652466,
|
|
"step": 1190,
|
|
"valid_targets_mean": 4458.1,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 1.8078668683812404,
|
|
"grad_norm": 0.5496767059587088,
|
|
"learning_rate": 3.703464410308601e-05,
|
|
"loss": 0.2724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15647511184215546,
|
|
"step": 1195,
|
|
"valid_targets_mean": 3981.0,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 1.8154311649016641,
|
|
"grad_norm": 0.5526151239157443,
|
|
"learning_rate": 3.699499073057234e-05,
|
|
"loss": 0.2818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12998747825622559,
|
|
"step": 1200,
|
|
"valid_targets_mean": 2370.1,
|
|
"valid_targets_min": 486
|
|
},
|
|
{
|
|
"epoch": 1.8229954614220878,
|
|
"grad_norm": 0.48804571292570836,
|
|
"learning_rate": 3.6955095512158554e-05,
|
|
"loss": 0.2929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14497433602809906,
|
|
"step": 1205,
|
|
"valid_targets_mean": 4263.2,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 1.8305597579425115,
|
|
"grad_norm": 0.5293907743243553,
|
|
"learning_rate": 3.691495901557048e-05,
|
|
"loss": 0.2928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1819661557674408,
|
|
"step": 1210,
|
|
"valid_targets_mean": 4564.5,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 1.838124054462935,
|
|
"grad_norm": 0.5805292247082431,
|
|
"learning_rate": 3.6874581811967425e-05,
|
|
"loss": 0.293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17770519852638245,
|
|
"step": 1215,
|
|
"valid_targets_mean": 3782.5,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 1.8456883509833586,
|
|
"grad_norm": 0.47471391507616245,
|
|
"learning_rate": 3.683396447593406e-05,
|
|
"loss": 0.2748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13768038153648376,
|
|
"step": 1220,
|
|
"valid_targets_mean": 4206.9,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 1.853252647503782,
|
|
"grad_norm": 0.5002331468402234,
|
|
"learning_rate": 3.6793107585472234e-05,
|
|
"loss": 0.2725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10712696611881256,
|
|
"step": 1225,
|
|
"valid_targets_mean": 2834.4,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 1.8608169440242057,
|
|
"grad_norm": 0.5713276472038367,
|
|
"learning_rate": 3.675201172199277e-05,
|
|
"loss": 0.3016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13937297463417053,
|
|
"step": 1230,
|
|
"valid_targets_mean": 4299.1,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 1.8683812405446294,
|
|
"grad_norm": 0.5039575055507526,
|
|
"learning_rate": 3.6710677470307174e-05,
|
|
"loss": 0.2767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11146534979343414,
|
|
"step": 1235,
|
|
"valid_targets_mean": 3012.8,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 1.875945537065053,
|
|
"grad_norm": 0.4446068497697817,
|
|
"learning_rate": 3.6669105418619307e-05,
|
|
"loss": 0.2757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13729849457740784,
|
|
"step": 1240,
|
|
"valid_targets_mean": 4498.5,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 1.8835098335854765,
|
|
"grad_norm": 0.4491311342553811,
|
|
"learning_rate": 3.6627296158517035e-05,
|
|
"loss": 0.2903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1271735429763794,
|
|
"step": 1245,
|
|
"valid_targets_mean": 4181.9,
|
|
"valid_targets_min": 3540
|
|
},
|
|
{
|
|
"epoch": 1.8910741301059002,
|
|
"grad_norm": 0.5391365380400753,
|
|
"learning_rate": 3.658525028496382e-05,
|
|
"loss": 0.2892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12687534093856812,
|
|
"step": 1250,
|
|
"valid_targets_mean": 2833.6,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 1.8986384266263236,
|
|
"grad_norm": 0.4793761585151755,
|
|
"learning_rate": 3.654296839629017e-05,
|
|
"loss": 0.278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10038541257381439,
|
|
"step": 1255,
|
|
"valid_targets_mean": 3038.6,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 1.9062027231467473,
|
|
"grad_norm": 0.4954723424904824,
|
|
"learning_rate": 3.650045109418526e-05,
|
|
"loss": 0.284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13816064596176147,
|
|
"step": 1260,
|
|
"valid_targets_mean": 3999.5,
|
|
"valid_targets_min": 594
|
|
},
|
|
{
|
|
"epoch": 1.913767019667171,
|
|
"grad_norm": 0.5015458821941594,
|
|
"learning_rate": 3.645769898368826e-05,
|
|
"loss": 0.2707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1356511116027832,
|
|
"step": 1265,
|
|
"valid_targets_mean": 3585.8,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 1.9213313161875947,
|
|
"grad_norm": 0.454063245442334,
|
|
"learning_rate": 3.641471267317976e-05,
|
|
"loss": 0.2756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1532512754201889,
|
|
"step": 1270,
|
|
"valid_targets_mean": 4950.0,
|
|
"valid_targets_min": 3742
|
|
},
|
|
{
|
|
"epoch": 1.9288956127080181,
|
|
"grad_norm": 0.47925546144739684,
|
|
"learning_rate": 3.637149277437313e-05,
|
|
"loss": 0.2878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13188070058822632,
|
|
"step": 1275,
|
|
"valid_targets_mean": 3322.0,
|
|
"valid_targets_min": 683
|
|
},
|
|
{
|
|
"epoch": 1.9364599092284418,
|
|
"grad_norm": 0.555574955517826,
|
|
"learning_rate": 3.6328039902305806e-05,
|
|
"loss": 0.3046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17610427737236023,
|
|
"step": 1280,
|
|
"valid_targets_mean": 3671.5,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 1.9440242057488653,
|
|
"grad_norm": 1.0223352583264924,
|
|
"learning_rate": 3.628435467533051e-05,
|
|
"loss": 0.2907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14892156422138214,
|
|
"step": 1285,
|
|
"valid_targets_mean": 3407.9,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 1.951588502269289,
|
|
"grad_norm": 0.5210353430918502,
|
|
"learning_rate": 3.624043771510647e-05,
|
|
"loss": 0.2858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09024085104465485,
|
|
"step": 1290,
|
|
"valid_targets_mean": 2253.8,
|
|
"valid_targets_min": 339
|
|
},
|
|
{
|
|
"epoch": 1.9591527987897126,
|
|
"grad_norm": 0.5330913152390276,
|
|
"learning_rate": 3.619628964659061e-05,
|
|
"loss": 0.2889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1378554105758667,
|
|
"step": 1295,
|
|
"valid_targets_mean": 3196.9,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 1.9667170953101363,
|
|
"grad_norm": 0.5311321904099634,
|
|
"learning_rate": 3.61519110980286e-05,
|
|
"loss": 0.3029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13186433911323547,
|
|
"step": 1300,
|
|
"valid_targets_mean": 3512.1,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 1.9742813918305597,
|
|
"grad_norm": 0.5232878150642407,
|
|
"learning_rate": 3.6107302700945925e-05,
|
|
"loss": 0.2891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14067599177360535,
|
|
"step": 1305,
|
|
"valid_targets_mean": 3315.4,
|
|
"valid_targets_min": 468
|
|
},
|
|
{
|
|
"epoch": 1.9818456883509834,
|
|
"grad_norm": 0.43355802169081387,
|
|
"learning_rate": 3.6062465090138936e-05,
|
|
"loss": 0.278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10941535979509354,
|
|
"step": 1310,
|
|
"valid_targets_mean": 3243.0,
|
|
"valid_targets_min": 660
|
|
},
|
|
{
|
|
"epoch": 1.9894099848714069,
|
|
"grad_norm": 0.58516870454313,
|
|
"learning_rate": 3.6017398903665787e-05,
|
|
"loss": 0.3,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14838334918022156,
|
|
"step": 1315,
|
|
"valid_targets_mean": 3588.8,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 1.9969742813918305,
|
|
"grad_norm": 0.5586097850742623,
|
|
"learning_rate": 3.597210478283735e-05,
|
|
"loss": 0.2988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11034709215164185,
|
|
"step": 1320,
|
|
"valid_targets_mean": 3366.8,
|
|
"valid_targets_min": 579
|
|
},
|
|
{
|
|
"epoch": 2.004538577912254,
|
|
"grad_norm": 0.5755444732499763,
|
|
"learning_rate": 3.5926583372208106e-05,
|
|
"loss": 0.2772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13557398319244385,
|
|
"step": 1325,
|
|
"valid_targets_mean": 3204.6,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 2.012102874432678,
|
|
"grad_norm": 0.5377854912394063,
|
|
"learning_rate": 3.588083531956698e-05,
|
|
"loss": 0.2724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13103197515010834,
|
|
"step": 1330,
|
|
"valid_targets_mean": 3095.5,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 2.0196671709531016,
|
|
"grad_norm": 0.5838503579088928,
|
|
"learning_rate": 3.583486127592807e-05,
|
|
"loss": 0.2684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12906858325004578,
|
|
"step": 1335,
|
|
"valid_targets_mean": 2468.5,
|
|
"valid_targets_min": 440
|
|
},
|
|
{
|
|
"epoch": 2.027231467473525,
|
|
"grad_norm": 0.5781259289064704,
|
|
"learning_rate": 3.5788661895521455e-05,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17435616254806519,
|
|
"step": 1340,
|
|
"valid_targets_mean": 4344.8,
|
|
"valid_targets_min": 878
|
|
},
|
|
{
|
|
"epoch": 2.0347957639939485,
|
|
"grad_norm": 0.5317616198165436,
|
|
"learning_rate": 3.574223783578385e-05,
|
|
"loss": 0.2787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12235361337661743,
|
|
"step": 1345,
|
|
"valid_targets_mean": 3175.8,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 2.042360060514372,
|
|
"grad_norm": 0.4576739022718804,
|
|
"learning_rate": 3.569558975734923e-05,
|
|
"loss": 0.2543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1489795595407486,
|
|
"step": 1350,
|
|
"valid_targets_mean": 4646.4,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 2.049924357034796,
|
|
"grad_norm": 0.4973592653835254,
|
|
"learning_rate": 3.564871832403948e-05,
|
|
"loss": 0.2621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11267578601837158,
|
|
"step": 1355,
|
|
"valid_targets_mean": 4304.1,
|
|
"valid_targets_min": 3492
|
|
},
|
|
{
|
|
"epoch": 2.0574886535552195,
|
|
"grad_norm": 0.5966283982593381,
|
|
"learning_rate": 3.560162420285489e-05,
|
|
"loss": 0.2632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13036462664604187,
|
|
"step": 1360,
|
|
"valid_targets_mean": 2414.8,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 2.065052950075643,
|
|
"grad_norm": 0.45290106735049335,
|
|
"learning_rate": 3.555430806396471e-05,
|
|
"loss": 0.2633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11052821576595306,
|
|
"step": 1365,
|
|
"valid_targets_mean": 3562.6,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 2.0726172465960664,
|
|
"grad_norm": 0.6207231913256602,
|
|
"learning_rate": 3.55067705806976e-05,
|
|
"loss": 0.2799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08491264283657074,
|
|
"step": 1370,
|
|
"valid_targets_mean": 1879.1,
|
|
"valid_targets_min": 521
|
|
},
|
|
{
|
|
"epoch": 2.08018154311649,
|
|
"grad_norm": 0.5428864413990756,
|
|
"learning_rate": 3.545901242953203e-05,
|
|
"loss": 0.2724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15490245819091797,
|
|
"step": 1375,
|
|
"valid_targets_mean": 3541.2,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 2.0877458396369137,
|
|
"grad_norm": 0.5054736033748285,
|
|
"learning_rate": 3.541103429008666e-05,
|
|
"loss": 0.2554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14006435871124268,
|
|
"step": 1380,
|
|
"valid_targets_mean": 4338.1,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 2.0953101361573374,
|
|
"grad_norm": 0.463123165908252,
|
|
"learning_rate": 3.5362836845110716e-05,
|
|
"loss": 0.2669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09125912189483643,
|
|
"step": 1385,
|
|
"valid_targets_mean": 3471.9,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 2.102874432677761,
|
|
"grad_norm": 0.5716050795072798,
|
|
"learning_rate": 3.5314420780474186e-05,
|
|
"loss": 0.2644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11499777436256409,
|
|
"step": 1390,
|
|
"valid_targets_mean": 2495.8,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 2.1104387291981848,
|
|
"grad_norm": 0.5353508752884241,
|
|
"learning_rate": 3.5265786785158145e-05,
|
|
"loss": 0.2597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12236478924751282,
|
|
"step": 1395,
|
|
"valid_targets_mean": 3474.4,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 2.118003025718608,
|
|
"grad_norm": 0.5229887187921544,
|
|
"learning_rate": 3.5216935551244896e-05,
|
|
"loss": 0.2703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14113318920135498,
|
|
"step": 1400,
|
|
"valid_targets_mean": 3358.8,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 2.1255673222390317,
|
|
"grad_norm": 0.5536099675899255,
|
|
"learning_rate": 3.516786777390813e-05,
|
|
"loss": 0.2927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12051559239625931,
|
|
"step": 1405,
|
|
"valid_targets_mean": 3255.0,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 2.1331316187594553,
|
|
"grad_norm": 0.4889179674654145,
|
|
"learning_rate": 3.511858415140307e-05,
|
|
"loss": 0.2566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1547798216342926,
|
|
"step": 1410,
|
|
"valid_targets_mean": 4776.5,
|
|
"valid_targets_min": 757
|
|
},
|
|
{
|
|
"epoch": 2.140695915279879,
|
|
"grad_norm": 0.45685588547364714,
|
|
"learning_rate": 3.506908538505648e-05,
|
|
"loss": 0.2603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12049590051174164,
|
|
"step": 1415,
|
|
"valid_targets_mean": 4044.5,
|
|
"valid_targets_min": 873
|
|
},
|
|
{
|
|
"epoch": 2.1482602118003027,
|
|
"grad_norm": 0.5010799142012269,
|
|
"learning_rate": 3.501937217925673e-05,
|
|
"loss": 0.2748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12217780947685242,
|
|
"step": 1420,
|
|
"valid_targets_mean": 3020.6,
|
|
"valid_targets_min": 521
|
|
},
|
|
{
|
|
"epoch": 2.1558245083207264,
|
|
"grad_norm": 0.4632828614066023,
|
|
"learning_rate": 3.496944524144375e-05,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1326088011264801,
|
|
"step": 1425,
|
|
"valid_targets_mean": 4573.4,
|
|
"valid_targets_min": 839
|
|
},
|
|
{
|
|
"epoch": 2.1633888048411496,
|
|
"grad_norm": 0.5191423836393423,
|
|
"learning_rate": 3.4919305282098946e-05,
|
|
"loss": 0.2664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15790945291519165,
|
|
"step": 1430,
|
|
"valid_targets_mean": 4289.8,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 2.1709531013615733,
|
|
"grad_norm": 0.48831539096807075,
|
|
"learning_rate": 3.486895301473515e-05,
|
|
"loss": 0.267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12832537293434143,
|
|
"step": 1435,
|
|
"valid_targets_mean": 3961.1,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 2.178517397881997,
|
|
"grad_norm": 0.4448601043902942,
|
|
"learning_rate": 3.4818389155886394e-05,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13932988047599792,
|
|
"step": 1440,
|
|
"valid_targets_mean": 4919.5,
|
|
"valid_targets_min": 4111
|
|
},
|
|
{
|
|
"epoch": 2.1860816944024206,
|
|
"grad_norm": 0.5017669007637845,
|
|
"learning_rate": 3.476761442509776e-05,
|
|
"loss": 0.2696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11492684483528137,
|
|
"step": 1445,
|
|
"valid_targets_mean": 2521.9,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 2.1936459909228443,
|
|
"grad_norm": 0.4924404436780808,
|
|
"learning_rate": 3.4716629544915124e-05,
|
|
"loss": 0.2679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11409447342157364,
|
|
"step": 1450,
|
|
"valid_targets_mean": 3099.2,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 2.201210287443268,
|
|
"grad_norm": 0.6232561101903803,
|
|
"learning_rate": 3.4665435240874883e-05,
|
|
"loss": 0.2798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1471477448940277,
|
|
"step": 1455,
|
|
"valid_targets_mean": 3599.0,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 2.208774583963691,
|
|
"grad_norm": 0.4380030397247709,
|
|
"learning_rate": 3.46140322414936e-05,
|
|
"loss": 0.2665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11037217080593109,
|
|
"step": 1460,
|
|
"valid_targets_mean": 3896.9,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 2.216338880484115,
|
|
"grad_norm": 0.5446024532285665,
|
|
"learning_rate": 3.456242127825769e-05,
|
|
"loss": 0.284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1760619878768921,
|
|
"step": 1465,
|
|
"valid_targets_mean": 4597.1,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 2.2239031770045385,
|
|
"grad_norm": 0.486931818013627,
|
|
"learning_rate": 3.4510603085612984e-05,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11286469548940659,
|
|
"step": 1470,
|
|
"valid_targets_mean": 3086.9,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 2.231467473524962,
|
|
"grad_norm": 0.5132108800023386,
|
|
"learning_rate": 3.445857840095425e-05,
|
|
"loss": 0.2749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13984905183315277,
|
|
"step": 1475,
|
|
"valid_targets_mean": 3475.5,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 2.239031770045386,
|
|
"grad_norm": 0.5937057523856408,
|
|
"learning_rate": 3.4406347964614725e-05,
|
|
"loss": 0.2669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12535332143306732,
|
|
"step": 1480,
|
|
"valid_targets_mean": 2129.4,
|
|
"valid_targets_min": 395
|
|
},
|
|
{
|
|
"epoch": 2.2465960665658096,
|
|
"grad_norm": 0.5312289626246242,
|
|
"learning_rate": 3.4353912519855605e-05,
|
|
"loss": 0.271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17054787278175354,
|
|
"step": 1485,
|
|
"valid_targets_mean": 4657.2,
|
|
"valid_targets_min": 831
|
|
},
|
|
{
|
|
"epoch": 2.254160363086233,
|
|
"grad_norm": 0.4939387877120143,
|
|
"learning_rate": 3.4301272812855425e-05,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15509819984436035,
|
|
"step": 1490,
|
|
"valid_targets_mean": 4163.1,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 2.2617246596066565,
|
|
"grad_norm": 0.5207422328784146,
|
|
"learning_rate": 3.4248429592699455e-05,
|
|
"loss": 0.2614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14272922277450562,
|
|
"step": 1495,
|
|
"valid_targets_mean": 3555.1,
|
|
"valid_targets_min": 829
|
|
},
|
|
{
|
|
"epoch": 2.26928895612708,
|
|
"grad_norm": 0.5267163103093272,
|
|
"learning_rate": 3.419538361136906e-05,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1131926029920578,
|
|
"step": 1500,
|
|
"valid_targets_mean": 3488.8,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 2.276853252647504,
|
|
"grad_norm": 0.5227961525908271,
|
|
"learning_rate": 3.4142135623730954e-05,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18416054546833038,
|
|
"step": 1505,
|
|
"valid_targets_mean": 4665.6,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 2.2844175491679275,
|
|
"grad_norm": 0.44535605284215446,
|
|
"learning_rate": 3.408868638752652e-05,
|
|
"loss": 0.2553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13845394551753998,
|
|
"step": 1510,
|
|
"valid_targets_mean": 4797.8,
|
|
"valid_targets_min": 838
|
|
},
|
|
{
|
|
"epoch": 2.291981845688351,
|
|
"grad_norm": 0.5039455127265763,
|
|
"learning_rate": 3.4035036663360975e-05,
|
|
"loss": 0.2796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1106272041797638,
|
|
"step": 1515,
|
|
"valid_targets_mean": 3417.8,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 2.2995461422087744,
|
|
"grad_norm": 0.49568446274979533,
|
|
"learning_rate": 3.398118721469255e-05,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12370513379573822,
|
|
"step": 1520,
|
|
"valid_targets_mean": 3370.4,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 2.307110438729198,
|
|
"grad_norm": 0.5881667474943352,
|
|
"learning_rate": 3.392713880782168e-05,
|
|
"loss": 0.2711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1382002979516983,
|
|
"step": 1525,
|
|
"valid_targets_mean": 2965.1,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 2.3146747352496218,
|
|
"grad_norm": 0.47756381395119724,
|
|
"learning_rate": 3.3872892211880024e-05,
|
|
"loss": 0.2603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10721984505653381,
|
|
"step": 1530,
|
|
"valid_targets_mean": 3466.4,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 2.3222390317700454,
|
|
"grad_norm": 0.5079830235091746,
|
|
"learning_rate": 3.381844819881956e-05,
|
|
"loss": 0.2683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14640769362449646,
|
|
"step": 1535,
|
|
"valid_targets_mean": 3911.9,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 2.329803328290469,
|
|
"grad_norm": 0.5615265662965732,
|
|
"learning_rate": 3.376380754340161e-05,
|
|
"loss": 0.2554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12771853804588318,
|
|
"step": 1540,
|
|
"valid_targets_mean": 3164.5,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 2.3373676248108928,
|
|
"grad_norm": 0.4714448929627175,
|
|
"learning_rate": 3.370897102318579e-05,
|
|
"loss": 0.272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13411769270896912,
|
|
"step": 1545,
|
|
"valid_targets_mean": 3468.4,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 2.344931921331316,
|
|
"grad_norm": 0.5745602347844158,
|
|
"learning_rate": 3.365393941851895e-05,
|
|
"loss": 0.27,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16261687874794006,
|
|
"step": 1550,
|
|
"valid_targets_mean": 3926.0,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 2.3524962178517397,
|
|
"grad_norm": 0.5173578647519521,
|
|
"learning_rate": 3.3598713512524095e-05,
|
|
"loss": 0.2849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15512710809707642,
|
|
"step": 1555,
|
|
"valid_targets_mean": 4075.1,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 2.3600605143721634,
|
|
"grad_norm": 0.537820074019125,
|
|
"learning_rate": 3.3543294091089196e-05,
|
|
"loss": 0.268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15758030116558075,
|
|
"step": 1560,
|
|
"valid_targets_mean": 3582.1,
|
|
"valid_targets_min": 459
|
|
},
|
|
{
|
|
"epoch": 2.367624810892587,
|
|
"grad_norm": 0.5152514599601117,
|
|
"learning_rate": 3.348768194285604e-05,
|
|
"loss": 0.2704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13840244710445404,
|
|
"step": 1565,
|
|
"valid_targets_mean": 3635.8,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 2.3751891074130107,
|
|
"grad_norm": 0.4861164375622333,
|
|
"learning_rate": 3.343187785920899e-05,
|
|
"loss": 0.2572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12222899496555328,
|
|
"step": 1570,
|
|
"valid_targets_mean": 4287.0,
|
|
"valid_targets_min": 445
|
|
},
|
|
{
|
|
"epoch": 2.3827534039334344,
|
|
"grad_norm": 0.48426822931213487,
|
|
"learning_rate": 3.337588263426376e-05,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09429280459880829,
|
|
"step": 1575,
|
|
"valid_targets_mean": 2887.0,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 2.3903177004538576,
|
|
"grad_norm": 0.49375117140226976,
|
|
"learning_rate": 3.331969706485604e-05,
|
|
"loss": 0.2656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10512296855449677,
|
|
"step": 1580,
|
|
"valid_targets_mean": 3390.6,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 2.3978819969742813,
|
|
"grad_norm": 0.4377827832097994,
|
|
"learning_rate": 3.3263321950530244e-05,
|
|
"loss": 0.2714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14309759438037872,
|
|
"step": 1585,
|
|
"valid_targets_mean": 4955.0,
|
|
"valid_targets_min": 3315
|
|
},
|
|
{
|
|
"epoch": 2.405446293494705,
|
|
"grad_norm": 0.521135780683842,
|
|
"learning_rate": 3.320675809352807e-05,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1505127251148224,
|
|
"step": 1590,
|
|
"valid_targets_mean": 3981.2,
|
|
"valid_targets_min": 1185
|
|
},
|
|
{
|
|
"epoch": 2.4130105900151286,
|
|
"grad_norm": 0.5175008346797431,
|
|
"learning_rate": 3.31500062987771e-05,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12987926602363586,
|
|
"step": 1595,
|
|
"valid_targets_mean": 3507.4,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 2.4205748865355523,
|
|
"grad_norm": 0.5076922123764839,
|
|
"learning_rate": 3.309306737387936e-05,
|
|
"loss": 0.26,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10437558591365814,
|
|
"step": 1600,
|
|
"valid_targets_mean": 2664.8,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 2.428139183055976,
|
|
"grad_norm": 0.5429041830472571,
|
|
"learning_rate": 3.303594212909981e-05,
|
|
"loss": 0.2756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13545098900794983,
|
|
"step": 1605,
|
|
"valid_targets_mean": 3221.5,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 2.435703479576399,
|
|
"grad_norm": 0.5596324649626183,
|
|
"learning_rate": 3.297863137735483e-05,
|
|
"loss": 0.254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14394566416740417,
|
|
"step": 1610,
|
|
"valid_targets_mean": 3169.8,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 2.443267776096823,
|
|
"grad_norm": 0.5080679340717145,
|
|
"learning_rate": 3.292113593420064e-05,
|
|
"loss": 0.2676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12522576749324799,
|
|
"step": 1615,
|
|
"valid_targets_mean": 4034.6,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 2.4508320726172466,
|
|
"grad_norm": 0.5531901883038207,
|
|
"learning_rate": 3.2863456617821686e-05,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1826557070016861,
|
|
"step": 1620,
|
|
"valid_targets_mean": 4005.1,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 2.4583963691376702,
|
|
"grad_norm": 0.5813531251413108,
|
|
"learning_rate": 3.280559424901902e-05,
|
|
"loss": 0.2656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13644327223300934,
|
|
"step": 1625,
|
|
"valid_targets_mean": 3055.5,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 2.465960665658094,
|
|
"grad_norm": 0.552066336105151,
|
|
"learning_rate": 3.274754965119859e-05,
|
|
"loss": 0.2616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.122626394033432,
|
|
"step": 1630,
|
|
"valid_targets_mean": 2586.5,
|
|
"valid_targets_min": 521
|
|
},
|
|
{
|
|
"epoch": 2.4735249621785176,
|
|
"grad_norm": 0.5005589984669362,
|
|
"learning_rate": 3.268932365035957e-05,
|
|
"loss": 0.262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11573204398155212,
|
|
"step": 1635,
|
|
"valid_targets_mean": 4762.0,
|
|
"valid_targets_min": 488
|
|
},
|
|
{
|
|
"epoch": 2.481089258698941,
|
|
"grad_norm": 0.47261568823910527,
|
|
"learning_rate": 3.2630917075082545e-05,
|
|
"loss": 0.2701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13608703017234802,
|
|
"step": 1640,
|
|
"valid_targets_mean": 4445.5,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 2.4886535552193645,
|
|
"grad_norm": 0.47843080967525875,
|
|
"learning_rate": 3.257233075651776e-05,
|
|
"loss": 0.2674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12716636061668396,
|
|
"step": 1645,
|
|
"valid_targets_mean": 3717.1,
|
|
"valid_targets_min": 929
|
|
},
|
|
{
|
|
"epoch": 2.496217851739788,
|
|
"grad_norm": 0.5659770382615649,
|
|
"learning_rate": 3.251356552837331e-05,
|
|
"loss": 0.2801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12794221937656403,
|
|
"step": 1650,
|
|
"valid_targets_mean": 2677.2,
|
|
"valid_targets_min": 477
|
|
},
|
|
{
|
|
"epoch": 2.503782148260212,
|
|
"grad_norm": 0.49513447933854043,
|
|
"learning_rate": 3.24546222269032e-05,
|
|
"loss": 0.2777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11842894554138184,
|
|
"step": 1655,
|
|
"valid_targets_mean": 3260.0,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 2.5113464447806355,
|
|
"grad_norm": 0.4897282296391686,
|
|
"learning_rate": 3.239550169089554e-05,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13932721316814423,
|
|
"step": 1660,
|
|
"valid_targets_mean": 4350.4,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 2.5189107413010587,
|
|
"grad_norm": 0.5307253743771023,
|
|
"learning_rate": 3.233620476166052e-05,
|
|
"loss": 0.2647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11655274033546448,
|
|
"step": 1665,
|
|
"valid_targets_mean": 2783.8,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 2.5264750378214824,
|
|
"grad_norm": 0.5696876859241821,
|
|
"learning_rate": 3.227673228301852e-05,
|
|
"loss": 0.2731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19462570548057556,
|
|
"step": 1670,
|
|
"valid_targets_mean": 3269.1,
|
|
"valid_targets_min": 696
|
|
},
|
|
{
|
|
"epoch": 2.534039334341906,
|
|
"grad_norm": 0.5442561050566181,
|
|
"learning_rate": 3.221708510128803e-05,
|
|
"loss": 0.2719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16979891061782837,
|
|
"step": 1675,
|
|
"valid_targets_mean": 4332.6,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 2.5416036308623298,
|
|
"grad_norm": 0.5297323984920541,
|
|
"learning_rate": 3.215726406527366e-05,
|
|
"loss": 0.2594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14393988251686096,
|
|
"step": 1680,
|
|
"valid_targets_mean": 3220.9,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 2.5491679273827534,
|
|
"grad_norm": 0.5426246345482605,
|
|
"learning_rate": 3.209727002625403e-05,
|
|
"loss": 0.2747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16277386248111725,
|
|
"step": 1685,
|
|
"valid_targets_mean": 3968.4,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 2.556732223903177,
|
|
"grad_norm": 0.5121626016725528,
|
|
"learning_rate": 3.203710383796968e-05,
|
|
"loss": 0.2516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13004092872142792,
|
|
"step": 1690,
|
|
"valid_targets_mean": 3567.8,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 2.564296520423601,
|
|
"grad_norm": 0.6488519576514528,
|
|
"learning_rate": 3.197676635661088e-05,
|
|
"loss": 0.2647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13749025762081146,
|
|
"step": 1695,
|
|
"valid_targets_mean": 2703.1,
|
|
"valid_targets_min": 497
|
|
},
|
|
{
|
|
"epoch": 2.5718608169440245,
|
|
"grad_norm": 0.568849338274085,
|
|
"learning_rate": 3.191625844080549e-05,
|
|
"loss": 0.2631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13152454793453217,
|
|
"step": 1700,
|
|
"valid_targets_mean": 3893.8,
|
|
"valid_targets_min": 655
|
|
},
|
|
{
|
|
"epoch": 2.5794251134644477,
|
|
"grad_norm": 0.4984627035218723,
|
|
"learning_rate": 3.185558095160673e-05,
|
|
"loss": 0.2861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16171471774578094,
|
|
"step": 1705,
|
|
"valid_targets_mean": 3907.0,
|
|
"valid_targets_min": 467
|
|
},
|
|
{
|
|
"epoch": 2.5869894099848714,
|
|
"grad_norm": 0.47458147771064174,
|
|
"learning_rate": 3.1794734752480904e-05,
|
|
"loss": 0.2646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12639868259429932,
|
|
"step": 1710,
|
|
"valid_targets_mean": 4130.6,
|
|
"valid_targets_min": 1037
|
|
},
|
|
{
|
|
"epoch": 2.594553706505295,
|
|
"grad_norm": 0.5272675964949979,
|
|
"learning_rate": 3.173372070929516e-05,
|
|
"loss": 0.2794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12124425172805786,
|
|
"step": 1715,
|
|
"valid_targets_mean": 3109.8,
|
|
"valid_targets_min": 594
|
|
},
|
|
{
|
|
"epoch": 2.6021180030257187,
|
|
"grad_norm": 0.5012959372525905,
|
|
"learning_rate": 3.1672539690305085e-05,
|
|
"loss": 0.2753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18400080502033234,
|
|
"step": 1720,
|
|
"valid_targets_mean": 4023.8,
|
|
"valid_targets_min": 513
|
|
},
|
|
{
|
|
"epoch": 2.609682299546142,
|
|
"grad_norm": 0.4692850775597721,
|
|
"learning_rate": 3.161119256614245e-05,
|
|
"loss": 0.2848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1348072737455368,
|
|
"step": 1725,
|
|
"valid_targets_mean": 3856.1,
|
|
"valid_targets_min": 778
|
|
},
|
|
{
|
|
"epoch": 2.6172465960665656,
|
|
"grad_norm": 0.4726794328654622,
|
|
"learning_rate": 3.1549680209802755e-05,
|
|
"loss": 0.2587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12720650434494019,
|
|
"step": 1730,
|
|
"valid_targets_mean": 4185.8,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 2.6248108925869893,
|
|
"grad_norm": 0.535586788887435,
|
|
"learning_rate": 3.148800349663284e-05,
|
|
"loss": 0.2711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14029201865196228,
|
|
"step": 1735,
|
|
"valid_targets_mean": 3449.4,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 2.632375189107413,
|
|
"grad_norm": 0.5148466759618716,
|
|
"learning_rate": 3.142616330431838e-05,
|
|
"loss": 0.2775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1593298614025116,
|
|
"step": 1740,
|
|
"valid_targets_mean": 5175.8,
|
|
"valid_targets_min": 930
|
|
},
|
|
{
|
|
"epoch": 2.6399394856278366,
|
|
"grad_norm": 0.6015834349845364,
|
|
"learning_rate": 3.136416051287145e-05,
|
|
"loss": 0.2656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1478438377380371,
|
|
"step": 1745,
|
|
"valid_targets_mean": 2627.0,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 2.6475037821482603,
|
|
"grad_norm": 0.4868929363200845,
|
|
"learning_rate": 3.130199600461797e-05,
|
|
"loss": 0.2639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12947577238082886,
|
|
"step": 1750,
|
|
"valid_targets_mean": 3736.2,
|
|
"valid_targets_min": 683
|
|
},
|
|
{
|
|
"epoch": 2.655068078668684,
|
|
"grad_norm": 0.4355971800297212,
|
|
"learning_rate": 3.1239670664185175e-05,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12511947751045227,
|
|
"step": 1755,
|
|
"valid_targets_mean": 4533.4,
|
|
"valid_targets_min": 938
|
|
},
|
|
{
|
|
"epoch": 2.6626323751891077,
|
|
"grad_norm": 0.4405197509810812,
|
|
"learning_rate": 3.1177185378488984e-05,
|
|
"loss": 0.2504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11358240991830826,
|
|
"step": 1760,
|
|
"valid_targets_mean": 4174.2,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 2.670196671709531,
|
|
"grad_norm": 0.4609216288427696,
|
|
"learning_rate": 3.111454103672143e-05,
|
|
"loss": 0.276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1226121112704277,
|
|
"step": 1765,
|
|
"valid_targets_mean": 4159.0,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 2.6777609682299546,
|
|
"grad_norm": 0.4509147972190912,
|
|
"learning_rate": 3.105173853033796e-05,
|
|
"loss": 0.2454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13133513927459717,
|
|
"step": 1770,
|
|
"valid_targets_mean": 4088.0,
|
|
"valid_targets_min": 855
|
|
},
|
|
{
|
|
"epoch": 2.6853252647503782,
|
|
"grad_norm": 0.5911949962276682,
|
|
"learning_rate": 3.098877875304478e-05,
|
|
"loss": 0.2647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16372349858283997,
|
|
"step": 1775,
|
|
"valid_targets_mean": 4029.0,
|
|
"valid_targets_min": 467
|
|
},
|
|
{
|
|
"epoch": 2.692889561270802,
|
|
"grad_norm": 0.4704203929814393,
|
|
"learning_rate": 3.092566260078614e-05,
|
|
"loss": 0.2605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1610604226589203,
|
|
"step": 1780,
|
|
"valid_targets_mean": 4660.0,
|
|
"valid_targets_min": 696
|
|
},
|
|
{
|
|
"epoch": 2.700453857791225,
|
|
"grad_norm": 0.5700793507251756,
|
|
"learning_rate": 3.086239097173155e-05,
|
|
"loss": 0.2634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13377222418785095,
|
|
"step": 1785,
|
|
"valid_targets_mean": 2980.8,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 2.708018154311649,
|
|
"grad_norm": 0.5566105593815984,
|
|
"learning_rate": 3.079896476626303e-05,
|
|
"loss": 0.2678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11272990703582764,
|
|
"step": 1790,
|
|
"valid_targets_mean": 2938.4,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 2.7155824508320725,
|
|
"grad_norm": 0.575579220068145,
|
|
"learning_rate": 3.073538488696229e-05,
|
|
"loss": 0.2706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14602291584014893,
|
|
"step": 1795,
|
|
"valid_targets_mean": 3657.8,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 2.723146747352496,
|
|
"grad_norm": 0.540569051415145,
|
|
"learning_rate": 3.0671652238597873e-05,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12785914540290833,
|
|
"step": 1800,
|
|
"valid_targets_mean": 3835.9,
|
|
"valid_targets_min": 713
|
|
},
|
|
{
|
|
"epoch": 2.73071104387292,
|
|
"grad_norm": 0.5195021271074958,
|
|
"learning_rate": 3.060776772811231e-05,
|
|
"loss": 0.2575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1207313984632492,
|
|
"step": 1805,
|
|
"valid_targets_mean": 3901.0,
|
|
"valid_targets_min": 1009
|
|
},
|
|
{
|
|
"epoch": 2.7382753403933435,
|
|
"grad_norm": 0.476228990813682,
|
|
"learning_rate": 3.0543732264609174e-05,
|
|
"loss": 0.2596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13186213374137878,
|
|
"step": 1810,
|
|
"valid_targets_mean": 3811.6,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 2.745839636913767,
|
|
"grad_norm": 0.4683092440623606,
|
|
"learning_rate": 3.0479546759340176e-05,
|
|
"loss": 0.2711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12406468391418457,
|
|
"step": 1815,
|
|
"valid_targets_mean": 3885.4,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 2.753403933434191,
|
|
"grad_norm": 0.49203197979510255,
|
|
"learning_rate": 3.0415212125692184e-05,
|
|
"loss": 0.2676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12820132076740265,
|
|
"step": 1820,
|
|
"valid_targets_mean": 3990.8,
|
|
"valid_targets_min": 683
|
|
},
|
|
{
|
|
"epoch": 2.760968229954614,
|
|
"grad_norm": 0.5062448689750326,
|
|
"learning_rate": 3.0350729279174212e-05,
|
|
"loss": 0.2597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11107508838176727,
|
|
"step": 1825,
|
|
"valid_targets_mean": 3620.0,
|
|
"valid_targets_min": 856
|
|
},
|
|
{
|
|
"epoch": 2.768532526475038,
|
|
"grad_norm": 0.473937537004572,
|
|
"learning_rate": 3.0286099137404426e-05,
|
|
"loss": 0.2719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11070632934570312,
|
|
"step": 1830,
|
|
"valid_targets_mean": 3283.4,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 2.7760968229954615,
|
|
"grad_norm": 0.5134361753669978,
|
|
"learning_rate": 3.0221322620097047e-05,
|
|
"loss": 0.2617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11956329643726349,
|
|
"step": 1835,
|
|
"valid_targets_mean": 3491.2,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 2.783661119515885,
|
|
"grad_norm": 0.45352107903615635,
|
|
"learning_rate": 3.01564006490493e-05,
|
|
"loss": 0.2583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1235525906085968,
|
|
"step": 1840,
|
|
"valid_targets_mean": 3664.5,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 2.7912254160363084,
|
|
"grad_norm": 0.4725902589479603,
|
|
"learning_rate": 3.0091334148128265e-05,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13518311083316803,
|
|
"step": 1845,
|
|
"valid_targets_mean": 3999.1,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 2.798789712556732,
|
|
"grad_norm": 0.49522270427714443,
|
|
"learning_rate": 3.002612404325774e-05,
|
|
"loss": 0.265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1137615367770195,
|
|
"step": 1850,
|
|
"valid_targets_mean": 3350.8,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 2.8063540090771557,
|
|
"grad_norm": 0.45386439108129767,
|
|
"learning_rate": 2.9960771262405085e-05,
|
|
"loss": 0.2603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1341477632522583,
|
|
"step": 1855,
|
|
"valid_targets_mean": 4820.8,
|
|
"valid_targets_min": 3857
|
|
},
|
|
{
|
|
"epoch": 2.8139183055975794,
|
|
"grad_norm": 0.4572050158973545,
|
|
"learning_rate": 2.9895276735567988e-05,
|
|
"loss": 0.2606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13375908136367798,
|
|
"step": 1860,
|
|
"valid_targets_mean": 4445.8,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 2.821482602118003,
|
|
"grad_norm": 0.5052250631552716,
|
|
"learning_rate": 2.982964139476124e-05,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13409888744354248,
|
|
"step": 1865,
|
|
"valid_targets_mean": 3737.0,
|
|
"valid_targets_min": 863
|
|
},
|
|
{
|
|
"epoch": 2.8290468986384267,
|
|
"grad_norm": 0.5513394696651329,
|
|
"learning_rate": 2.9763866174003473e-05,
|
|
"loss": 0.2701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14021827280521393,
|
|
"step": 1870,
|
|
"valid_targets_mean": 3307.9,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 2.8366111951588504,
|
|
"grad_norm": 0.5225961711187693,
|
|
"learning_rate": 2.9697952009303886e-05,
|
|
"loss": 0.2843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11121019721031189,
|
|
"step": 1875,
|
|
"valid_targets_mean": 3216.5,
|
|
"valid_targets_min": 778
|
|
},
|
|
{
|
|
"epoch": 2.844175491679274,
|
|
"grad_norm": 0.48376050159028405,
|
|
"learning_rate": 2.9631899838648887e-05,
|
|
"loss": 0.2551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1608983874320984,
|
|
"step": 1880,
|
|
"valid_targets_mean": 4939.2,
|
|
"valid_targets_min": 846
|
|
},
|
|
{
|
|
"epoch": 2.8517397881996973,
|
|
"grad_norm": 0.6057583069783826,
|
|
"learning_rate": 2.9565710601988783e-05,
|
|
"loss": 0.2681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14926423132419586,
|
|
"step": 1885,
|
|
"valid_targets_mean": 2823.4,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 2.859304084720121,
|
|
"grad_norm": 0.4904765919385824,
|
|
"learning_rate": 2.9499385241224395e-05,
|
|
"loss": 0.2651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1171988919377327,
|
|
"step": 1890,
|
|
"valid_targets_mean": 3579.4,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 2.8668683812405447,
|
|
"grad_norm": 0.6012627760343513,
|
|
"learning_rate": 2.943292470019361e-05,
|
|
"loss": 0.2703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1141204908490181,
|
|
"step": 1895,
|
|
"valid_targets_mean": 2247.2,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 2.8744326777609683,
|
|
"grad_norm": 0.5286353983864431,
|
|
"learning_rate": 2.936632992465803e-05,
|
|
"loss": 0.2612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13957899808883667,
|
|
"step": 1900,
|
|
"valid_targets_mean": 3220.6,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 2.8819969742813916,
|
|
"grad_norm": 0.42792740161634213,
|
|
"learning_rate": 2.9299601862289453e-05,
|
|
"loss": 0.2674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12294910848140717,
|
|
"step": 1905,
|
|
"valid_targets_mean": 4641.6,
|
|
"valid_targets_min": 4088
|
|
},
|
|
{
|
|
"epoch": 2.8895612708018152,
|
|
"grad_norm": 0.4480941993571384,
|
|
"learning_rate": 2.92327414626564e-05,
|
|
"loss": 0.2628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10716640949249268,
|
|
"step": 1910,
|
|
"valid_targets_mean": 4133.6,
|
|
"valid_targets_min": 778
|
|
},
|
|
{
|
|
"epoch": 2.897125567322239,
|
|
"grad_norm": 0.49133594202720215,
|
|
"learning_rate": 2.9165749677210615e-05,
|
|
"loss": 0.2659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12948596477508545,
|
|
"step": 1915,
|
|
"valid_targets_mean": 3928.4,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 2.9046898638426626,
|
|
"grad_norm": 0.4804249007742952,
|
|
"learning_rate": 2.9098627459273516e-05,
|
|
"loss": 0.2695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13153080642223358,
|
|
"step": 1920,
|
|
"valid_targets_mean": 3909.8,
|
|
"valid_targets_min": 432
|
|
},
|
|
{
|
|
"epoch": 2.9122541603630863,
|
|
"grad_norm": 0.5622433506299214,
|
|
"learning_rate": 2.9031375764022627e-05,
|
|
"loss": 0.2618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14207279682159424,
|
|
"step": 1925,
|
|
"valid_targets_mean": 3082.5,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 2.91981845688351,
|
|
"grad_norm": 0.48450847562909716,
|
|
"learning_rate": 2.8963995548477996e-05,
|
|
"loss": 0.2713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1256946325302124,
|
|
"step": 1930,
|
|
"valid_targets_mean": 4064.4,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 2.9273827534039336,
|
|
"grad_norm": 0.5524818591264871,
|
|
"learning_rate": 2.8896487771488564e-05,
|
|
"loss": 0.2667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09726505726575851,
|
|
"step": 1935,
|
|
"valid_targets_mean": 2020.5,
|
|
"valid_targets_min": 470
|
|
},
|
|
{
|
|
"epoch": 2.9349470499243573,
|
|
"grad_norm": 0.617037678339309,
|
|
"learning_rate": 2.882885339371852e-05,
|
|
"loss": 0.2727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1348113715648651,
|
|
"step": 1940,
|
|
"valid_targets_mean": 2651.1,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 2.9425113464447805,
|
|
"grad_norm": 0.5886104898054546,
|
|
"learning_rate": 2.8761093377633657e-05,
|
|
"loss": 0.2573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1703062653541565,
|
|
"step": 1945,
|
|
"valid_targets_mean": 3995.6,
|
|
"valid_targets_min": 585
|
|
},
|
|
{
|
|
"epoch": 2.950075642965204,
|
|
"grad_norm": 0.5141280297395648,
|
|
"learning_rate": 2.8693208687487617e-05,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14697790145874023,
|
|
"step": 1950,
|
|
"valid_targets_mean": 4054.1,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 2.957639939485628,
|
|
"grad_norm": 0.5850769832690217,
|
|
"learning_rate": 2.8625200289308242e-05,
|
|
"loss": 0.2622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13979512453079224,
|
|
"step": 1955,
|
|
"valid_targets_mean": 2849.9,
|
|
"valid_targets_min": 487
|
|
},
|
|
{
|
|
"epoch": 2.9652042360060515,
|
|
"grad_norm": 0.46130018662466954,
|
|
"learning_rate": 2.855706915088378e-05,
|
|
"loss": 0.2513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16986322402954102,
|
|
"step": 1960,
|
|
"valid_targets_mean": 5408.1,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 2.9727685325264748,
|
|
"grad_norm": 0.4386948186968485,
|
|
"learning_rate": 2.8488816241749123e-05,
|
|
"loss": 0.2571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11828409135341644,
|
|
"step": 1965,
|
|
"valid_targets_mean": 4271.6,
|
|
"valid_targets_min": 639
|
|
},
|
|
{
|
|
"epoch": 2.9803328290468984,
|
|
"grad_norm": 0.4882393355671485,
|
|
"learning_rate": 2.8420442533171995e-05,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13114064931869507,
|
|
"step": 1970,
|
|
"valid_targets_mean": 3872.8,
|
|
"valid_targets_min": 402
|
|
},
|
|
{
|
|
"epoch": 2.987897125567322,
|
|
"grad_norm": 0.4656726063630794,
|
|
"learning_rate": 2.8351948998139187e-05,
|
|
"loss": 0.2698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16437414288520813,
|
|
"step": 1975,
|
|
"valid_targets_mean": 5163.4,
|
|
"valid_targets_min": 778
|
|
},
|
|
{
|
|
"epoch": 2.995461422087746,
|
|
"grad_norm": 0.5327526027874477,
|
|
"learning_rate": 2.8283336611342634e-05,
|
|
"loss": 0.2673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14998671412467957,
|
|
"step": 1980,
|
|
"valid_targets_mean": 3523.2,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 3.0030257186081695,
|
|
"grad_norm": 0.47719803477843653,
|
|
"learning_rate": 2.8214606349165587e-05,
|
|
"loss": 0.2585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14007967710494995,
|
|
"step": 1985,
|
|
"valid_targets_mean": 4427.5,
|
|
"valid_targets_min": 995
|
|
},
|
|
{
|
|
"epoch": 3.010590015128593,
|
|
"grad_norm": 0.5668245723025805,
|
|
"learning_rate": 2.8145759189668748e-05,
|
|
"loss": 0.261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14221113920211792,
|
|
"step": 1990,
|
|
"valid_targets_mean": 2884.8,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 3.018154311649017,
|
|
"grad_norm": 0.5227614263797575,
|
|
"learning_rate": 2.8076796112576273e-05,
|
|
"loss": 0.2499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09853169322013855,
|
|
"step": 1995,
|
|
"valid_targets_mean": 2828.4,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 3.02571860816944,
|
|
"grad_norm": 0.5397372187282524,
|
|
"learning_rate": 2.8007718099261886e-05,
|
|
"loss": 0.2511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14145581424236298,
|
|
"step": 2000,
|
|
"valid_targets_mean": 4510.9,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 3.0332829046898637,
|
|
"grad_norm": 0.48303159640549215,
|
|
"learning_rate": 2.7938526132734923e-05,
|
|
"loss": 0.2465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11449413001537323,
|
|
"step": 2005,
|
|
"valid_targets_mean": 3825.9,
|
|
"valid_targets_min": 550
|
|
},
|
|
{
|
|
"epoch": 3.0408472012102874,
|
|
"grad_norm": 0.4802059679450191,
|
|
"learning_rate": 2.7869221197626307e-05,
|
|
"loss": 0.2514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1287687122821808,
|
|
"step": 2010,
|
|
"valid_targets_mean": 4303.2,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 3.048411497730711,
|
|
"grad_norm": 0.4747515928347596,
|
|
"learning_rate": 2.7799804280174547e-05,
|
|
"loss": 0.2479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12060800194740295,
|
|
"step": 2015,
|
|
"valid_targets_mean": 4213.0,
|
|
"valid_targets_min": 818
|
|
},
|
|
{
|
|
"epoch": 3.0559757942511347,
|
|
"grad_norm": 0.5113089610835009,
|
|
"learning_rate": 2.773027636821171e-05,
|
|
"loss": 0.2554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10528258979320526,
|
|
"step": 2020,
|
|
"valid_targets_mean": 2739.9,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 3.0635400907715584,
|
|
"grad_norm": 0.4829659984993638,
|
|
"learning_rate": 2.7660638451149377e-05,
|
|
"loss": 0.241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10940650850534439,
|
|
"step": 2025,
|
|
"valid_targets_mean": 3977.5,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 3.0711043872919817,
|
|
"grad_norm": 0.5154536930607023,
|
|
"learning_rate": 2.7590891519964523e-05,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14696672558784485,
|
|
"step": 2030,
|
|
"valid_targets_mean": 4821.2,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 3.0786686838124053,
|
|
"grad_norm": 0.4863514666766033,
|
|
"learning_rate": 2.7521036567185467e-05,
|
|
"loss": 0.2504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1139116883277893,
|
|
"step": 2035,
|
|
"valid_targets_mean": 3533.4,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 3.086232980332829,
|
|
"grad_norm": 0.540233456279864,
|
|
"learning_rate": 2.74510745868777e-05,
|
|
"loss": 0.2421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11636720597743988,
|
|
"step": 2040,
|
|
"valid_targets_mean": 3772.8,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 3.0937972768532527,
|
|
"grad_norm": 0.5597537191101317,
|
|
"learning_rate": 2.7381006574629764e-05,
|
|
"loss": 0.2632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14788201451301575,
|
|
"step": 2045,
|
|
"valid_targets_mean": 3691.8,
|
|
"valid_targets_min": 521
|
|
},
|
|
{
|
|
"epoch": 3.1013615733736764,
|
|
"grad_norm": 0.5591881976973488,
|
|
"learning_rate": 2.7310833527539092e-05,
|
|
"loss": 0.2513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11613605916500092,
|
|
"step": 2050,
|
|
"valid_targets_mean": 3705.0,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 3.1089258698941,
|
|
"grad_norm": 0.5710655327209988,
|
|
"learning_rate": 2.7240556444197794e-05,
|
|
"loss": 0.2548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12536919116973877,
|
|
"step": 2055,
|
|
"valid_targets_mean": 3375.0,
|
|
"valid_targets_min": 837
|
|
},
|
|
{
|
|
"epoch": 3.1164901664145233,
|
|
"grad_norm": 0.5295428023288915,
|
|
"learning_rate": 2.7170176324678466e-05,
|
|
"loss": 0.2521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1373474895954132,
|
|
"step": 2060,
|
|
"valid_targets_mean": 4409.4,
|
|
"valid_targets_min": 994
|
|
},
|
|
{
|
|
"epoch": 3.124054462934947,
|
|
"grad_norm": 2.3271519581915427,
|
|
"learning_rate": 2.7099694170519954e-05,
|
|
"loss": 0.2565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14860421419143677,
|
|
"step": 2065,
|
|
"valid_targets_mean": 4052.5,
|
|
"valid_targets_min": 843
|
|
},
|
|
{
|
|
"epoch": 3.1316187594553706,
|
|
"grad_norm": 0.56548410146995,
|
|
"learning_rate": 2.702911098471309e-05,
|
|
"loss": 0.252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1227634996175766,
|
|
"step": 2070,
|
|
"valid_targets_mean": 2917.5,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 3.1391830559757943,
|
|
"grad_norm": 0.4912751758413967,
|
|
"learning_rate": 2.6958427771686442e-05,
|
|
"loss": 0.238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11174537241458893,
|
|
"step": 2075,
|
|
"valid_targets_mean": 3443.2,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 3.146747352496218,
|
|
"grad_norm": 0.6887482914538502,
|
|
"learning_rate": 2.6887645537292e-05,
|
|
"loss": 0.2445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1627592146396637,
|
|
"step": 2080,
|
|
"valid_targets_mean": 2482.6,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 3.1543116490166416,
|
|
"grad_norm": 0.5135519608587061,
|
|
"learning_rate": 2.681676528879087e-05,
|
|
"loss": 0.255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15344315767288208,
|
|
"step": 2085,
|
|
"valid_targets_mean": 4961.0,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 3.161875945537065,
|
|
"grad_norm": 0.4864094582180224,
|
|
"learning_rate": 2.674578803483894e-05,
|
|
"loss": 0.2426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12170401215553284,
|
|
"step": 2090,
|
|
"valid_targets_mean": 4345.6,
|
|
"valid_targets_min": 3665
|
|
},
|
|
{
|
|
"epoch": 3.1694402420574885,
|
|
"grad_norm": 0.4478406541884178,
|
|
"learning_rate": 2.6674714785472543e-05,
|
|
"loss": 0.2495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14467225968837738,
|
|
"step": 2095,
|
|
"valid_targets_mean": 5330.2,
|
|
"valid_targets_min": 3809
|
|
},
|
|
{
|
|
"epoch": 3.177004538577912,
|
|
"grad_norm": 0.5047559564389941,
|
|
"learning_rate": 2.660354655209403e-05,
|
|
"loss": 0.2571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13611987233161926,
|
|
"step": 2100,
|
|
"valid_targets_mean": 3544.9,
|
|
"valid_targets_min": 300
|
|
},
|
|
{
|
|
"epoch": 3.184568835098336,
|
|
"grad_norm": 0.4832387362556074,
|
|
"learning_rate": 2.653228434745746e-05,
|
|
"loss": 0.2621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11477263271808624,
|
|
"step": 2105,
|
|
"valid_targets_mean": 4167.5,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 3.1921331316187596,
|
|
"grad_norm": 0.533021225777816,
|
|
"learning_rate": 2.6460929185654106e-05,
|
|
"loss": 0.2471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1181040108203888,
|
|
"step": 2110,
|
|
"valid_targets_mean": 3284.0,
|
|
"valid_targets_min": 772
|
|
},
|
|
{
|
|
"epoch": 3.1996974281391832,
|
|
"grad_norm": 0.5044345819400912,
|
|
"learning_rate": 2.6389482082098078e-05,
|
|
"loss": 0.2438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12338447570800781,
|
|
"step": 2115,
|
|
"valid_targets_mean": 3515.8,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 3.2072617246596065,
|
|
"grad_norm": 0.4438322968319258,
|
|
"learning_rate": 2.6317944053511853e-05,
|
|
"loss": 0.236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11496617645025253,
|
|
"step": 2120,
|
|
"valid_targets_mean": 4461.4,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 3.21482602118003,
|
|
"grad_norm": 0.527185496638436,
|
|
"learning_rate": 2.6246316117911804e-05,
|
|
"loss": 0.2511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10938948392868042,
|
|
"step": 2125,
|
|
"valid_targets_mean": 3821.0,
|
|
"valid_targets_min": 585
|
|
},
|
|
{
|
|
"epoch": 3.222390317700454,
|
|
"grad_norm": 0.5499443037935335,
|
|
"learning_rate": 2.6174599294593738e-05,
|
|
"loss": 0.2434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14151224493980408,
|
|
"step": 2130,
|
|
"valid_targets_mean": 3408.1,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 3.2299546142208775,
|
|
"grad_norm": 0.47068011218936306,
|
|
"learning_rate": 2.6102794604118345e-05,
|
|
"loss": 0.2476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11628115922212601,
|
|
"step": 2135,
|
|
"valid_targets_mean": 4157.6,
|
|
"valid_targets_min": 869
|
|
},
|
|
{
|
|
"epoch": 3.237518910741301,
|
|
"grad_norm": 0.47521240425192146,
|
|
"learning_rate": 2.6030903068296724e-05,
|
|
"loss": 0.2541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13626612722873688,
|
|
"step": 2140,
|
|
"valid_targets_mean": 4818.2,
|
|
"valid_targets_min": 754
|
|
},
|
|
{
|
|
"epoch": 3.245083207261725,
|
|
"grad_norm": 0.5236302459959173,
|
|
"learning_rate": 2.5958925710175803e-05,
|
|
"loss": 0.2617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13213109970092773,
|
|
"step": 2145,
|
|
"valid_targets_mean": 3473.0,
|
|
"valid_targets_min": 895
|
|
},
|
|
{
|
|
"epoch": 3.252647503782148,
|
|
"grad_norm": 0.5536536792303179,
|
|
"learning_rate": 2.5886863554023807e-05,
|
|
"loss": 0.2476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08660998195409775,
|
|
"step": 2150,
|
|
"valid_targets_mean": 2000.5,
|
|
"valid_targets_min": 363
|
|
},
|
|
{
|
|
"epoch": 3.2602118003025717,
|
|
"grad_norm": 0.4693609569552803,
|
|
"learning_rate": 2.581471762531568e-05,
|
|
"loss": 0.2524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1053948700428009,
|
|
"step": 2155,
|
|
"valid_targets_mean": 4247.2,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 3.2677760968229954,
|
|
"grad_norm": 0.5154191937043178,
|
|
"learning_rate": 2.574248895071846e-05,
|
|
"loss": 0.2515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13059362769126892,
|
|
"step": 2160,
|
|
"valid_targets_mean": 3573.5,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 3.275340393343419,
|
|
"grad_norm": 0.5038916914342926,
|
|
"learning_rate": 2.5670178558076724e-05,
|
|
"loss": 0.2538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11359833180904388,
|
|
"step": 2165,
|
|
"valid_targets_mean": 3369.5,
|
|
"valid_targets_min": 486
|
|
},
|
|
{
|
|
"epoch": 3.2829046898638428,
|
|
"grad_norm": 0.5279425968317832,
|
|
"learning_rate": 2.5597787476397918e-05,
|
|
"loss": 0.2517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14045211672782898,
|
|
"step": 2170,
|
|
"valid_targets_mean": 3427.0,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 3.2904689863842664,
|
|
"grad_norm": 0.5119887782735726,
|
|
"learning_rate": 2.5525316735837713e-05,
|
|
"loss": 0.2579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13699717819690704,
|
|
"step": 2175,
|
|
"valid_targets_mean": 3893.2,
|
|
"valid_targets_min": 547
|
|
},
|
|
{
|
|
"epoch": 3.29803328290469,
|
|
"grad_norm": 0.4700334205991205,
|
|
"learning_rate": 2.545276736768538e-05,
|
|
"loss": 0.2604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10697575658559799,
|
|
"step": 2180,
|
|
"valid_targets_mean": 3793.0,
|
|
"valid_targets_min": 861
|
|
},
|
|
{
|
|
"epoch": 3.3055975794251133,
|
|
"grad_norm": 0.546895222879148,
|
|
"learning_rate": 2.5380140404349094e-05,
|
|
"loss": 0.2597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11447377502918243,
|
|
"step": 2185,
|
|
"valid_targets_mean": 3303.4,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 3.313161875945537,
|
|
"grad_norm": 0.45804083014518815,
|
|
"learning_rate": 2.5307436879341226e-05,
|
|
"loss": 0.242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12095325440168381,
|
|
"step": 2190,
|
|
"valid_targets_mean": 3721.0,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 3.3207261724659607,
|
|
"grad_norm": 0.6072716800678424,
|
|
"learning_rate": 2.523465782726366e-05,
|
|
"loss": 0.2586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14582321047782898,
|
|
"step": 2195,
|
|
"valid_targets_mean": 3141.1,
|
|
"valid_targets_min": 845
|
|
},
|
|
{
|
|
"epoch": 3.3282904689863844,
|
|
"grad_norm": 0.482635123431749,
|
|
"learning_rate": 2.5161804283793078e-05,
|
|
"loss": 0.2574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13577789068222046,
|
|
"step": 2200,
|
|
"valid_targets_mean": 4085.6,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 3.335854765506808,
|
|
"grad_norm": 0.514559829051362,
|
|
"learning_rate": 2.508887728566617e-05,
|
|
"loss": 0.2572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16706421971321106,
|
|
"step": 2205,
|
|
"valid_targets_mean": 4777.8,
|
|
"valid_targets_min": 3492
|
|
},
|
|
{
|
|
"epoch": 3.3434190620272313,
|
|
"grad_norm": 0.46871756096200656,
|
|
"learning_rate": 2.5015877870664956e-05,
|
|
"loss": 0.2508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13309545814990997,
|
|
"step": 2210,
|
|
"valid_targets_mean": 4447.0,
|
|
"valid_targets_min": 584
|
|
},
|
|
{
|
|
"epoch": 3.350983358547655,
|
|
"grad_norm": 0.5436111365306565,
|
|
"learning_rate": 2.494280707760195e-05,
|
|
"loss": 0.2584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17185726761817932,
|
|
"step": 2215,
|
|
"valid_targets_mean": 4317.2,
|
|
"valid_targets_min": 944
|
|
},
|
|
{
|
|
"epoch": 3.3585476550680786,
|
|
"grad_norm": 0.5715954607216548,
|
|
"learning_rate": 2.4869665946305416e-05,
|
|
"loss": 0.2535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1529509723186493,
|
|
"step": 2220,
|
|
"valid_targets_mean": 2968.6,
|
|
"valid_targets_min": 456
|
|
},
|
|
{
|
|
"epoch": 3.3661119515885023,
|
|
"grad_norm": 0.5128952622624829,
|
|
"learning_rate": 2.479645551760457e-05,
|
|
"loss": 0.2535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1182350367307663,
|
|
"step": 2225,
|
|
"valid_targets_mean": 4542.9,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 3.373676248108926,
|
|
"grad_norm": 0.4612928822373755,
|
|
"learning_rate": 2.4723176833314746e-05,
|
|
"loss": 0.2497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12710745632648468,
|
|
"step": 2230,
|
|
"valid_targets_mean": 4132.5,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 3.3812405446293496,
|
|
"grad_norm": 0.46838485398482105,
|
|
"learning_rate": 2.4649830936222587e-05,
|
|
"loss": 0.2484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12077198177576065,
|
|
"step": 2235,
|
|
"valid_targets_mean": 4076.4,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 3.3888048411497733,
|
|
"grad_norm": 0.5152819133277382,
|
|
"learning_rate": 2.457641887007121e-05,
|
|
"loss": 0.2491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14298635721206665,
|
|
"step": 2240,
|
|
"valid_targets_mean": 5089.6,
|
|
"valid_targets_min": 1009
|
|
},
|
|
{
|
|
"epoch": 3.3963691376701965,
|
|
"grad_norm": 0.5163209541885163,
|
|
"learning_rate": 2.4502941679545332e-05,
|
|
"loss": 0.2558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14179593324661255,
|
|
"step": 2245,
|
|
"valid_targets_mean": 3700.0,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 3.40393343419062,
|
|
"grad_norm": 0.6653492194722043,
|
|
"learning_rate": 2.442940041025643e-05,
|
|
"loss": 0.2472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14573529362678528,
|
|
"step": 2250,
|
|
"valid_targets_mean": 2602.6,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 3.411497730711044,
|
|
"grad_norm": 0.5326352817610934,
|
|
"learning_rate": 2.4355796108727847e-05,
|
|
"loss": 0.2524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12746964395046234,
|
|
"step": 2255,
|
|
"valid_targets_mean": 3751.5,
|
|
"valid_targets_min": 807
|
|
},
|
|
{
|
|
"epoch": 3.4190620272314676,
|
|
"grad_norm": 0.4778410203141501,
|
|
"learning_rate": 2.4282129822379896e-05,
|
|
"loss": 0.2382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14547117054462433,
|
|
"step": 2260,
|
|
"valid_targets_mean": 4655.2,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 3.4266263237518912,
|
|
"grad_norm": 0.49274461881682724,
|
|
"learning_rate": 2.4208402599514957e-05,
|
|
"loss": 0.2559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1519836038351059,
|
|
"step": 2265,
|
|
"valid_targets_mean": 4866.5,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 3.4341906202723145,
|
|
"grad_norm": 0.5525223687868025,
|
|
"learning_rate": 2.4134615489302577e-05,
|
|
"loss": 0.2428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13417750597000122,
|
|
"step": 2270,
|
|
"valid_targets_mean": 3703.5,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 3.441754916792738,
|
|
"grad_norm": 0.5067333498498494,
|
|
"learning_rate": 2.4060769541764516e-05,
|
|
"loss": 0.2433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11438660323619843,
|
|
"step": 2275,
|
|
"valid_targets_mean": 3626.8,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 3.449319213313162,
|
|
"grad_norm": 0.5234562605875683,
|
|
"learning_rate": 2.39868658077598e-05,
|
|
"loss": 0.2542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16396035254001617,
|
|
"step": 2280,
|
|
"valid_targets_mean": 4821.6,
|
|
"valid_targets_min": 3620
|
|
},
|
|
{
|
|
"epoch": 3.4568835098335855,
|
|
"grad_norm": 0.48085626014244787,
|
|
"learning_rate": 2.3912905338969815e-05,
|
|
"loss": 0.2438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12416675686836243,
|
|
"step": 2285,
|
|
"valid_targets_mean": 4160.9,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 3.464447806354009,
|
|
"grad_norm": 0.44544759619030705,
|
|
"learning_rate": 2.383888918788328e-05,
|
|
"loss": 0.2317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10578204691410065,
|
|
"step": 2290,
|
|
"valid_targets_mean": 4187.6,
|
|
"valid_targets_min": 846
|
|
},
|
|
{
|
|
"epoch": 3.472012102874433,
|
|
"grad_norm": 0.565881276305256,
|
|
"learning_rate": 2.37648184077813e-05,
|
|
"loss": 0.2477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13886961340904236,
|
|
"step": 2295,
|
|
"valid_targets_mean": 3552.5,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 3.4795763993948565,
|
|
"grad_norm": 0.5131232624017489,
|
|
"learning_rate": 2.3690694052722384e-05,
|
|
"loss": 0.2677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15908683836460114,
|
|
"step": 2300,
|
|
"valid_targets_mean": 3842.6,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 3.4871406959152798,
|
|
"grad_norm": 0.5268960937998457,
|
|
"learning_rate": 2.361651717752742e-05,
|
|
"loss": 0.2533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11232242733240128,
|
|
"step": 2305,
|
|
"valid_targets_mean": 3074.9,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 3.4947049924357034,
|
|
"grad_norm": 0.5309270452819024,
|
|
"learning_rate": 2.35422888377647e-05,
|
|
"loss": 0.2561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16531693935394287,
|
|
"step": 2310,
|
|
"valid_targets_mean": 4520.0,
|
|
"valid_targets_min": 810
|
|
},
|
|
{
|
|
"epoch": 3.502269288956127,
|
|
"grad_norm": 0.45135038292018936,
|
|
"learning_rate": 2.3468010089734854e-05,
|
|
"loss": 0.2504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12506237626075745,
|
|
"step": 2315,
|
|
"valid_targets_mean": 4716.4,
|
|
"valid_targets_min": 3303
|
|
},
|
|
{
|
|
"epoch": 3.5098335854765508,
|
|
"grad_norm": 0.5132183460598854,
|
|
"learning_rate": 2.3393681990455877e-05,
|
|
"loss": 0.2518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13425256311893463,
|
|
"step": 2320,
|
|
"valid_targets_mean": 3711.1,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 3.517397881996974,
|
|
"grad_norm": 0.5088359392214638,
|
|
"learning_rate": 2.331930559764801e-05,
|
|
"loss": 0.2503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12294846028089523,
|
|
"step": 2325,
|
|
"valid_targets_mean": 4017.6,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 3.5249621785173977,
|
|
"grad_norm": 0.512244589129325,
|
|
"learning_rate": 2.3244881969718768e-05,
|
|
"loss": 0.2538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.129659041762352,
|
|
"step": 2330,
|
|
"valid_targets_mean": 3530.1,
|
|
"valid_targets_min": 594
|
|
},
|
|
{
|
|
"epoch": 3.5325264750378214,
|
|
"grad_norm": 0.6985234964349193,
|
|
"learning_rate": 2.317041216574782e-05,
|
|
"loss": 0.2551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12587997317314148,
|
|
"step": 2335,
|
|
"valid_targets_mean": 2764.2,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 3.540090771558245,
|
|
"grad_norm": 0.5298431562669271,
|
|
"learning_rate": 2.309589724547195e-05,
|
|
"loss": 0.2409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11732418835163116,
|
|
"step": 2340,
|
|
"valid_targets_mean": 3504.9,
|
|
"valid_targets_min": 842
|
|
},
|
|
{
|
|
"epoch": 3.5476550680786687,
|
|
"grad_norm": 0.5610753242194402,
|
|
"learning_rate": 2.3021338269269968e-05,
|
|
"loss": 0.254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14564543962478638,
|
|
"step": 2345,
|
|
"valid_targets_mean": 3996.6,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 3.5552193645990924,
|
|
"grad_norm": 0.48487280761981916,
|
|
"learning_rate": 2.2946736298147605e-05,
|
|
"loss": 0.2537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08526395261287689,
|
|
"step": 2350,
|
|
"valid_targets_mean": 2877.0,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 3.562783661119516,
|
|
"grad_norm": 0.501442919960662,
|
|
"learning_rate": 2.287209239372244e-05,
|
|
"loss": 0.2435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1054798886179924,
|
|
"step": 2355,
|
|
"valid_targets_mean": 3998.0,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 3.5703479576399397,
|
|
"grad_norm": 0.5543475393939046,
|
|
"learning_rate": 2.2797407618208784e-05,
|
|
"loss": 0.2417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14811848104000092,
|
|
"step": 2360,
|
|
"valid_targets_mean": 3531.6,
|
|
"valid_targets_min": 573
|
|
},
|
|
{
|
|
"epoch": 3.577912254160363,
|
|
"grad_norm": 0.5260974118615571,
|
|
"learning_rate": 2.2722683034402543e-05,
|
|
"loss": 0.2479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10030622035264969,
|
|
"step": 2365,
|
|
"valid_targets_mean": 3087.2,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 3.5854765506807866,
|
|
"grad_norm": 0.48358507368231846,
|
|
"learning_rate": 2.264791970566613e-05,
|
|
"loss": 0.2451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13227717578411102,
|
|
"step": 2370,
|
|
"valid_targets_mean": 4272.5,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 3.5930408472012103,
|
|
"grad_norm": 0.5343134413822198,
|
|
"learning_rate": 2.2573118695913303e-05,
|
|
"loss": 0.2533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1320905089378357,
|
|
"step": 2375,
|
|
"valid_targets_mean": 3891.0,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 3.600605143721634,
|
|
"grad_norm": 0.43079384384638436,
|
|
"learning_rate": 2.2498281069594045e-05,
|
|
"loss": 0.2438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10163118690252304,
|
|
"step": 2380,
|
|
"valid_targets_mean": 3972.8,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 3.608169440242057,
|
|
"grad_norm": 0.4719052198927715,
|
|
"learning_rate": 2.2423407891679405e-05,
|
|
"loss": 0.2485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10454003512859344,
|
|
"step": 2385,
|
|
"valid_targets_mean": 3748.9,
|
|
"valid_targets_min": 766
|
|
},
|
|
{
|
|
"epoch": 3.615733736762481,
|
|
"grad_norm": 0.4920034458773264,
|
|
"learning_rate": 2.2348500227646347e-05,
|
|
"loss": 0.2478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11999665200710297,
|
|
"step": 2390,
|
|
"valid_targets_mean": 3922.1,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 3.6232980332829046,
|
|
"grad_norm": 0.4961141436713153,
|
|
"learning_rate": 2.2273559143462574e-05,
|
|
"loss": 0.2468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15102550387382507,
|
|
"step": 2395,
|
|
"valid_targets_mean": 5160.8,
|
|
"valid_targets_min": 843
|
|
},
|
|
{
|
|
"epoch": 3.6308623298033282,
|
|
"grad_norm": 0.5631392470789066,
|
|
"learning_rate": 2.21985857055714e-05,
|
|
"loss": 0.2457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15359273552894592,
|
|
"step": 2400,
|
|
"valid_targets_mean": 3642.9,
|
|
"valid_targets_min": 869
|
|
},
|
|
{
|
|
"epoch": 3.638426626323752,
|
|
"grad_norm": 0.6035914988641711,
|
|
"learning_rate": 2.212358098087652e-05,
|
|
"loss": 0.2423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12285001575946808,
|
|
"step": 2405,
|
|
"valid_targets_mean": 4163.9,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 3.6459909228441756,
|
|
"grad_norm": 0.4911820958460871,
|
|
"learning_rate": 2.2048546036726867e-05,
|
|
"loss": 0.2561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1128137931227684,
|
|
"step": 2410,
|
|
"valid_targets_mean": 2851.8,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 3.6535552193645993,
|
|
"grad_norm": 0.5278642633803777,
|
|
"learning_rate": 2.1973481940901403e-05,
|
|
"loss": 0.2409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12860259413719177,
|
|
"step": 2415,
|
|
"valid_targets_mean": 3918.9,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 3.661119515885023,
|
|
"grad_norm": 0.43362808963422617,
|
|
"learning_rate": 2.1898389761593933e-05,
|
|
"loss": 0.2489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10812032222747803,
|
|
"step": 2420,
|
|
"valid_targets_mean": 3877.5,
|
|
"valid_targets_min": 991
|
|
},
|
|
{
|
|
"epoch": 3.668683812405446,
|
|
"grad_norm": 0.4516772987652707,
|
|
"learning_rate": 2.1823270567397908e-05,
|
|
"loss": 0.2348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11308574676513672,
|
|
"step": 2425,
|
|
"valid_targets_mean": 4384.9,
|
|
"valid_targets_min": 3287
|
|
},
|
|
{
|
|
"epoch": 3.67624810892587,
|
|
"grad_norm": 0.5201531245633447,
|
|
"learning_rate": 2.1748125427291203e-05,
|
|
"loss": 0.2653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14627112448215485,
|
|
"step": 2430,
|
|
"valid_targets_mean": 3879.4,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 3.6838124054462935,
|
|
"grad_norm": 0.5172413396500244,
|
|
"learning_rate": 2.1672955410620916e-05,
|
|
"loss": 0.2467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14754822850227356,
|
|
"step": 2435,
|
|
"valid_targets_mean": 3614.8,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 3.691376701966717,
|
|
"grad_norm": 0.5539478703498129,
|
|
"learning_rate": 2.1597761587088146e-05,
|
|
"loss": 0.2422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13793599605560303,
|
|
"step": 2440,
|
|
"valid_targets_mean": 4337.0,
|
|
"valid_targets_min": 832
|
|
},
|
|
{
|
|
"epoch": 3.6989409984871404,
|
|
"grad_norm": 0.500844305066668,
|
|
"learning_rate": 2.1522545026732793e-05,
|
|
"loss": 0.2458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0867573693394661,
|
|
"step": 2445,
|
|
"valid_targets_mean": 2984.5,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 3.706505295007564,
|
|
"grad_norm": 0.648138883937255,
|
|
"learning_rate": 2.1447306799918285e-05,
|
|
"loss": 0.2469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09259147942066193,
|
|
"step": 2450,
|
|
"valid_targets_mean": 3062.6,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 3.7140695915279878,
|
|
"grad_norm": 0.5196424928723336,
|
|
"learning_rate": 2.137204797731638e-05,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09301532804965973,
|
|
"step": 2455,
|
|
"valid_targets_mean": 2877.1,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 3.7216338880484114,
|
|
"grad_norm": 0.5568691176313814,
|
|
"learning_rate": 2.1296769629891946e-05,
|
|
"loss": 0.2471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11971484124660492,
|
|
"step": 2460,
|
|
"valid_targets_mean": 3172.6,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 3.729198184568835,
|
|
"grad_norm": 0.5631009425816454,
|
|
"learning_rate": 2.1221472828887672e-05,
|
|
"loss": 0.2451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12958365678787231,
|
|
"step": 2465,
|
|
"valid_targets_mean": 3187.5,
|
|
"valid_targets_min": 603
|
|
},
|
|
{
|
|
"epoch": 3.736762481089259,
|
|
"grad_norm": 0.5351903808624408,
|
|
"learning_rate": 2.1146158645808845e-05,
|
|
"loss": 0.2502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1400567889213562,
|
|
"step": 2470,
|
|
"valid_targets_mean": 3726.1,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 3.7443267776096825,
|
|
"grad_norm": 0.460232015564632,
|
|
"learning_rate": 2.107082815240813e-05,
|
|
"loss": 0.262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11878558993339539,
|
|
"step": 2475,
|
|
"valid_targets_mean": 4080.5,
|
|
"valid_targets_min": 530
|
|
},
|
|
{
|
|
"epoch": 3.751891074130106,
|
|
"grad_norm": 0.46577746069466563,
|
|
"learning_rate": 2.099548242067028e-05,
|
|
"loss": 0.2536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11896295845508575,
|
|
"step": 2480,
|
|
"valid_targets_mean": 4347.0,
|
|
"valid_targets_min": 585
|
|
},
|
|
{
|
|
"epoch": 3.7594553706505294,
|
|
"grad_norm": 0.5303002437257797,
|
|
"learning_rate": 2.0920122522796894e-05,
|
|
"loss": 0.2446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12855184078216553,
|
|
"step": 2485,
|
|
"valid_targets_mean": 3049.9,
|
|
"valid_targets_min": 879
|
|
},
|
|
{
|
|
"epoch": 3.767019667170953,
|
|
"grad_norm": 0.459032813335059,
|
|
"learning_rate": 2.0844749531191164e-05,
|
|
"loss": 0.2434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10262055695056915,
|
|
"step": 2490,
|
|
"valid_targets_mean": 3960.6,
|
|
"valid_targets_min": 359
|
|
},
|
|
{
|
|
"epoch": 3.7745839636913767,
|
|
"grad_norm": 0.503504453198567,
|
|
"learning_rate": 2.076936451844263e-05,
|
|
"loss": 0.242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10520178079605103,
|
|
"step": 2495,
|
|
"valid_targets_mean": 2910.0,
|
|
"valid_targets_min": 587
|
|
},
|
|
{
|
|
"epoch": 3.7821482602118004,
|
|
"grad_norm": 0.5349084536126504,
|
|
"learning_rate": 2.0693968557311858e-05,
|
|
"loss": 0.257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1392827332019806,
|
|
"step": 2500,
|
|
"valid_targets_mean": 3579.8,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 3.789712556732224,
|
|
"grad_norm": 0.49610171977122425,
|
|
"learning_rate": 2.061856272071525e-05,
|
|
"loss": 0.2421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09950701892375946,
|
|
"step": 2505,
|
|
"valid_targets_mean": 2679.1,
|
|
"valid_targets_min": 728
|
|
},
|
|
{
|
|
"epoch": 3.7972768532526473,
|
|
"grad_norm": 0.4738426379116685,
|
|
"learning_rate": 2.0543148081709726e-05,
|
|
"loss": 0.2564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10191960632801056,
|
|
"step": 2510,
|
|
"valid_targets_mean": 4002.4,
|
|
"valid_targets_min": 634
|
|
},
|
|
{
|
|
"epoch": 3.804841149773071,
|
|
"grad_norm": 0.4229934959977213,
|
|
"learning_rate": 2.0467725713477463e-05,
|
|
"loss": 0.239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12579496204853058,
|
|
"step": 2515,
|
|
"valid_targets_mean": 4794.8,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 3.8124054462934946,
|
|
"grad_norm": 0.47800748692505146,
|
|
"learning_rate": 2.0392296689310646e-05,
|
|
"loss": 0.2603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12907111644744873,
|
|
"step": 2520,
|
|
"valid_targets_mean": 3368.2,
|
|
"valid_targets_min": 489
|
|
},
|
|
{
|
|
"epoch": 3.8199697428139183,
|
|
"grad_norm": 0.48145940348903965,
|
|
"learning_rate": 2.0316862082596153e-05,
|
|
"loss": 0.2542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13347336649894714,
|
|
"step": 2525,
|
|
"valid_targets_mean": 4663.1,
|
|
"valid_targets_min": 529
|
|
},
|
|
{
|
|
"epoch": 3.827534039334342,
|
|
"grad_norm": 0.5128958527262729,
|
|
"learning_rate": 2.024142296680032e-05,
|
|
"loss": 0.2557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13235211372375488,
|
|
"step": 2530,
|
|
"valid_targets_mean": 3623.6,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 3.8350983358547657,
|
|
"grad_norm": 0.5064501629942121,
|
|
"learning_rate": 2.0165980415453643e-05,
|
|
"loss": 0.2575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1219475194811821,
|
|
"step": 2535,
|
|
"valid_targets_mean": 3311.5,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 3.8426626323751893,
|
|
"grad_norm": 0.4873149055132525,
|
|
"learning_rate": 2.0090535502135516e-05,
|
|
"loss": 0.2493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12062723189592361,
|
|
"step": 2540,
|
|
"valid_targets_mean": 3595.8,
|
|
"valid_targets_min": 943
|
|
},
|
|
{
|
|
"epoch": 3.8502269288956126,
|
|
"grad_norm": 0.5089733907602141,
|
|
"learning_rate": 2.0015089300458928e-05,
|
|
"loss": 0.2572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12300848960876465,
|
|
"step": 2545,
|
|
"valid_targets_mean": 4261.9,
|
|
"valid_targets_min": 2988
|
|
},
|
|
{
|
|
"epoch": 3.8577912254160363,
|
|
"grad_norm": 0.536266967217227,
|
|
"learning_rate": 1.9939642884055215e-05,
|
|
"loss": 0.2494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.144562229514122,
|
|
"step": 2550,
|
|
"valid_targets_mean": 3689.2,
|
|
"valid_targets_min": 754
|
|
},
|
|
{
|
|
"epoch": 3.86535552193646,
|
|
"grad_norm": 0.5871680796020381,
|
|
"learning_rate": 1.9864197326558784e-05,
|
|
"loss": 0.2494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15210211277008057,
|
|
"step": 2555,
|
|
"valid_targets_mean": 3356.0,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 3.8729198184568836,
|
|
"grad_norm": 0.45937566062948876,
|
|
"learning_rate": 1.9788753701591767e-05,
|
|
"loss": 0.2427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12153743207454681,
|
|
"step": 2560,
|
|
"valid_targets_mean": 4055.0,
|
|
"valid_targets_min": 467
|
|
},
|
|
{
|
|
"epoch": 3.8804841149773073,
|
|
"grad_norm": 0.5236430333821777,
|
|
"learning_rate": 1.9713313082748867e-05,
|
|
"loss": 0.2626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13393303751945496,
|
|
"step": 2565,
|
|
"valid_targets_mean": 3939.6,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 3.8880484114977305,
|
|
"grad_norm": 0.5165552090772977,
|
|
"learning_rate": 1.963787654358194e-05,
|
|
"loss": 0.2477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16077455878257751,
|
|
"step": 2570,
|
|
"valid_targets_mean": 4948.2,
|
|
"valid_targets_min": 3818
|
|
},
|
|
{
|
|
"epoch": 3.895612708018154,
|
|
"grad_norm": 0.48529160118485104,
|
|
"learning_rate": 1.9562445157584826e-05,
|
|
"loss": 0.2496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11260077357292175,
|
|
"step": 2575,
|
|
"valid_targets_mean": 3703.1,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 3.903177004538578,
|
|
"grad_norm": 0.4759937305183704,
|
|
"learning_rate": 1.9487019998178042e-05,
|
|
"loss": 0.2556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1151745393872261,
|
|
"step": 2580,
|
|
"valid_targets_mean": 3716.8,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 3.9107413010590015,
|
|
"grad_norm": 0.45694570706491683,
|
|
"learning_rate": 1.9411602138693457e-05,
|
|
"loss": 0.2442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15117573738098145,
|
|
"step": 2585,
|
|
"valid_targets_mean": 4811.0,
|
|
"valid_targets_min": 4005
|
|
},
|
|
{
|
|
"epoch": 3.918305597579425,
|
|
"grad_norm": 0.5025150376358511,
|
|
"learning_rate": 1.9336192652359088e-05,
|
|
"loss": 0.2482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1284588724374771,
|
|
"step": 2590,
|
|
"valid_targets_mean": 3758.4,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 3.925869894099849,
|
|
"grad_norm": 0.4851629875618734,
|
|
"learning_rate": 1.9260792612283816e-05,
|
|
"loss": 0.2369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10034874826669693,
|
|
"step": 2595,
|
|
"valid_targets_mean": 3109.0,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 3.9334341906202726,
|
|
"grad_norm": 0.5505050229435743,
|
|
"learning_rate": 1.9185403091442044e-05,
|
|
"loss": 0.2438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1495353877544403,
|
|
"step": 2600,
|
|
"valid_targets_mean": 3745.8,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 3.940998487140696,
|
|
"grad_norm": 0.476897063050215,
|
|
"learning_rate": 1.9110025162658522e-05,
|
|
"loss": 0.2367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11279018968343735,
|
|
"step": 2605,
|
|
"valid_targets_mean": 3752.8,
|
|
"valid_targets_min": 828
|
|
},
|
|
{
|
|
"epoch": 3.9485627836611195,
|
|
"grad_norm": 0.6484205124756889,
|
|
"learning_rate": 1.903465989859305e-05,
|
|
"loss": 0.2698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17111729085445404,
|
|
"step": 2610,
|
|
"valid_targets_mean": 2757.6,
|
|
"valid_targets_min": 698
|
|
},
|
|
{
|
|
"epoch": 3.956127080181543,
|
|
"grad_norm": 0.4693919291300168,
|
|
"learning_rate": 1.8959308371725157e-05,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1229323148727417,
|
|
"step": 2615,
|
|
"valid_targets_mean": 4494.5,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 3.963691376701967,
|
|
"grad_norm": 0.48463622113414595,
|
|
"learning_rate": 1.8883971654338927e-05,
|
|
"loss": 0.2533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1053410992026329,
|
|
"step": 2620,
|
|
"valid_targets_mean": 3319.1,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 3.9712556732223905,
|
|
"grad_norm": 0.5117746192688113,
|
|
"learning_rate": 1.8808650818507695e-05,
|
|
"loss": 0.2408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12720650434494019,
|
|
"step": 2625,
|
|
"valid_targets_mean": 3734.6,
|
|
"valid_targets_min": 479
|
|
},
|
|
{
|
|
"epoch": 3.9788199697428137,
|
|
"grad_norm": 0.5572848123203943,
|
|
"learning_rate": 1.8733346936078768e-05,
|
|
"loss": 0.2461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08401644229888916,
|
|
"step": 2630,
|
|
"valid_targets_mean": 1791.4,
|
|
"valid_targets_min": 655
|
|
},
|
|
{
|
|
"epoch": 3.9863842662632374,
|
|
"grad_norm": 0.5193221428643123,
|
|
"learning_rate": 1.8658061078658224e-05,
|
|
"loss": 0.2531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11938201636075974,
|
|
"step": 2635,
|
|
"valid_targets_mean": 2957.5,
|
|
"valid_targets_min": 581
|
|
},
|
|
{
|
|
"epoch": 3.993948562783661,
|
|
"grad_norm": 0.4790949003343508,
|
|
"learning_rate": 1.8582794317595628e-05,
|
|
"loss": 0.2515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11943402141332626,
|
|
"step": 2640,
|
|
"valid_targets_mean": 4121.6,
|
|
"valid_targets_min": 845
|
|
},
|
|
{
|
|
"epoch": 4.001512859304085,
|
|
"grad_norm": 0.564881094196291,
|
|
"learning_rate": 1.8507547723968795e-05,
|
|
"loss": 0.2498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1348753571510315,
|
|
"step": 2645,
|
|
"valid_targets_mean": 4248.2,
|
|
"valid_targets_min": 914
|
|
},
|
|
{
|
|
"epoch": 4.009077155824508,
|
|
"grad_norm": 0.4787961401892637,
|
|
"learning_rate": 1.8432322368568562e-05,
|
|
"loss": 0.2289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08088739216327667,
|
|
"step": 2650,
|
|
"valid_targets_mean": 3175.5,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 4.016641452344932,
|
|
"grad_norm": 0.551599215957862,
|
|
"learning_rate": 1.835711932188351e-05,
|
|
"loss": 0.2357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14693880081176758,
|
|
"step": 2655,
|
|
"valid_targets_mean": 4340.2,
|
|
"valid_targets_min": 857
|
|
},
|
|
{
|
|
"epoch": 4.024205748865356,
|
|
"grad_norm": 0.4961073191796699,
|
|
"learning_rate": 1.8281939654084783e-05,
|
|
"loss": 0.2421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10348083078861237,
|
|
"step": 2660,
|
|
"valid_targets_mean": 3659.9,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 4.031770045385779,
|
|
"grad_norm": 0.5048134163631552,
|
|
"learning_rate": 1.820678443501083e-05,
|
|
"loss": 0.231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12261610478162766,
|
|
"step": 2665,
|
|
"valid_targets_mean": 3776.1,
|
|
"valid_targets_min": 745
|
|
},
|
|
{
|
|
"epoch": 4.039334341906203,
|
|
"grad_norm": 0.5600306152834418,
|
|
"learning_rate": 1.8131654734152165e-05,
|
|
"loss": 0.2326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11598756909370422,
|
|
"step": 2670,
|
|
"valid_targets_mean": 3201.4,
|
|
"valid_targets_min": 579
|
|
},
|
|
{
|
|
"epoch": 4.046898638426626,
|
|
"grad_norm": 0.5368522971010851,
|
|
"learning_rate": 1.805655162063619e-05,
|
|
"loss": 0.2319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09532465785741806,
|
|
"step": 2675,
|
|
"valid_targets_mean": 2598.4,
|
|
"valid_targets_min": 374
|
|
},
|
|
{
|
|
"epoch": 4.05446293494705,
|
|
"grad_norm": 0.553872627853563,
|
|
"learning_rate": 1.798147616321195e-05,
|
|
"loss": 0.2359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10917577892541885,
|
|
"step": 2680,
|
|
"valid_targets_mean": 2667.8,
|
|
"valid_targets_min": 565
|
|
},
|
|
{
|
|
"epoch": 4.062027231467473,
|
|
"grad_norm": 0.5145765283700123,
|
|
"learning_rate": 1.7906429430234927e-05,
|
|
"loss": 0.2333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08023340255022049,
|
|
"step": 2685,
|
|
"valid_targets_mean": 2110.4,
|
|
"valid_targets_min": 547
|
|
},
|
|
{
|
|
"epoch": 4.069591527987897,
|
|
"grad_norm": 0.5315750105081682,
|
|
"learning_rate": 1.783141248965184e-05,
|
|
"loss": 0.2363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12735947966575623,
|
|
"step": 2690,
|
|
"valid_targets_mean": 3996.4,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 4.077155824508321,
|
|
"grad_norm": 0.5063288327026655,
|
|
"learning_rate": 1.775642640898547e-05,
|
|
"loss": 0.2444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0774693638086319,
|
|
"step": 2695,
|
|
"valid_targets_mean": 2375.4,
|
|
"valid_targets_min": 579
|
|
},
|
|
{
|
|
"epoch": 4.084720121028744,
|
|
"grad_norm": 0.6412464341566743,
|
|
"learning_rate": 1.7681472255319417e-05,
|
|
"loss": 0.243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17522436380386353,
|
|
"step": 2700,
|
|
"valid_targets_mean": 4171.5,
|
|
"valid_targets_min": 926
|
|
},
|
|
{
|
|
"epoch": 4.092284417549168,
|
|
"grad_norm": 0.46115067108114244,
|
|
"learning_rate": 1.7606551095282978e-05,
|
|
"loss": 0.2388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11323939263820648,
|
|
"step": 2705,
|
|
"valid_targets_mean": 4863.0,
|
|
"valid_targets_min": 3647
|
|
},
|
|
{
|
|
"epoch": 4.099848714069592,
|
|
"grad_norm": 0.5295009629598595,
|
|
"learning_rate": 1.753166399503591e-05,
|
|
"loss": 0.2416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0959424152970314,
|
|
"step": 2710,
|
|
"valid_targets_mean": 2514.0,
|
|
"valid_targets_min": 698
|
|
},
|
|
{
|
|
"epoch": 4.107413010590015,
|
|
"grad_norm": 0.5624548181468703,
|
|
"learning_rate": 1.74568120202533e-05,
|
|
"loss": 0.2335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11405560374259949,
|
|
"step": 2715,
|
|
"valid_targets_mean": 2655.9,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 4.114977307110439,
|
|
"grad_norm": 0.5477878228696262,
|
|
"learning_rate": 1.7381996236110386e-05,
|
|
"loss": 0.2392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09478546679019928,
|
|
"step": 2720,
|
|
"valid_targets_mean": 2572.5,
|
|
"valid_targets_min": 519
|
|
},
|
|
{
|
|
"epoch": 4.122541603630863,
|
|
"grad_norm": 0.4883539600867654,
|
|
"learning_rate": 1.730721770726739e-05,
|
|
"loss": 0.2359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11574354767799377,
|
|
"step": 2725,
|
|
"valid_targets_mean": 3833.9,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 4.130105900151286,
|
|
"grad_norm": 0.4540797425559296,
|
|
"learning_rate": 1.7232477497854377e-05,
|
|
"loss": 0.2285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1252633035182953,
|
|
"step": 2730,
|
|
"valid_targets_mean": 5551.4,
|
|
"valid_targets_min": 3559
|
|
},
|
|
{
|
|
"epoch": 4.13767019667171,
|
|
"grad_norm": 0.4952038923603066,
|
|
"learning_rate": 1.7157776671456114e-05,
|
|
"loss": 0.2289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1223996952176094,
|
|
"step": 2735,
|
|
"valid_targets_mean": 4354.9,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 4.145234493192133,
|
|
"grad_norm": 0.47604574812037037,
|
|
"learning_rate": 1.7083116291096926e-05,
|
|
"loss": 0.2321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12114831805229187,
|
|
"step": 2740,
|
|
"valid_targets_mean": 4293.0,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 4.1527987897125564,
|
|
"grad_norm": 0.5665976704260526,
|
|
"learning_rate": 1.7008497419225578e-05,
|
|
"loss": 0.2465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10916566103696823,
|
|
"step": 2745,
|
|
"valid_targets_mean": 2655.8,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 4.16036308623298,
|
|
"grad_norm": 0.5982895270633206,
|
|
"learning_rate": 1.6933921117700156e-05,
|
|
"loss": 0.2457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0887761041522026,
|
|
"step": 2750,
|
|
"valid_targets_mean": 2820.5,
|
|
"valid_targets_min": 536
|
|
},
|
|
{
|
|
"epoch": 4.167927382753404,
|
|
"grad_norm": 0.5278836375010175,
|
|
"learning_rate": 1.6859388447772936e-05,
|
|
"loss": 0.241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11292625218629837,
|
|
"step": 2755,
|
|
"valid_targets_mean": 4113.5,
|
|
"valid_targets_min": 879
|
|
},
|
|
{
|
|
"epoch": 4.1754916792738275,
|
|
"grad_norm": 0.5475395037936346,
|
|
"learning_rate": 1.6784900470075312e-05,
|
|
"loss": 0.229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12099523842334747,
|
|
"step": 2760,
|
|
"valid_targets_mean": 3736.5,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 4.183055975794251,
|
|
"grad_norm": 0.6246168445956253,
|
|
"learning_rate": 1.6710458244602695e-05,
|
|
"loss": 0.2362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09174573421478271,
|
|
"step": 2765,
|
|
"valid_targets_mean": 2047.4,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 4.190620272314675,
|
|
"grad_norm": 0.6861466643354196,
|
|
"learning_rate": 1.66360628306994e-05,
|
|
"loss": 0.2314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14144690334796906,
|
|
"step": 2770,
|
|
"valid_targets_mean": 2746.2,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 4.1981845688350985,
|
|
"grad_norm": 0.49555699838439193,
|
|
"learning_rate": 1.656171528704361e-05,
|
|
"loss": 0.2367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.137906014919281,
|
|
"step": 2775,
|
|
"valid_targets_mean": 4988.1,
|
|
"valid_targets_min": 4220
|
|
},
|
|
{
|
|
"epoch": 4.205748865355522,
|
|
"grad_norm": 0.672156577108089,
|
|
"learning_rate": 1.648741667163229e-05,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12912911176681519,
|
|
"step": 2780,
|
|
"valid_targets_mean": 3163.9,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 4.213313161875946,
|
|
"grad_norm": 0.5345083683362609,
|
|
"learning_rate": 1.641316804176613e-05,
|
|
"loss": 0.2441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11398747563362122,
|
|
"step": 2785,
|
|
"valid_targets_mean": 3701.5,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 4.2208774583963695,
|
|
"grad_norm": 0.6096553455522178,
|
|
"learning_rate": 1.6338970454034527e-05,
|
|
"loss": 0.2299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07400164008140564,
|
|
"step": 2790,
|
|
"valid_targets_mean": 1625.5,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 4.228441754916792,
|
|
"grad_norm": 0.7767419562226817,
|
|
"learning_rate": 1.626482496430049e-05,
|
|
"loss": 0.2373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13238486647605896,
|
|
"step": 2795,
|
|
"valid_targets_mean": 3312.0,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 4.236006051437216,
|
|
"grad_norm": 0.5508310696946563,
|
|
"learning_rate": 1.6190732627685686e-05,
|
|
"loss": 0.2346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12000587582588196,
|
|
"step": 2800,
|
|
"valid_targets_mean": 3183.1,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 4.24357034795764,
|
|
"grad_norm": 0.5227023273936355,
|
|
"learning_rate": 1.611669449855537e-05,
|
|
"loss": 0.2368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16310825943946838,
|
|
"step": 2805,
|
|
"valid_targets_mean": 4988.4,
|
|
"valid_targets_min": 1030
|
|
},
|
|
{
|
|
"epoch": 4.251134644478063,
|
|
"grad_norm": 0.6016820525362891,
|
|
"learning_rate": 1.6042711630503406e-05,
|
|
"loss": 0.2309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09568960219621658,
|
|
"step": 2810,
|
|
"valid_targets_mean": 2572.8,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 4.258698940998487,
|
|
"grad_norm": 0.5089506622998009,
|
|
"learning_rate": 1.5968785076337273e-05,
|
|
"loss": 0.2434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0989098846912384,
|
|
"step": 2815,
|
|
"valid_targets_mean": 2790.4,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 4.266263237518911,
|
|
"grad_norm": 0.47327310785000054,
|
|
"learning_rate": 1.5894915888063085e-05,
|
|
"loss": 0.2262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13154059648513794,
|
|
"step": 2820,
|
|
"valid_targets_mean": 4909.1,
|
|
"valid_targets_min": 842
|
|
},
|
|
{
|
|
"epoch": 4.273827534039334,
|
|
"grad_norm": 0.4815972649994574,
|
|
"learning_rate": 1.5821105116870594e-05,
|
|
"loss": 0.2345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1311277449131012,
|
|
"step": 2825,
|
|
"valid_targets_mean": 5336.5,
|
|
"valid_targets_min": 4132
|
|
},
|
|
{
|
|
"epoch": 4.281391830559758,
|
|
"grad_norm": 0.6003291250392716,
|
|
"learning_rate": 1.5747353813118276e-05,
|
|
"loss": 0.2362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14462804794311523,
|
|
"step": 2830,
|
|
"valid_targets_mean": 3587.9,
|
|
"valid_targets_min": 521
|
|
},
|
|
{
|
|
"epoch": 4.288956127080182,
|
|
"grad_norm": 0.5408962981681655,
|
|
"learning_rate": 1.567366302631835e-05,
|
|
"loss": 0.2445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10000055283308029,
|
|
"step": 2835,
|
|
"valid_targets_mean": 2628.5,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 4.296520423600605,
|
|
"grad_norm": 0.5343370409286777,
|
|
"learning_rate": 1.560003380512185e-05,
|
|
"loss": 0.2499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10778696835041046,
|
|
"step": 2840,
|
|
"valid_targets_mean": 3360.4,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 4.304084720121029,
|
|
"grad_norm": 0.5718344632454332,
|
|
"learning_rate": 1.5526467197303715e-05,
|
|
"loss": 0.2449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1107022687792778,
|
|
"step": 2845,
|
|
"valid_targets_mean": 3017.2,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 4.311649016641453,
|
|
"grad_norm": 0.5186035649555514,
|
|
"learning_rate": 1.5452964249747848e-05,
|
|
"loss": 0.238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11472859978675842,
|
|
"step": 2850,
|
|
"valid_targets_mean": 3448.1,
|
|
"valid_targets_min": 1004
|
|
},
|
|
{
|
|
"epoch": 4.319213313161876,
|
|
"grad_norm": 0.5064502344796026,
|
|
"learning_rate": 1.537952600843227e-05,
|
|
"loss": 0.2308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09305392950773239,
|
|
"step": 2855,
|
|
"valid_targets_mean": 3808.5,
|
|
"valid_targets_min": 863
|
|
},
|
|
{
|
|
"epoch": 4.326777609682299,
|
|
"grad_norm": 0.4915325006815678,
|
|
"learning_rate": 1.5306153518414197e-05,
|
|
"loss": 0.225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13986262679100037,
|
|
"step": 2860,
|
|
"valid_targets_mean": 4858.5,
|
|
"valid_targets_min": 995
|
|
},
|
|
{
|
|
"epoch": 4.334341906202723,
|
|
"grad_norm": 0.5069626029643657,
|
|
"learning_rate": 1.523284782381514e-05,
|
|
"loss": 0.2427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11732686311006546,
|
|
"step": 2865,
|
|
"valid_targets_mean": 3943.0,
|
|
"valid_targets_min": 763
|
|
},
|
|
{
|
|
"epoch": 4.3419062027231465,
|
|
"grad_norm": 0.45167948159534593,
|
|
"learning_rate": 1.5159609967806135e-05,
|
|
"loss": 0.2284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12512706220149994,
|
|
"step": 2870,
|
|
"valid_targets_mean": 5958.8,
|
|
"valid_targets_min": 3769
|
|
},
|
|
{
|
|
"epoch": 4.34947049924357,
|
|
"grad_norm": 0.5597844962813994,
|
|
"learning_rate": 1.5086440992592826e-05,
|
|
"loss": 0.2269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1251029074192047,
|
|
"step": 2875,
|
|
"valid_targets_mean": 3696.2,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 4.357034795763994,
|
|
"grad_norm": 0.49162513746301084,
|
|
"learning_rate": 1.5013341939400628e-05,
|
|
"loss": 0.2297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12625879049301147,
|
|
"step": 2880,
|
|
"valid_targets_mean": 4608.6,
|
|
"valid_targets_min": 3797
|
|
},
|
|
{
|
|
"epoch": 4.364599092284418,
|
|
"grad_norm": 0.5520015471974049,
|
|
"learning_rate": 1.4940313848459975e-05,
|
|
"loss": 0.2354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1385439783334732,
|
|
"step": 2885,
|
|
"valid_targets_mean": 4160.1,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 4.372163388804841,
|
|
"grad_norm": 0.48528640883869695,
|
|
"learning_rate": 1.4867357758991474e-05,
|
|
"loss": 0.2441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11266931891441345,
|
|
"step": 2890,
|
|
"valid_targets_mean": 4380.6,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 4.379727685325265,
|
|
"grad_norm": 0.49239445423502226,
|
|
"learning_rate": 1.4794474709191082e-05,
|
|
"loss": 0.2432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11890158802270889,
|
|
"step": 2895,
|
|
"valid_targets_mean": 4355.5,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 4.387291981845689,
|
|
"grad_norm": 0.5497615386936762,
|
|
"learning_rate": 1.4721665736215416e-05,
|
|
"loss": 0.2434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16045933961868286,
|
|
"step": 2900,
|
|
"valid_targets_mean": 4424.8,
|
|
"valid_targets_min": 1628
|
|
},
|
|
{
|
|
"epoch": 4.394856278366112,
|
|
"grad_norm": 0.5034917489329059,
|
|
"learning_rate": 1.4648931876166931e-05,
|
|
"loss": 0.2249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0954614132642746,
|
|
"step": 2905,
|
|
"valid_targets_mean": 2726.4,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 4.402420574886536,
|
|
"grad_norm": 0.4690073480589113,
|
|
"learning_rate": 1.4576274164079183e-05,
|
|
"loss": 0.2357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12390436232089996,
|
|
"step": 2910,
|
|
"valid_targets_mean": 4430.0,
|
|
"valid_targets_min": 1229
|
|
},
|
|
{
|
|
"epoch": 4.409984871406959,
|
|
"grad_norm": 0.4998984716132741,
|
|
"learning_rate": 1.4503693633902128e-05,
|
|
"loss": 0.2374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08893519639968872,
|
|
"step": 2915,
|
|
"valid_targets_mean": 3323.4,
|
|
"valid_targets_min": 748
|
|
},
|
|
{
|
|
"epoch": 4.417549167927382,
|
|
"grad_norm": 0.5120054971633876,
|
|
"learning_rate": 1.4431191318487372e-05,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10906384885311127,
|
|
"step": 2920,
|
|
"valid_targets_mean": 3355.1,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 4.425113464447806,
|
|
"grad_norm": 0.5254697466249953,
|
|
"learning_rate": 1.4358768249573514e-05,
|
|
"loss": 0.2282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.094947949051857,
|
|
"step": 2925,
|
|
"valid_targets_mean": 2790.1,
|
|
"valid_targets_min": 485
|
|
},
|
|
{
|
|
"epoch": 4.43267776096823,
|
|
"grad_norm": 0.5744765614036916,
|
|
"learning_rate": 1.4286425457771427e-05,
|
|
"loss": 0.2338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13167189061641693,
|
|
"step": 2930,
|
|
"valid_targets_mean": 3681.9,
|
|
"valid_targets_min": 429
|
|
},
|
|
{
|
|
"epoch": 4.440242057488653,
|
|
"grad_norm": 0.44663542819737223,
|
|
"learning_rate": 1.4214163972549604e-05,
|
|
"loss": 0.2326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12205366790294647,
|
|
"step": 2935,
|
|
"valid_targets_mean": 5724.9,
|
|
"valid_targets_min": 3689
|
|
},
|
|
{
|
|
"epoch": 4.447806354009077,
|
|
"grad_norm": 0.5266008057274614,
|
|
"learning_rate": 1.4141984822219521e-05,
|
|
"loss": 0.2372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12702511250972748,
|
|
"step": 2940,
|
|
"valid_targets_mean": 4134.8,
|
|
"valid_targets_min": 962
|
|
},
|
|
{
|
|
"epoch": 4.455370650529501,
|
|
"grad_norm": 0.48887488381758676,
|
|
"learning_rate": 1.4069889033920998e-05,
|
|
"loss": 0.235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1139833927154541,
|
|
"step": 2945,
|
|
"valid_targets_mean": 4432.5,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 4.462934947049924,
|
|
"grad_norm": 0.4725315122592429,
|
|
"learning_rate": 1.3997877633607557e-05,
|
|
"loss": 0.2434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13320820033550262,
|
|
"step": 2950,
|
|
"valid_targets_mean": 4789.6,
|
|
"valid_targets_min": 894
|
|
},
|
|
{
|
|
"epoch": 4.470499243570348,
|
|
"grad_norm": 0.4813738399153288,
|
|
"learning_rate": 1.3925951646031864e-05,
|
|
"loss": 0.2396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1357446014881134,
|
|
"step": 2955,
|
|
"valid_targets_mean": 5188.8,
|
|
"valid_targets_min": 3840
|
|
},
|
|
{
|
|
"epoch": 4.478063540090772,
|
|
"grad_norm": 0.4994147550953032,
|
|
"learning_rate": 1.3854112094731116e-05,
|
|
"loss": 0.235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.115604929625988,
|
|
"step": 2960,
|
|
"valid_targets_mean": 4227.8,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 4.4856278366111955,
|
|
"grad_norm": 0.5155722357927405,
|
|
"learning_rate": 1.3782360002012485e-05,
|
|
"loss": 0.2425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08848443627357483,
|
|
"step": 2965,
|
|
"valid_targets_mean": 2711.6,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 4.493192133131619,
|
|
"grad_norm": 0.586136870936074,
|
|
"learning_rate": 1.3710696388938574e-05,
|
|
"loss": 0.2334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09821444749832153,
|
|
"step": 2970,
|
|
"valid_targets_mean": 2079.8,
|
|
"valid_targets_min": 467
|
|
},
|
|
{
|
|
"epoch": 4.500756429652043,
|
|
"grad_norm": 0.4951908014021543,
|
|
"learning_rate": 1.3639122275312886e-05,
|
|
"loss": 0.2418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11612828820943832,
|
|
"step": 2975,
|
|
"valid_targets_mean": 4001.9,
|
|
"valid_targets_min": 491
|
|
},
|
|
{
|
|
"epoch": 4.508320726172466,
|
|
"grad_norm": 0.590171805314269,
|
|
"learning_rate": 1.3567638679665296e-05,
|
|
"loss": 0.2294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10107433050870895,
|
|
"step": 2980,
|
|
"valid_targets_mean": 2116.1,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 4.515885022692889,
|
|
"grad_norm": 0.6213111334322668,
|
|
"learning_rate": 1.3496246619237585e-05,
|
|
"loss": 0.2388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15072540938854218,
|
|
"step": 2985,
|
|
"valid_targets_mean": 3562.1,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 4.523449319213313,
|
|
"grad_norm": 0.5257669447710505,
|
|
"learning_rate": 1.3424947109968944e-05,
|
|
"loss": 0.2457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1143098771572113,
|
|
"step": 2990,
|
|
"valid_targets_mean": 3905.6,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 4.531013615733737,
|
|
"grad_norm": 0.5151066823461002,
|
|
"learning_rate": 1.3353741166481515e-05,
|
|
"loss": 0.2328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11991828680038452,
|
|
"step": 2995,
|
|
"valid_targets_mean": 3083.4,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 4.53857791225416,
|
|
"grad_norm": 0.5226905915317058,
|
|
"learning_rate": 1.3282629802065974e-05,
|
|
"loss": 0.2267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09528303146362305,
|
|
"step": 3000,
|
|
"valid_targets_mean": 3129.0,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 4.546142208774584,
|
|
"grad_norm": 0.45942149528895065,
|
|
"learning_rate": 1.3211614028667077e-05,
|
|
"loss": 0.2487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12515589594841003,
|
|
"step": 3005,
|
|
"valid_targets_mean": 5110.8,
|
|
"valid_targets_min": 3872
|
|
},
|
|
{
|
|
"epoch": 4.553706505295008,
|
|
"grad_norm": 0.5564368809791808,
|
|
"learning_rate": 1.3140694856869297e-05,
|
|
"loss": 0.2377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13744589686393738,
|
|
"step": 3010,
|
|
"valid_targets_mean": 3624.4,
|
|
"valid_targets_min": 848
|
|
},
|
|
{
|
|
"epoch": 4.561270801815431,
|
|
"grad_norm": 0.5376519077358685,
|
|
"learning_rate": 1.306987329588242e-05,
|
|
"loss": 0.2309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10422738641500473,
|
|
"step": 3015,
|
|
"valid_targets_mean": 2724.9,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 4.568835098335855,
|
|
"grad_norm": 0.5632261886569494,
|
|
"learning_rate": 1.2999150353527182e-05,
|
|
"loss": 0.246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14442552626132965,
|
|
"step": 3020,
|
|
"valid_targets_mean": 3796.8,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 4.576399394856279,
|
|
"grad_norm": 0.5232959397479129,
|
|
"learning_rate": 1.2928527036220944e-05,
|
|
"loss": 0.2326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0743347704410553,
|
|
"step": 3025,
|
|
"valid_targets_mean": 2484.0,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 4.583963691376702,
|
|
"grad_norm": 0.4881869941527992,
|
|
"learning_rate": 1.285800434896336e-05,
|
|
"loss": 0.2354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10796622186899185,
|
|
"step": 3030,
|
|
"valid_targets_mean": 3881.9,
|
|
"valid_targets_min": 1122
|
|
},
|
|
{
|
|
"epoch": 4.591527987897125,
|
|
"grad_norm": 0.46344711426350815,
|
|
"learning_rate": 1.2787583295322063e-05,
|
|
"loss": 0.217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10884660482406616,
|
|
"step": 3035,
|
|
"valid_targets_mean": 4503.5,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 4.599092284417549,
|
|
"grad_norm": 0.5598408850635183,
|
|
"learning_rate": 1.2717264877418409e-05,
|
|
"loss": 0.2304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10853119194507599,
|
|
"step": 3040,
|
|
"valid_targets_mean": 2540.6,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 4.6066565809379725,
|
|
"grad_norm": 0.5186034491309895,
|
|
"learning_rate": 1.2647050095913211e-05,
|
|
"loss": 0.2338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10633227229118347,
|
|
"step": 3045,
|
|
"valid_targets_mean": 3287.1,
|
|
"valid_targets_min": 200
|
|
},
|
|
{
|
|
"epoch": 4.614220877458396,
|
|
"grad_norm": 0.4788626649096419,
|
|
"learning_rate": 1.2576939949992468e-05,
|
|
"loss": 0.2343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12777367234230042,
|
|
"step": 3050,
|
|
"valid_targets_mean": 5100.0,
|
|
"valid_targets_min": 4040
|
|
},
|
|
{
|
|
"epoch": 4.62178517397882,
|
|
"grad_norm": 0.5490887970753122,
|
|
"learning_rate": 1.2506935437353192e-05,
|
|
"loss": 0.2318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10429686307907104,
|
|
"step": 3055,
|
|
"valid_targets_mean": 2967.2,
|
|
"valid_targets_min": 297
|
|
},
|
|
{
|
|
"epoch": 4.6293494704992435,
|
|
"grad_norm": 0.5772188158460232,
|
|
"learning_rate": 1.2437037554189186e-05,
|
|
"loss": 0.2358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09326672554016113,
|
|
"step": 3060,
|
|
"valid_targets_mean": 2559.0,
|
|
"valid_targets_min": 525
|
|
},
|
|
{
|
|
"epoch": 4.636913767019667,
|
|
"grad_norm": 0.5711217103398434,
|
|
"learning_rate": 1.2367247295176855e-05,
|
|
"loss": 0.2249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10453672707080841,
|
|
"step": 3065,
|
|
"valid_targets_mean": 3057.5,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 4.644478063540091,
|
|
"grad_norm": 0.5340529864098529,
|
|
"learning_rate": 1.2297565653461087e-05,
|
|
"loss": 0.2332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1255873143672943,
|
|
"step": 3070,
|
|
"valid_targets_mean": 3894.9,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 4.6520423600605145,
|
|
"grad_norm": 0.5471514311172181,
|
|
"learning_rate": 1.2227993620641083e-05,
|
|
"loss": 0.2325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10671280324459076,
|
|
"step": 3075,
|
|
"valid_targets_mean": 3551.8,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 4.659606656580938,
|
|
"grad_norm": 0.5237314682634302,
|
|
"learning_rate": 1.2158532186756275e-05,
|
|
"loss": 0.238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11672964692115784,
|
|
"step": 3080,
|
|
"valid_targets_mean": 3375.1,
|
|
"valid_targets_min": 594
|
|
},
|
|
{
|
|
"epoch": 4.667170953101362,
|
|
"grad_norm": 0.6096326558897097,
|
|
"learning_rate": 1.2089182340272227e-05,
|
|
"loss": 0.255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1352270245552063,
|
|
"step": 3085,
|
|
"valid_targets_mean": 2849.2,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 4.6747352496217855,
|
|
"grad_norm": 0.475774294574106,
|
|
"learning_rate": 1.201994506806655e-05,
|
|
"loss": 0.2293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08956238627433777,
|
|
"step": 3090,
|
|
"valid_targets_mean": 3718.1,
|
|
"valid_targets_min": 896
|
|
},
|
|
{
|
|
"epoch": 4.682299546142209,
|
|
"grad_norm": 0.53615549427534,
|
|
"learning_rate": 1.1950821355414894e-05,
|
|
"loss": 0.2311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09473133832216263,
|
|
"step": 3095,
|
|
"valid_targets_mean": 2883.6,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 4.689863842662632,
|
|
"grad_norm": 0.46913977468464885,
|
|
"learning_rate": 1.1881812185976902e-05,
|
|
"loss": 0.2378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14270634949207306,
|
|
"step": 3100,
|
|
"valid_targets_mean": 4890.9,
|
|
"valid_targets_min": 1012
|
|
},
|
|
{
|
|
"epoch": 4.697428139183056,
|
|
"grad_norm": 0.475634993726397,
|
|
"learning_rate": 1.1812918541782215e-05,
|
|
"loss": 0.2236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11165912449359894,
|
|
"step": 3105,
|
|
"valid_targets_mean": 4017.4,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 4.704992435703479,
|
|
"grad_norm": 0.5252187350801427,
|
|
"learning_rate": 1.1744141403216503e-05,
|
|
"loss": 0.232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12445740401744843,
|
|
"step": 3110,
|
|
"valid_targets_mean": 3318.9,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 4.712556732223903,
|
|
"grad_norm": 0.4623120922019237,
|
|
"learning_rate": 1.1675481749007518e-05,
|
|
"loss": 0.2265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08409413695335388,
|
|
"step": 3115,
|
|
"valid_targets_mean": 3700.4,
|
|
"valid_targets_min": 1035
|
|
},
|
|
{
|
|
"epoch": 4.720121028744327,
|
|
"grad_norm": 0.561460796730536,
|
|
"learning_rate": 1.1606940556211147e-05,
|
|
"loss": 0.247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13790298998355865,
|
|
"step": 3120,
|
|
"valid_targets_mean": 3262.4,
|
|
"valid_targets_min": 653
|
|
},
|
|
{
|
|
"epoch": 4.72768532526475,
|
|
"grad_norm": 0.5948662786552812,
|
|
"learning_rate": 1.1538518800197538e-05,
|
|
"loss": 0.234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13142284750938416,
|
|
"step": 3125,
|
|
"valid_targets_mean": 3600.8,
|
|
"valid_targets_min": 698
|
|
},
|
|
{
|
|
"epoch": 4.735249621785174,
|
|
"grad_norm": 0.5008629012897646,
|
|
"learning_rate": 1.1470217454637193e-05,
|
|
"loss": 0.2361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1471904069185257,
|
|
"step": 3130,
|
|
"valid_targets_mean": 4615.8,
|
|
"valid_targets_min": 474
|
|
},
|
|
{
|
|
"epoch": 4.742813918305598,
|
|
"grad_norm": 0.49560596701998627,
|
|
"learning_rate": 1.1402037491487112e-05,
|
|
"loss": 0.235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09480459988117218,
|
|
"step": 3135,
|
|
"valid_targets_mean": 2921.0,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 4.750378214826021,
|
|
"grad_norm": 0.6232412020114194,
|
|
"learning_rate": 1.1333979880976992e-05,
|
|
"loss": 0.2318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11171066761016846,
|
|
"step": 3140,
|
|
"valid_targets_mean": 2366.9,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 4.757942511346445,
|
|
"grad_norm": 0.49048495661789504,
|
|
"learning_rate": 1.1266045591595391e-05,
|
|
"loss": 0.2382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12888431549072266,
|
|
"step": 3145,
|
|
"valid_targets_mean": 4169.2,
|
|
"valid_targets_min": 985
|
|
},
|
|
{
|
|
"epoch": 4.765506807866869,
|
|
"grad_norm": 0.5270231274240649,
|
|
"learning_rate": 1.1198235590075951e-05,
|
|
"loss": 0.242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09971421957015991,
|
|
"step": 3150,
|
|
"valid_targets_mean": 2927.6,
|
|
"valid_targets_min": 363
|
|
},
|
|
{
|
|
"epoch": 4.7730711043872915,
|
|
"grad_norm": 0.5088982595577873,
|
|
"learning_rate": 1.1130550841383662e-05,
|
|
"loss": 0.2292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10408955812454224,
|
|
"step": 3155,
|
|
"valid_targets_mean": 3089.0,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 4.780635400907715,
|
|
"grad_norm": 0.5768770499061177,
|
|
"learning_rate": 1.1062992308701089e-05,
|
|
"loss": 0.2281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.128196582198143,
|
|
"step": 3160,
|
|
"valid_targets_mean": 3259.9,
|
|
"valid_targets_min": 655
|
|
},
|
|
{
|
|
"epoch": 4.788199697428139,
|
|
"grad_norm": 0.5205258858296429,
|
|
"learning_rate": 1.0995560953414701e-05,
|
|
"loss": 0.2337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1128782406449318,
|
|
"step": 3165,
|
|
"valid_targets_mean": 3130.2,
|
|
"valid_targets_min": 569
|
|
},
|
|
{
|
|
"epoch": 4.795763993948563,
|
|
"grad_norm": 0.5051436984646774,
|
|
"learning_rate": 1.0928257735101186e-05,
|
|
"loss": 0.2364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1534266322851181,
|
|
"step": 3170,
|
|
"valid_targets_mean": 5558.1,
|
|
"valid_targets_min": 4568
|
|
},
|
|
{
|
|
"epoch": 4.803328290468986,
|
|
"grad_norm": 0.5789720350392524,
|
|
"learning_rate": 1.0861083611513781e-05,
|
|
"loss": 0.24,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10815252363681793,
|
|
"step": 3175,
|
|
"valid_targets_mean": 2892.1,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 4.81089258698941,
|
|
"grad_norm": 0.4626820661429203,
|
|
"learning_rate": 1.0794039538568653e-05,
|
|
"loss": 0.2393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11426861584186554,
|
|
"step": 3180,
|
|
"valid_targets_mean": 5182.4,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 4.818456883509834,
|
|
"grad_norm": 0.570883447624291,
|
|
"learning_rate": 1.0727126470331299e-05,
|
|
"loss": 0.243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12339181452989578,
|
|
"step": 3185,
|
|
"valid_targets_mean": 3281.6,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 4.826021180030257,
|
|
"grad_norm": 0.609807164517744,
|
|
"learning_rate": 1.0660345359002941e-05,
|
|
"loss": 0.2477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13398966193199158,
|
|
"step": 3190,
|
|
"valid_targets_mean": 3367.2,
|
|
"valid_targets_min": 802
|
|
},
|
|
{
|
|
"epoch": 4.833585476550681,
|
|
"grad_norm": 0.543932530452066,
|
|
"learning_rate": 1.0593697154907027e-05,
|
|
"loss": 0.2333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10489015281200409,
|
|
"step": 3195,
|
|
"valid_targets_mean": 3189.5,
|
|
"valid_targets_min": 339
|
|
},
|
|
{
|
|
"epoch": 4.841149773071105,
|
|
"grad_norm": 0.4784527466730976,
|
|
"learning_rate": 1.0527182806475662e-05,
|
|
"loss": 0.2371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10988245904445648,
|
|
"step": 3200,
|
|
"valid_targets_mean": 3885.5,
|
|
"valid_targets_min": 763
|
|
},
|
|
{
|
|
"epoch": 4.848714069591528,
|
|
"grad_norm": 0.5660591677232388,
|
|
"learning_rate": 1.0460803260236134e-05,
|
|
"loss": 0.2346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11075103282928467,
|
|
"step": 3205,
|
|
"valid_targets_mean": 2675.4,
|
|
"valid_targets_min": 552
|
|
},
|
|
{
|
|
"epoch": 4.856278366111952,
|
|
"grad_norm": 0.450422063864158,
|
|
"learning_rate": 1.0394559460797446e-05,
|
|
"loss": 0.2371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1185651570558548,
|
|
"step": 3210,
|
|
"valid_targets_mean": 4336.8,
|
|
"valid_targets_min": 1033
|
|
},
|
|
{
|
|
"epoch": 4.863842662632376,
|
|
"grad_norm": 0.5056768067227796,
|
|
"learning_rate": 1.0328452350836842e-05,
|
|
"loss": 0.234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10798711329698563,
|
|
"step": 3215,
|
|
"valid_targets_mean": 4007.1,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 4.871406959152798,
|
|
"grad_norm": 0.591595620007623,
|
|
"learning_rate": 1.0262482871086443e-05,
|
|
"loss": 0.2347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0954662412405014,
|
|
"step": 3220,
|
|
"valid_targets_mean": 2585.0,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 4.878971255673222,
|
|
"grad_norm": 0.5136722449419053,
|
|
"learning_rate": 1.019665196031982e-05,
|
|
"loss": 0.2387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13012893497943878,
|
|
"step": 3225,
|
|
"valid_targets_mean": 4399.6,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 4.886535552193646,
|
|
"grad_norm": 0.7937605317515639,
|
|
"learning_rate": 1.013096055533866e-05,
|
|
"loss": 0.2434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15884897112846375,
|
|
"step": 3230,
|
|
"valid_targets_mean": 2668.9,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 4.8940998487140694,
|
|
"grad_norm": 0.46873586017252306,
|
|
"learning_rate": 1.006540959095941e-05,
|
|
"loss": 0.2263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1168365329504013,
|
|
"step": 3235,
|
|
"valid_targets_mean": 3744.6,
|
|
"valid_targets_min": 571
|
|
},
|
|
{
|
|
"epoch": 4.901664145234493,
|
|
"grad_norm": 0.4834802584046994,
|
|
"learning_rate": 1.0000000000000006e-05,
|
|
"loss": 0.242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09709112346172333,
|
|
"step": 3240,
|
|
"valid_targets_mean": 3545.5,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 4.909228441754917,
|
|
"grad_norm": 0.45850621263455865,
|
|
"learning_rate": 9.93473271326655e-06,
|
|
"loss": 0.2466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11098715662956238,
|
|
"step": 3245,
|
|
"valid_targets_mean": 4049.8,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 4.9167927382753405,
|
|
"grad_norm": 0.693348557141324,
|
|
"learning_rate": 9.869608659540129e-06,
|
|
"loss": 0.2454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11698335409164429,
|
|
"step": 3250,
|
|
"valid_targets_mean": 1598.1,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 4.924357034795764,
|
|
"grad_norm": 0.5314962335237453,
|
|
"learning_rate": 9.804628765563542e-06,
|
|
"loss": 0.2332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15482136607170105,
|
|
"step": 3255,
|
|
"valid_targets_mean": 4716.8,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 4.931921331316188,
|
|
"grad_norm": 0.5415373464766291,
|
|
"learning_rate": 9.739793956028143e-06,
|
|
"loss": 0.2369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1225585788488388,
|
|
"step": 3260,
|
|
"valid_targets_mean": 3426.0,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 4.9394856278366115,
|
|
"grad_norm": 0.4827840047034187,
|
|
"learning_rate": 9.675105153560668e-06,
|
|
"loss": 0.2327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1162397488951683,
|
|
"step": 3265,
|
|
"valid_targets_mean": 4160.6,
|
|
"valid_targets_min": 843
|
|
},
|
|
{
|
|
"epoch": 4.947049924357035,
|
|
"grad_norm": 0.482795627078259,
|
|
"learning_rate": 9.610563278710128e-06,
|
|
"loss": 0.2252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09905041754245758,
|
|
"step": 3270,
|
|
"valid_targets_mean": 3923.9,
|
|
"valid_targets_min": 831
|
|
},
|
|
{
|
|
"epoch": 4.954614220877458,
|
|
"grad_norm": 0.8052032817458329,
|
|
"learning_rate": 9.546169249934654e-06,
|
|
"loss": 0.2442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12180210649967194,
|
|
"step": 3275,
|
|
"valid_targets_mean": 2791.0,
|
|
"valid_targets_min": 530
|
|
},
|
|
{
|
|
"epoch": 4.962178517397882,
|
|
"grad_norm": 0.532567852655263,
|
|
"learning_rate": 9.481923983588508e-06,
|
|
"loss": 0.2335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13848380744457245,
|
|
"step": 3280,
|
|
"valid_targets_mean": 4391.0,
|
|
"valid_targets_min": 447
|
|
},
|
|
{
|
|
"epoch": 4.969742813918305,
|
|
"grad_norm": 0.6157946172693902,
|
|
"learning_rate": 9.417828393908955e-06,
|
|
"loss": 0.2488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12361279129981995,
|
|
"step": 3285,
|
|
"valid_targets_mean": 2709.5,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 4.977307110438729,
|
|
"grad_norm": 4.872260539190662,
|
|
"learning_rate": 9.353883393003347e-06,
|
|
"loss": 0.2301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10355688631534576,
|
|
"step": 3290,
|
|
"valid_targets_mean": 3427.4,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 4.984871406959153,
|
|
"grad_norm": 0.4991945894191523,
|
|
"learning_rate": 9.290089890836068e-06,
|
|
"loss": 0.2452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10919953882694244,
|
|
"step": 3295,
|
|
"valid_targets_mean": 3569.1,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 4.992435703479576,
|
|
"grad_norm": 0.45700242477924763,
|
|
"learning_rate": 9.226448795215598e-06,
|
|
"loss": 0.2291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10356556624174118,
|
|
"step": 3300,
|
|
"valid_targets_mean": 3824.5,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.4922486537716722,
|
|
"learning_rate": 9.162961011781632e-06,
|
|
"loss": 0.2277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1080213114619255,
|
|
"step": 3305,
|
|
"valid_targets_mean": 4037.4,
|
|
"valid_targets_min": 1346
|
|
},
|
|
{
|
|
"epoch": 5.007564296520424,
|
|
"grad_norm": 0.449122826489313,
|
|
"learning_rate": 9.099627443992163e-06,
|
|
"loss": 0.2264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09822921454906464,
|
|
"step": 3310,
|
|
"valid_targets_mean": 3780.4,
|
|
"valid_targets_min": 474
|
|
},
|
|
{
|
|
"epoch": 5.015128593040847,
|
|
"grad_norm": 0.48858300583562186,
|
|
"learning_rate": 9.036448993110603e-06,
|
|
"loss": 0.2209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10172191262245178,
|
|
"step": 3315,
|
|
"valid_targets_mean": 4338.0,
|
|
"valid_targets_min": 3599
|
|
},
|
|
{
|
|
"epoch": 5.022692889561271,
|
|
"grad_norm": 0.5543623954655292,
|
|
"learning_rate": 8.97342655819303e-06,
|
|
"loss": 0.2292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09266623854637146,
|
|
"step": 3320,
|
|
"valid_targets_mean": 2787.5,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 5.030257186081695,
|
|
"grad_norm": 0.5767428568131578,
|
|
"learning_rate": 8.910561036075325e-06,
|
|
"loss": 0.2371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15153908729553223,
|
|
"step": 3325,
|
|
"valid_targets_mean": 4305.5,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 5.037821482602118,
|
|
"grad_norm": 0.639049567436693,
|
|
"learning_rate": 8.847853321360423e-06,
|
|
"loss": 0.2196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13325051963329315,
|
|
"step": 3330,
|
|
"valid_targets_mean": 3116.0,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 5.045385779122542,
|
|
"grad_norm": 0.588321920143045,
|
|
"learning_rate": 8.785304306405605e-06,
|
|
"loss": 0.2347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15932214260101318,
|
|
"step": 3335,
|
|
"valid_targets_mean": 4467.1,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 5.052950075642965,
|
|
"grad_norm": 0.5726943213089073,
|
|
"learning_rate": 8.722914881309801e-06,
|
|
"loss": 0.2231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12993863224983215,
|
|
"step": 3340,
|
|
"valid_targets_mean": 3651.0,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 5.0605143721633885,
|
|
"grad_norm": 0.4779610963853435,
|
|
"learning_rate": 8.660685933900869e-06,
|
|
"loss": 0.2128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10848415642976761,
|
|
"step": 3345,
|
|
"valid_targets_mean": 4743.1,
|
|
"valid_targets_min": 3237
|
|
},
|
|
{
|
|
"epoch": 5.068078668683812,
|
|
"grad_norm": 0.5446852413329794,
|
|
"learning_rate": 8.59861834972306e-06,
|
|
"loss": 0.2209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13423976302146912,
|
|
"step": 3350,
|
|
"valid_targets_mean": 4600.4,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 5.075642965204236,
|
|
"grad_norm": 0.6248529979526356,
|
|
"learning_rate": 8.536713012024305e-06,
|
|
"loss": 0.232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11277179419994354,
|
|
"step": 3355,
|
|
"valid_targets_mean": 2798.9,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 5.0832072617246595,
|
|
"grad_norm": 0.5294607064712893,
|
|
"learning_rate": 8.474970801743724e-06,
|
|
"loss": 0.2295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0978393703699112,
|
|
"step": 3360,
|
|
"valid_targets_mean": 3351.1,
|
|
"valid_targets_min": 456
|
|
},
|
|
{
|
|
"epoch": 5.090771558245083,
|
|
"grad_norm": 0.7107843580087351,
|
|
"learning_rate": 8.413392597499075e-06,
|
|
"loss": 0.2262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12164593487977982,
|
|
"step": 3365,
|
|
"valid_targets_mean": 2453.2,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 5.098335854765507,
|
|
"grad_norm": 0.5340140874058419,
|
|
"learning_rate": 8.351979275574207e-06,
|
|
"loss": 0.2181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11388534307479858,
|
|
"step": 3370,
|
|
"valid_targets_mean": 3939.8,
|
|
"valid_targets_min": 802
|
|
},
|
|
{
|
|
"epoch": 5.1059001512859306,
|
|
"grad_norm": 0.5735089291068755,
|
|
"learning_rate": 8.290731709906643e-06,
|
|
"loss": 0.2243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10401733219623566,
|
|
"step": 3375,
|
|
"valid_targets_mean": 2701.1,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 5.113464447806354,
|
|
"grad_norm": 0.6856758889656935,
|
|
"learning_rate": 8.229650772075153e-06,
|
|
"loss": 0.226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10597525537014008,
|
|
"step": 3380,
|
|
"valid_targets_mean": 2166.1,
|
|
"valid_targets_min": 496
|
|
},
|
|
{
|
|
"epoch": 5.121028744326778,
|
|
"grad_norm": 0.5783297706706122,
|
|
"learning_rate": 8.168737331287269e-06,
|
|
"loss": 0.2276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12266707420349121,
|
|
"step": 3385,
|
|
"valid_targets_mean": 3984.4,
|
|
"valid_targets_min": 745
|
|
},
|
|
{
|
|
"epoch": 5.128593040847202,
|
|
"grad_norm": 0.5191283369410631,
|
|
"learning_rate": 8.107992254367003e-06,
|
|
"loss": 0.2299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09682321548461914,
|
|
"step": 3390,
|
|
"valid_targets_mean": 3290.9,
|
|
"valid_targets_min": 724
|
|
},
|
|
{
|
|
"epoch": 5.136157337367624,
|
|
"grad_norm": 0.5418349749755162,
|
|
"learning_rate": 8.047416405742479e-06,
|
|
"loss": 0.221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11398833245038986,
|
|
"step": 3395,
|
|
"valid_targets_mean": 3977.9,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 5.143721633888048,
|
|
"grad_norm": 0.6019447063795035,
|
|
"learning_rate": 7.987010647433606e-06,
|
|
"loss": 0.2187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09537911415100098,
|
|
"step": 3400,
|
|
"valid_targets_mean": 2727.1,
|
|
"valid_targets_min": 547
|
|
},
|
|
{
|
|
"epoch": 5.151285930408472,
|
|
"grad_norm": 0.5252979551023393,
|
|
"learning_rate": 7.926775839039851e-06,
|
|
"loss": 0.221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09594881534576416,
|
|
"step": 3405,
|
|
"valid_targets_mean": 3191.9,
|
|
"valid_targets_min": 513
|
|
},
|
|
{
|
|
"epoch": 5.158850226928895,
|
|
"grad_norm": 0.6008532457301452,
|
|
"learning_rate": 7.866712837728016e-06,
|
|
"loss": 0.2269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09772796928882599,
|
|
"step": 3410,
|
|
"valid_targets_mean": 2675.5,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 5.166414523449319,
|
|
"grad_norm": 0.5352969462630807,
|
|
"learning_rate": 7.80682249821997e-06,
|
|
"loss": 0.2177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08551070094108582,
|
|
"step": 3415,
|
|
"valid_targets_mean": 2692.2,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 5.173978819969743,
|
|
"grad_norm": 0.5478575146270561,
|
|
"learning_rate": 7.747105672780561e-06,
|
|
"loss": 0.2237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1497330665588379,
|
|
"step": 3420,
|
|
"valid_targets_mean": 4685.0,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 5.181543116490166,
|
|
"grad_norm": 0.5624350375905227,
|
|
"learning_rate": 7.68756321120546e-06,
|
|
"loss": 0.2275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1089964210987091,
|
|
"step": 3425,
|
|
"valid_targets_mean": 2979.5,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 5.18910741301059,
|
|
"grad_norm": 0.4952659365800833,
|
|
"learning_rate": 7.628195960809039e-06,
|
|
"loss": 0.2154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1156390979886055,
|
|
"step": 3430,
|
|
"valid_targets_mean": 4536.8,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 5.196671709531014,
|
|
"grad_norm": 0.5424767289759522,
|
|
"learning_rate": 7.569004766412369e-06,
|
|
"loss": 0.2153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11298723518848419,
|
|
"step": 3435,
|
|
"valid_targets_mean": 3950.0,
|
|
"valid_targets_min": 433
|
|
},
|
|
{
|
|
"epoch": 5.204236006051437,
|
|
"grad_norm": 0.5798621387564599,
|
|
"learning_rate": 7.509990470331159e-06,
|
|
"loss": 0.2211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12284237891435623,
|
|
"step": 3440,
|
|
"valid_targets_mean": 4435.9,
|
|
"valid_targets_min": 887
|
|
},
|
|
{
|
|
"epoch": 5.211800302571861,
|
|
"grad_norm": 0.5045685263582093,
|
|
"learning_rate": 7.451153912363784e-06,
|
|
"loss": 0.2184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08915689587593079,
|
|
"step": 3445,
|
|
"valid_targets_mean": 3958.9,
|
|
"valid_targets_min": 1213
|
|
},
|
|
{
|
|
"epoch": 5.219364599092285,
|
|
"grad_norm": 0.6049268361884362,
|
|
"learning_rate": 7.392495929779333e-06,
|
|
"loss": 0.2333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1321156769990921,
|
|
"step": 3450,
|
|
"valid_targets_mean": 3548.9,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 5.2269288956127085,
|
|
"grad_norm": 0.6412957372359686,
|
|
"learning_rate": 7.334017357305674e-06,
|
|
"loss": 0.2306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14255505800247192,
|
|
"step": 3455,
|
|
"valid_targets_mean": 3746.4,
|
|
"valid_targets_min": 912
|
|
},
|
|
{
|
|
"epoch": 5.234493192133131,
|
|
"grad_norm": 0.5271144908625914,
|
|
"learning_rate": 7.2757190271176115e-06,
|
|
"loss": 0.2282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1139393150806427,
|
|
"step": 3460,
|
|
"valid_targets_mean": 3803.0,
|
|
"valid_targets_min": 748
|
|
},
|
|
{
|
|
"epoch": 5.242057488653555,
|
|
"grad_norm": 0.5639154534830885,
|
|
"learning_rate": 7.217601768825023e-06,
|
|
"loss": 0.2191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1216428130865097,
|
|
"step": 3465,
|
|
"valid_targets_mean": 3897.5,
|
|
"valid_targets_min": 873
|
|
},
|
|
{
|
|
"epoch": 5.249621785173979,
|
|
"grad_norm": 0.5938638713338206,
|
|
"learning_rate": 7.15966640946105e-06,
|
|
"loss": 0.2291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1187976822257042,
|
|
"step": 3470,
|
|
"valid_targets_mean": 3026.9,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 5.257186081694402,
|
|
"grad_norm": 0.4711935246556062,
|
|
"learning_rate": 7.101913773470346e-06,
|
|
"loss": 0.2298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1058630496263504,
|
|
"step": 3475,
|
|
"valid_targets_mean": 4376.5,
|
|
"valid_targets_min": 1009
|
|
},
|
|
{
|
|
"epoch": 5.264750378214826,
|
|
"grad_norm": 0.45578581130394474,
|
|
"learning_rate": 7.044344682697326e-06,
|
|
"loss": 0.2162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10273635387420654,
|
|
"step": 3480,
|
|
"valid_targets_mean": 4649.1,
|
|
"valid_targets_min": 3877
|
|
},
|
|
{
|
|
"epoch": 5.27231467473525,
|
|
"grad_norm": 0.5706393015901207,
|
|
"learning_rate": 6.986959956374473e-06,
|
|
"loss": 0.2245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12840817868709564,
|
|
"step": 3485,
|
|
"valid_targets_mean": 3824.0,
|
|
"valid_targets_min": 920
|
|
},
|
|
{
|
|
"epoch": 5.279878971255673,
|
|
"grad_norm": 0.5057399798766884,
|
|
"learning_rate": 6.929760411110698e-06,
|
|
"loss": 0.2157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12005817890167236,
|
|
"step": 3490,
|
|
"valid_targets_mean": 4271.1,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 5.287443267776097,
|
|
"grad_norm": 0.5070976505370746,
|
|
"learning_rate": 6.872746860879702e-06,
|
|
"loss": 0.2256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11866520345211029,
|
|
"step": 3495,
|
|
"valid_targets_mean": 4226.2,
|
|
"valid_targets_min": 2393
|
|
},
|
|
{
|
|
"epoch": 5.295007564296521,
|
|
"grad_norm": 0.5952335991978547,
|
|
"learning_rate": 6.815920117008399e-06,
|
|
"loss": 0.2257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13067616522312164,
|
|
"step": 3500,
|
|
"valid_targets_mean": 4246.5,
|
|
"valid_targets_min": 814
|
|
},
|
|
{
|
|
"epoch": 5.302571860816944,
|
|
"grad_norm": 0.6966729275331836,
|
|
"learning_rate": 6.759280988165373e-06,
|
|
"loss": 0.2238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07262475788593292,
|
|
"step": 3505,
|
|
"valid_targets_mean": 1543.6,
|
|
"valid_targets_min": 313
|
|
},
|
|
{
|
|
"epoch": 5.310136157337368,
|
|
"grad_norm": 0.6645335046220661,
|
|
"learning_rate": 6.702830280349353e-06,
|
|
"loss": 0.2273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08835335075855255,
|
|
"step": 3510,
|
|
"valid_targets_mean": 2050.4,
|
|
"valid_targets_min": 360
|
|
},
|
|
{
|
|
"epoch": 5.317700453857791,
|
|
"grad_norm": 0.5930270953877599,
|
|
"learning_rate": 6.6465687968777725e-06,
|
|
"loss": 0.2187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09741196036338806,
|
|
"step": 3515,
|
|
"valid_targets_mean": 3359.5,
|
|
"valid_targets_min": 551
|
|
},
|
|
{
|
|
"epoch": 5.3252647503782145,
|
|
"grad_norm": 0.5626259815491444,
|
|
"learning_rate": 6.590497338375317e-06,
|
|
"loss": 0.2176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1267971694469452,
|
|
"step": 3520,
|
|
"valid_targets_mean": 3901.0,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 5.332829046898638,
|
|
"grad_norm": 0.5042397061587899,
|
|
"learning_rate": 6.534616702762537e-06,
|
|
"loss": 0.2428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1389683485031128,
|
|
"step": 3525,
|
|
"valid_targets_mean": 4523.4,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 5.340393343419062,
|
|
"grad_norm": 0.7317035417443288,
|
|
"learning_rate": 6.478927685244494e-06,
|
|
"loss": 0.2265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12208820879459381,
|
|
"step": 3530,
|
|
"valid_targets_mean": 2077.0,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 5.3479576399394855,
|
|
"grad_norm": 0.6125166385178272,
|
|
"learning_rate": 6.423431078299443e-06,
|
|
"loss": 0.2329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10823240876197815,
|
|
"step": 3535,
|
|
"valid_targets_mean": 3455.5,
|
|
"valid_targets_min": 526
|
|
},
|
|
{
|
|
"epoch": 5.355521936459909,
|
|
"grad_norm": 0.5232029216692686,
|
|
"learning_rate": 6.3681276716675435e-06,
|
|
"loss": 0.2189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10435009747743607,
|
|
"step": 3540,
|
|
"valid_targets_mean": 3393.6,
|
|
"valid_targets_min": 1132
|
|
},
|
|
{
|
|
"epoch": 5.363086232980333,
|
|
"grad_norm": 0.5950286451274107,
|
|
"learning_rate": 6.3130182523396484e-06,
|
|
"loss": 0.2281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1427772343158722,
|
|
"step": 3545,
|
|
"valid_targets_mean": 2885.8,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 5.3706505295007565,
|
|
"grad_norm": 0.5927648931720869,
|
|
"learning_rate": 6.258103604546087e-06,
|
|
"loss": 0.2311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11588302254676819,
|
|
"step": 3550,
|
|
"valid_targets_mean": 2907.1,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 5.37821482602118,
|
|
"grad_norm": 0.591114375802195,
|
|
"learning_rate": 6.2033845097454985e-06,
|
|
"loss": 0.2268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11765435338020325,
|
|
"step": 3555,
|
|
"valid_targets_mean": 3332.4,
|
|
"valid_targets_min": 544
|
|
},
|
|
{
|
|
"epoch": 5.385779122541604,
|
|
"grad_norm": 0.5740742461444643,
|
|
"learning_rate": 6.14886174661373e-06,
|
|
"loss": 0.2309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10680969059467316,
|
|
"step": 3560,
|
|
"valid_targets_mean": 3220.1,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 5.3933434190620275,
|
|
"grad_norm": 0.5135741412736585,
|
|
"learning_rate": 6.0945360910327476e-06,
|
|
"loss": 0.2287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09846675395965576,
|
|
"step": 3565,
|
|
"valid_targets_mean": 3545.6,
|
|
"valid_targets_min": 392
|
|
},
|
|
{
|
|
"epoch": 5.400907715582451,
|
|
"grad_norm": 0.5302883186016151,
|
|
"learning_rate": 6.040408316079575e-06,
|
|
"loss": 0.2195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11288570612668991,
|
|
"step": 3570,
|
|
"valid_targets_mean": 3847.5,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 5.408472012102875,
|
|
"grad_norm": 0.4868263050608858,
|
|
"learning_rate": 5.986479192015337e-06,
|
|
"loss": 0.2333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10717683285474777,
|
|
"step": 3575,
|
|
"valid_targets_mean": 3992.9,
|
|
"valid_targets_min": 858
|
|
},
|
|
{
|
|
"epoch": 5.416036308623298,
|
|
"grad_norm": 0.4970189610971978,
|
|
"learning_rate": 5.932749486274239e-06,
|
|
"loss": 0.219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1131008118391037,
|
|
"step": 3580,
|
|
"valid_targets_mean": 3778.6,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 5.423600605143721,
|
|
"grad_norm": 0.5909195387508255,
|
|
"learning_rate": 5.8792199634527205e-06,
|
|
"loss": 0.2268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10363276302814484,
|
|
"step": 3585,
|
|
"valid_targets_mean": 2826.1,
|
|
"valid_targets_min": 497
|
|
},
|
|
{
|
|
"epoch": 5.431164901664145,
|
|
"grad_norm": 0.4923485640548487,
|
|
"learning_rate": 5.82589138529851e-06,
|
|
"loss": 0.223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08128589391708374,
|
|
"step": 3590,
|
|
"valid_targets_mean": 3642.9,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 5.438729198184569,
|
|
"grad_norm": 0.5183361480055171,
|
|
"learning_rate": 5.7727645106998e-06,
|
|
"loss": 0.2193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08896800875663757,
|
|
"step": 3595,
|
|
"valid_targets_mean": 3087.6,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 5.446293494704992,
|
|
"grad_norm": 0.5832085371263873,
|
|
"learning_rate": 5.719840095674476e-06,
|
|
"loss": 0.2305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12822987139225006,
|
|
"step": 3600,
|
|
"valid_targets_mean": 3767.5,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 5.453857791225416,
|
|
"grad_norm": 0.5211741549730257,
|
|
"learning_rate": 5.667118893359331e-06,
|
|
"loss": 0.2356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09994645416736603,
|
|
"step": 3605,
|
|
"valid_targets_mean": 3986.2,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 5.46142208774584,
|
|
"grad_norm": 0.5293141393825465,
|
|
"learning_rate": 5.614601653999338e-06,
|
|
"loss": 0.2169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13067039847373962,
|
|
"step": 3610,
|
|
"valid_targets_mean": 3920.1,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 5.468986384266263,
|
|
"grad_norm": 0.4696569988258623,
|
|
"learning_rate": 5.5622891249370234e-06,
|
|
"loss": 0.2104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08374680578708649,
|
|
"step": 3615,
|
|
"valid_targets_mean": 3493.9,
|
|
"valid_targets_min": 810
|
|
},
|
|
{
|
|
"epoch": 5.476550680786687,
|
|
"grad_norm": 0.6478105925896753,
|
|
"learning_rate": 5.5101820506017865e-06,
|
|
"loss": 0.2246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11758080124855042,
|
|
"step": 3620,
|
|
"valid_targets_mean": 4698.0,
|
|
"valid_targets_min": 3745
|
|
},
|
|
{
|
|
"epoch": 5.484114977307111,
|
|
"grad_norm": 0.5028138672951024,
|
|
"learning_rate": 5.458281172499298e-06,
|
|
"loss": 0.224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10294616222381592,
|
|
"step": 3625,
|
|
"valid_targets_mean": 3319.8,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 5.491679273827534,
|
|
"grad_norm": 0.6323731005697197,
|
|
"learning_rate": 5.406587229200997e-06,
|
|
"loss": 0.2202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14134512841701508,
|
|
"step": 3630,
|
|
"valid_targets_mean": 4991.4,
|
|
"valid_targets_min": 3488
|
|
},
|
|
{
|
|
"epoch": 5.499243570347957,
|
|
"grad_norm": 0.5034108952009632,
|
|
"learning_rate": 5.355100956333546e-06,
|
|
"loss": 0.2195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11892504245042801,
|
|
"step": 3635,
|
|
"valid_targets_mean": 4084.6,
|
|
"valid_targets_min": 799
|
|
},
|
|
{
|
|
"epoch": 5.506807866868381,
|
|
"grad_norm": 0.5216307868936227,
|
|
"learning_rate": 5.303823086568347e-06,
|
|
"loss": 0.2204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11038857698440552,
|
|
"step": 3640,
|
|
"valid_targets_mean": 3856.9,
|
|
"valid_targets_min": 881
|
|
},
|
|
{
|
|
"epoch": 5.5143721633888045,
|
|
"grad_norm": 0.4766023397033775,
|
|
"learning_rate": 5.252754349611182e-06,
|
|
"loss": 0.217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1175159364938736,
|
|
"step": 3645,
|
|
"valid_targets_mean": 5194.5,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 5.521936459909228,
|
|
"grad_norm": 0.5637933349976505,
|
|
"learning_rate": 5.201895472191743e-06,
|
|
"loss": 0.2301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16093158721923828,
|
|
"step": 3650,
|
|
"valid_targets_mean": 5303.1,
|
|
"valid_targets_min": 660
|
|
},
|
|
{
|
|
"epoch": 5.529500756429652,
|
|
"grad_norm": 0.5164070288488309,
|
|
"learning_rate": 5.151247178053349e-06,
|
|
"loss": 0.2205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11764664947986603,
|
|
"step": 3655,
|
|
"valid_targets_mean": 4011.0,
|
|
"valid_targets_min": 698
|
|
},
|
|
{
|
|
"epoch": 5.537065052950076,
|
|
"grad_norm": 0.5245693416412209,
|
|
"learning_rate": 5.100810187942639e-06,
|
|
"loss": 0.2288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11702658981084824,
|
|
"step": 3660,
|
|
"valid_targets_mean": 3607.0,
|
|
"valid_targets_min": 391
|
|
},
|
|
{
|
|
"epoch": 5.544629349470499,
|
|
"grad_norm": 0.5609851898097948,
|
|
"learning_rate": 5.050585219599289e-06,
|
|
"loss": 0.2226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09400570392608643,
|
|
"step": 3665,
|
|
"valid_targets_mean": 3302.9,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 5.552193645990923,
|
|
"grad_norm": 0.5462975554468655,
|
|
"learning_rate": 5.0005729877458155e-06,
|
|
"loss": 0.2327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13201719522476196,
|
|
"step": 3670,
|
|
"valid_targets_mean": 4252.5,
|
|
"valid_targets_min": 772
|
|
},
|
|
{
|
|
"epoch": 5.559757942511347,
|
|
"grad_norm": 0.5157430643615113,
|
|
"learning_rate": 4.950774204077433e-06,
|
|
"loss": 0.2341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14627370238304138,
|
|
"step": 3675,
|
|
"valid_targets_mean": 3872.9,
|
|
"valid_targets_min": 745
|
|
},
|
|
{
|
|
"epoch": 5.56732223903177,
|
|
"grad_norm": 0.5877711100763277,
|
|
"learning_rate": 4.901189577251864e-06,
|
|
"loss": 0.2268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10049215704202652,
|
|
"step": 3680,
|
|
"valid_targets_mean": 2931.9,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 5.574886535552194,
|
|
"grad_norm": 0.543640316542256,
|
|
"learning_rate": 4.851819812879303e-06,
|
|
"loss": 0.243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13745558261871338,
|
|
"step": 3685,
|
|
"valid_targets_mean": 4417.0,
|
|
"valid_targets_min": 805
|
|
},
|
|
{
|
|
"epoch": 5.582450832072618,
|
|
"grad_norm": 0.6107571175475929,
|
|
"learning_rate": 4.80266561351237e-06,
|
|
"loss": 0.231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09468571841716766,
|
|
"step": 3690,
|
|
"valid_targets_mean": 2294.6,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 5.590015128593041,
|
|
"grad_norm": 0.48461853468922805,
|
|
"learning_rate": 4.753727678636082e-06,
|
|
"loss": 0.2269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12260302156209946,
|
|
"step": 3695,
|
|
"valid_targets_mean": 4554.9,
|
|
"valid_targets_min": 827
|
|
},
|
|
{
|
|
"epoch": 5.597579425113464,
|
|
"grad_norm": 0.5838315580464947,
|
|
"learning_rate": 4.7050067046579324e-06,
|
|
"loss": 0.2328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.195402130484581,
|
|
"step": 3700,
|
|
"valid_targets_mean": 5391.2,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 5.605143721633888,
|
|
"grad_norm": 0.5254357331754452,
|
|
"learning_rate": 4.656503384897988e-06,
|
|
"loss": 0.2204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1002720519900322,
|
|
"step": 3705,
|
|
"valid_targets_mean": 3504.9,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 5.612708018154311,
|
|
"grad_norm": 0.5850947517859505,
|
|
"learning_rate": 4.6082184095789686e-06,
|
|
"loss": 0.2226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.151791512966156,
|
|
"step": 3710,
|
|
"valid_targets_mean": 4331.5,
|
|
"valid_targets_min": 823
|
|
},
|
|
{
|
|
"epoch": 5.620272314674735,
|
|
"grad_norm": 0.5425578412190243,
|
|
"learning_rate": 4.56015246581649e-06,
|
|
"loss": 0.2333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11020559072494507,
|
|
"step": 3715,
|
|
"valid_targets_mean": 3797.9,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 5.627836611195159,
|
|
"grad_norm": 0.48945811060899597,
|
|
"learning_rate": 4.512306237609232e-06,
|
|
"loss": 0.2194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10962755978107452,
|
|
"step": 3720,
|
|
"valid_targets_mean": 4644.1,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 5.635400907715582,
|
|
"grad_norm": 0.5983872354698505,
|
|
"learning_rate": 4.464680405829249e-06,
|
|
"loss": 0.234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11576041579246521,
|
|
"step": 3725,
|
|
"valid_targets_mean": 3983.8,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 5.642965204236006,
|
|
"grad_norm": 0.5591528999009988,
|
|
"learning_rate": 4.4172756482122535e-06,
|
|
"loss": 0.2213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14418798685073853,
|
|
"step": 3730,
|
|
"valid_targets_mean": 4599.8,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 5.65052950075643,
|
|
"grad_norm": 0.5601816817838403,
|
|
"learning_rate": 4.370092639347978e-06,
|
|
"loss": 0.2216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14422723650932312,
|
|
"step": 3735,
|
|
"valid_targets_mean": 4596.2,
|
|
"valid_targets_min": 858
|
|
},
|
|
{
|
|
"epoch": 5.6580937972768535,
|
|
"grad_norm": 0.5600494014674734,
|
|
"learning_rate": 4.3231320506705775e-06,
|
|
"loss": 0.2269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12326321750879288,
|
|
"step": 3740,
|
|
"valid_targets_mean": 4538.8,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 5.665658093797277,
|
|
"grad_norm": 0.49448381625627236,
|
|
"learning_rate": 4.2763945504490835e-06,
|
|
"loss": 0.2251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11729425191879272,
|
|
"step": 3745,
|
|
"valid_targets_mean": 4507.4,
|
|
"valid_targets_min": 3792
|
|
},
|
|
{
|
|
"epoch": 5.673222390317701,
|
|
"grad_norm": 0.5291149918725461,
|
|
"learning_rate": 4.229880803777859e-06,
|
|
"loss": 0.2385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10756662487983704,
|
|
"step": 3750,
|
|
"valid_targets_mean": 4034.1,
|
|
"valid_targets_min": 810
|
|
},
|
|
{
|
|
"epoch": 5.680786686838124,
|
|
"grad_norm": 0.5468120023585848,
|
|
"learning_rate": 4.183591472567186e-06,
|
|
"loss": 0.2338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1262592375278473,
|
|
"step": 3755,
|
|
"valid_targets_mean": 3649.0,
|
|
"valid_targets_min": 447
|
|
},
|
|
{
|
|
"epoch": 5.688350983358547,
|
|
"grad_norm": 0.52963998863326,
|
|
"learning_rate": 4.137527215533805e-06,
|
|
"loss": 0.2254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10323531925678253,
|
|
"step": 3760,
|
|
"valid_targets_mean": 3386.6,
|
|
"valid_targets_min": 479
|
|
},
|
|
{
|
|
"epoch": 5.695915279878971,
|
|
"grad_norm": 0.5508223723168315,
|
|
"learning_rate": 4.091688688191564e-06,
|
|
"loss": 0.2164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11313928663730621,
|
|
"step": 3765,
|
|
"valid_targets_mean": 3497.0,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 5.703479576399395,
|
|
"grad_norm": 0.5517175217533992,
|
|
"learning_rate": 4.046076542842077e-06,
|
|
"loss": 0.226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10223741829395294,
|
|
"step": 3770,
|
|
"valid_targets_mean": 3137.2,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 5.711043872919818,
|
|
"grad_norm": 0.5880688814263535,
|
|
"learning_rate": 4.000691428565453e-06,
|
|
"loss": 0.2241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10697086900472641,
|
|
"step": 3775,
|
|
"valid_targets_mean": 2858.1,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 5.718608169440242,
|
|
"grad_norm": 0.5984695268689004,
|
|
"learning_rate": 3.9555339912110355e-06,
|
|
"loss": 0.2176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12040106952190399,
|
|
"step": 3780,
|
|
"valid_targets_mean": 3300.5,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 5.726172465960666,
|
|
"grad_norm": 0.5902494912416314,
|
|
"learning_rate": 3.910604873388248e-06,
|
|
"loss": 0.2359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1301366686820984,
|
|
"step": 3785,
|
|
"valid_targets_mean": 3253.8,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 5.733736762481089,
|
|
"grad_norm": 0.5416669205262461,
|
|
"learning_rate": 3.8659047144574245e-06,
|
|
"loss": 0.2238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09919160604476929,
|
|
"step": 3790,
|
|
"valid_targets_mean": 3378.1,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 5.741301059001513,
|
|
"grad_norm": 0.5328509417373014,
|
|
"learning_rate": 3.821434150520715e-06,
|
|
"loss": 0.2295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11845898628234863,
|
|
"step": 3795,
|
|
"valid_targets_mean": 4515.0,
|
|
"valid_targets_min": 3372
|
|
},
|
|
{
|
|
"epoch": 5.748865355521937,
|
|
"grad_norm": 0.42778813680710476,
|
|
"learning_rate": 3.777193814413045e-06,
|
|
"loss": 0.219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09324002265930176,
|
|
"step": 3800,
|
|
"valid_targets_mean": 4874.9,
|
|
"valid_targets_min": 3000
|
|
},
|
|
{
|
|
"epoch": 5.75642965204236,
|
|
"grad_norm": 0.4392660567656385,
|
|
"learning_rate": 3.7331843356930806e-06,
|
|
"loss": 0.2274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0949675664305687,
|
|
"step": 3805,
|
|
"valid_targets_mean": 4577.6,
|
|
"valid_targets_min": 3296
|
|
},
|
|
{
|
|
"epoch": 5.763993948562784,
|
|
"grad_norm": 0.5489391021047781,
|
|
"learning_rate": 3.6894063406343094e-06,
|
|
"loss": 0.2329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13235178589820862,
|
|
"step": 3810,
|
|
"valid_targets_mean": 4513.9,
|
|
"valid_targets_min": 966
|
|
},
|
|
{
|
|
"epoch": 5.771558245083208,
|
|
"grad_norm": 0.5351037975690247,
|
|
"learning_rate": 3.645860452216099e-06,
|
|
"loss": 0.2169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07503627985715866,
|
|
"step": 3815,
|
|
"valid_targets_mean": 2439.2,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 5.7791225416036305,
|
|
"grad_norm": 0.5296733667629648,
|
|
"learning_rate": 3.6025472901148463e-06,
|
|
"loss": 0.2433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0979020893573761,
|
|
"step": 3820,
|
|
"valid_targets_mean": 3563.8,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 5.786686838124054,
|
|
"grad_norm": 0.5235669847505989,
|
|
"learning_rate": 3.5594674706951505e-06,
|
|
"loss": 0.222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10928912460803986,
|
|
"step": 3825,
|
|
"valid_targets_mean": 3744.6,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 5.794251134644478,
|
|
"grad_norm": 0.48374779766958986,
|
|
"learning_rate": 3.5166216070010538e-06,
|
|
"loss": 0.2173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11196106672286987,
|
|
"step": 3830,
|
|
"valid_targets_mean": 4721.8,
|
|
"valid_targets_min": 3855
|
|
},
|
|
{
|
|
"epoch": 5.8018154311649015,
|
|
"grad_norm": 0.4871413591420864,
|
|
"learning_rate": 3.474010308747291e-06,
|
|
"loss": 0.2328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11395436525344849,
|
|
"step": 3835,
|
|
"valid_targets_mean": 4396.9,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 5.809379727685325,
|
|
"grad_norm": 0.5249108179331049,
|
|
"learning_rate": 3.431634182310648e-06,
|
|
"loss": 0.2178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12480716407299042,
|
|
"step": 3840,
|
|
"valid_targets_mean": 4164.4,
|
|
"valid_targets_min": 3689
|
|
},
|
|
{
|
|
"epoch": 5.816944024205749,
|
|
"grad_norm": 0.6030971890878687,
|
|
"learning_rate": 3.3894938307213152e-06,
|
|
"loss": 0.226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1382315307855606,
|
|
"step": 3845,
|
|
"valid_targets_mean": 3674.0,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 5.8245083207261725,
|
|
"grad_norm": 0.5733268334417382,
|
|
"learning_rate": 3.3475898536543027e-06,
|
|
"loss": 0.2258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11047233641147614,
|
|
"step": 3850,
|
|
"valid_targets_mean": 3233.5,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 5.832072617246596,
|
|
"grad_norm": 0.5782736591087324,
|
|
"learning_rate": 3.305922847420917e-06,
|
|
"loss": 0.2217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12621629238128662,
|
|
"step": 3855,
|
|
"valid_targets_mean": 3784.2,
|
|
"valid_targets_min": 835
|
|
},
|
|
{
|
|
"epoch": 5.83963691376702,
|
|
"grad_norm": 0.5386262591565005,
|
|
"learning_rate": 3.2644934049602563e-06,
|
|
"loss": 0.2426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10238965600728989,
|
|
"step": 3860,
|
|
"valid_targets_mean": 3572.4,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 5.8472012102874436,
|
|
"grad_norm": 0.5882531578141996,
|
|
"learning_rate": 3.2233021158307977e-06,
|
|
"loss": 0.2333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09710794687271118,
|
|
"step": 3865,
|
|
"valid_targets_mean": 2457.5,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 5.854765506807867,
|
|
"grad_norm": 0.5337215237913356,
|
|
"learning_rate": 3.1823495662019945e-06,
|
|
"loss": 0.2291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1113915890455246,
|
|
"step": 3870,
|
|
"valid_targets_mean": 4047.9,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 5.86232980332829,
|
|
"grad_norm": 0.5188071083781799,
|
|
"learning_rate": 3.1416363388459327e-06,
|
|
"loss": 0.2177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09841391444206238,
|
|
"step": 3875,
|
|
"valid_targets_mean": 3234.2,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 5.869894099848714,
|
|
"grad_norm": 0.5211127531993051,
|
|
"learning_rate": 3.101163013129045e-06,
|
|
"loss": 0.2302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12311208993196487,
|
|
"step": 3880,
|
|
"valid_targets_mean": 3946.9,
|
|
"valid_targets_min": 483
|
|
},
|
|
{
|
|
"epoch": 5.877458396369137,
|
|
"grad_norm": 0.4967364404557958,
|
|
"learning_rate": 3.0609301650038636e-06,
|
|
"loss": 0.2251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0963066816329956,
|
|
"step": 3885,
|
|
"valid_targets_mean": 3839.8,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 5.885022692889561,
|
|
"grad_norm": 0.4927150861516569,
|
|
"learning_rate": 3.02093836700081e-06,
|
|
"loss": 0.23,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11219263076782227,
|
|
"step": 3890,
|
|
"valid_targets_mean": 5074.8,
|
|
"valid_targets_min": 3986
|
|
},
|
|
{
|
|
"epoch": 5.892586989409985,
|
|
"grad_norm": 0.569500182200026,
|
|
"learning_rate": 2.9811881882200743e-06,
|
|
"loss": 0.2248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08702008426189423,
|
|
"step": 3895,
|
|
"valid_targets_mean": 2605.2,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 5.900151285930408,
|
|
"grad_norm": 0.6133603710046859,
|
|
"learning_rate": 2.9416801943234998e-06,
|
|
"loss": 0.2403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0889616459608078,
|
|
"step": 3900,
|
|
"valid_targets_mean": 2344.0,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 5.907715582450832,
|
|
"grad_norm": 0.5358621550023456,
|
|
"learning_rate": 2.9024149475265373e-06,
|
|
"loss": 0.2292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11849069595336914,
|
|
"step": 3905,
|
|
"valid_targets_mean": 3760.9,
|
|
"valid_targets_min": 451
|
|
},
|
|
{
|
|
"epoch": 5.915279878971256,
|
|
"grad_norm": 0.5351793055149552,
|
|
"learning_rate": 2.863393006590238e-06,
|
|
"loss": 0.2279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10792721807956696,
|
|
"step": 3910,
|
|
"valid_targets_mean": 2882.1,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 5.922844175491679,
|
|
"grad_norm": 0.6228665365040051,
|
|
"learning_rate": 2.8246149268133204e-06,
|
|
"loss": 0.2248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12268434464931488,
|
|
"step": 3915,
|
|
"valid_targets_mean": 3233.6,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 5.930408472012103,
|
|
"grad_norm": 0.5186784110206799,
|
|
"learning_rate": 2.786081260024236e-06,
|
|
"loss": 0.2235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0900840163230896,
|
|
"step": 3920,
|
|
"valid_targets_mean": 3477.9,
|
|
"valid_targets_min": 892
|
|
},
|
|
{
|
|
"epoch": 5.937972768532527,
|
|
"grad_norm": 0.6076742297851535,
|
|
"learning_rate": 2.747792554573352e-06,
|
|
"loss": 0.2213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0953172892332077,
|
|
"step": 3925,
|
|
"valid_targets_mean": 2125.0,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 5.94553706505295,
|
|
"grad_norm": 0.6095727699113964,
|
|
"learning_rate": 2.7097493553251307e-06,
|
|
"loss": 0.2212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11913371831178665,
|
|
"step": 3930,
|
|
"valid_targets_mean": 2963.9,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 5.953101361573374,
|
|
"grad_norm": 0.5264791765676482,
|
|
"learning_rate": 2.6719522036503654e-06,
|
|
"loss": 0.2266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08075821399688721,
|
|
"step": 3935,
|
|
"valid_targets_mean": 3046.2,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 5.960665658093797,
|
|
"grad_norm": 0.5358754181046668,
|
|
"learning_rate": 2.634401637418511e-06,
|
|
"loss": 0.2241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1385650634765625,
|
|
"step": 3940,
|
|
"valid_targets_mean": 4042.2,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 5.968229954614221,
|
|
"grad_norm": 0.5051172805194647,
|
|
"learning_rate": 2.5970981909899817e-06,
|
|
"loss": 0.2337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12057952582836151,
|
|
"step": 3945,
|
|
"valid_targets_mean": 4095.2,
|
|
"valid_targets_min": 1103
|
|
},
|
|
{
|
|
"epoch": 5.975794251134644,
|
|
"grad_norm": 0.5533006639707363,
|
|
"learning_rate": 2.5600423952085884e-06,
|
|
"loss": 0.2323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12393482774496078,
|
|
"step": 3950,
|
|
"valid_targets_mean": 4100.1,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 5.983358547655068,
|
|
"grad_norm": 0.49327123157809877,
|
|
"learning_rate": 2.5232347773939704e-06,
|
|
"loss": 0.2315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09166641533374786,
|
|
"step": 3955,
|
|
"valid_targets_mean": 4847.8,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 5.990922844175492,
|
|
"grad_norm": 0.5513564962551734,
|
|
"learning_rate": 2.4866758613340734e-06,
|
|
"loss": 0.2402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11418332159519196,
|
|
"step": 3960,
|
|
"valid_targets_mean": 3724.1,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 5.998487140695915,
|
|
"grad_norm": 0.5525091252018063,
|
|
"learning_rate": 2.4503661672777244e-06,
|
|
"loss": 0.2189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11277347803115845,
|
|
"step": 3965,
|
|
"valid_targets_mean": 3906.5,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 6.006051437216339,
|
|
"grad_norm": 0.45352727984538066,
|
|
"learning_rate": 2.4143062119272263e-06,
|
|
"loss": 0.2226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09691131114959717,
|
|
"step": 3970,
|
|
"valid_targets_mean": 4330.2,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 6.013615733736763,
|
|
"grad_norm": 0.5957814416062671,
|
|
"learning_rate": 2.3784965084309697e-06,
|
|
"loss": 0.2197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10840865969657898,
|
|
"step": 3975,
|
|
"valid_targets_mean": 3184.1,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 6.021180030257186,
|
|
"grad_norm": 0.5895704197166134,
|
|
"learning_rate": 2.3429375663761734e-06,
|
|
"loss": 0.2303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.129806786775589,
|
|
"step": 3980,
|
|
"valid_targets_mean": 3306.1,
|
|
"valid_targets_min": 937
|
|
},
|
|
{
|
|
"epoch": 6.02874432677761,
|
|
"grad_norm": 0.5413203557416694,
|
|
"learning_rate": 2.307629891781611e-06,
|
|
"loss": 0.2284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12136664986610413,
|
|
"step": 3985,
|
|
"valid_targets_mean": 4330.4,
|
|
"valid_targets_min": 641
|
|
},
|
|
{
|
|
"epoch": 6.036308623298034,
|
|
"grad_norm": 0.5236850551161747,
|
|
"learning_rate": 2.2725739870904075e-06,
|
|
"loss": 0.2294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11980330944061279,
|
|
"step": 3990,
|
|
"valid_targets_mean": 4893.6,
|
|
"valid_targets_min": 3644
|
|
},
|
|
{
|
|
"epoch": 6.043872919818457,
|
|
"grad_norm": 0.6603537533515608,
|
|
"learning_rate": 2.2377703511629023e-06,
|
|
"loss": 0.2127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0936698466539383,
|
|
"step": 3995,
|
|
"valid_targets_mean": 3348.6,
|
|
"valid_targets_min": 890
|
|
},
|
|
{
|
|
"epoch": 6.05143721633888,
|
|
"grad_norm": 0.49564986856836585,
|
|
"learning_rate": 2.2032194792695517e-06,
|
|
"loss": 0.2283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.120844267308712,
|
|
"step": 4000,
|
|
"valid_targets_mean": 3994.6,
|
|
"valid_targets_min": 468
|
|
},
|
|
{
|
|
"epoch": 6.059001512859304,
|
|
"grad_norm": 0.5792241104690146,
|
|
"learning_rate": 2.1689218630838528e-06,
|
|
"loss": 0.2123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08625797182321548,
|
|
"step": 4005,
|
|
"valid_targets_mean": 2681.4,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 6.0665658093797274,
|
|
"grad_norm": 0.5611824896507053,
|
|
"learning_rate": 2.1348779906753856e-06,
|
|
"loss": 0.2245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12583735585212708,
|
|
"step": 4010,
|
|
"valid_targets_mean": 3097.9,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 6.074130105900151,
|
|
"grad_norm": 0.545894524006989,
|
|
"learning_rate": 2.101088346502833e-06,
|
|
"loss": 0.2259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1237238198518753,
|
|
"step": 4015,
|
|
"valid_targets_mean": 4516.0,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 6.081694402420575,
|
|
"grad_norm": 0.5277319929067911,
|
|
"learning_rate": 2.067553411407117e-06,
|
|
"loss": 0.2071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11495663225650787,
|
|
"step": 4020,
|
|
"valid_targets_mean": 3717.8,
|
|
"valid_targets_min": 541
|
|
},
|
|
{
|
|
"epoch": 6.0892586989409985,
|
|
"grad_norm": 0.5158098665303287,
|
|
"learning_rate": 2.0342736626045356e-06,
|
|
"loss": 0.2131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10669147968292236,
|
|
"step": 4025,
|
|
"valid_targets_mean": 3464.4,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 6.096822995461422,
|
|
"grad_norm": 0.5346094921296929,
|
|
"learning_rate": 2.0012495736799753e-06,
|
|
"loss": 0.2127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11695527285337448,
|
|
"step": 4030,
|
|
"valid_targets_mean": 4088.2,
|
|
"valid_targets_min": 814
|
|
},
|
|
{
|
|
"epoch": 6.104387291981846,
|
|
"grad_norm": 0.5506001797534098,
|
|
"learning_rate": 1.9684816145801776e-06,
|
|
"loss": 0.2241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10287513583898544,
|
|
"step": 4035,
|
|
"valid_targets_mean": 3466.6,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 6.1119515885022695,
|
|
"grad_norm": 0.8181102407236679,
|
|
"learning_rate": 1.9359702516070553e-06,
|
|
"loss": 0.2274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11554419994354248,
|
|
"step": 4040,
|
|
"valid_targets_mean": 3140.9,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 6.119515885022693,
|
|
"grad_norm": 0.7531805518572948,
|
|
"learning_rate": 1.9037159474110333e-06,
|
|
"loss": 0.2201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1446792483329773,
|
|
"step": 4045,
|
|
"valid_targets_mean": 3889.9,
|
|
"valid_targets_min": 360
|
|
},
|
|
{
|
|
"epoch": 6.127080181543117,
|
|
"grad_norm": 0.6285369557231075,
|
|
"learning_rate": 1.8717191609844931e-06,
|
|
"loss": 0.2313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13148529827594757,
|
|
"step": 4050,
|
|
"valid_targets_mean": 3155.9,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 6.1346444780635405,
|
|
"grad_norm": 0.5986059219651446,
|
|
"learning_rate": 1.8399803476552303e-06,
|
|
"loss": 0.2177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09008555114269257,
|
|
"step": 4055,
|
|
"valid_targets_mean": 2831.5,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 6.142208774583963,
|
|
"grad_norm": 0.62523024723556,
|
|
"learning_rate": 1.8084999590799678e-06,
|
|
"loss": 0.2167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11269061267375946,
|
|
"step": 4060,
|
|
"valid_targets_mean": 3279.9,
|
|
"valid_targets_min": 439
|
|
},
|
|
{
|
|
"epoch": 6.149773071104387,
|
|
"grad_norm": 0.5388433375387096,
|
|
"learning_rate": 1.7772784432379398e-06,
|
|
"loss": 0.218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0928097665309906,
|
|
"step": 4065,
|
|
"valid_targets_mean": 3280.4,
|
|
"valid_targets_min": 485
|
|
},
|
|
{
|
|
"epoch": 6.157337367624811,
|
|
"grad_norm": 0.5655558658679207,
|
|
"learning_rate": 1.7463162444245174e-06,
|
|
"loss": 0.2229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1104118674993515,
|
|
"step": 4070,
|
|
"valid_targets_mean": 3934.4,
|
|
"valid_targets_min": 1022
|
|
},
|
|
{
|
|
"epoch": 6.164901664145234,
|
|
"grad_norm": 0.5477443157171311,
|
|
"learning_rate": 1.7156138032448621e-06,
|
|
"loss": 0.2213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11364386230707169,
|
|
"step": 4075,
|
|
"valid_targets_mean": 3346.0,
|
|
"valid_targets_min": 651
|
|
},
|
|
{
|
|
"epoch": 6.172465960665658,
|
|
"grad_norm": 0.5516502081339758,
|
|
"learning_rate": 1.6851715566076942e-06,
|
|
"loss": 0.2147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10026207566261292,
|
|
"step": 4080,
|
|
"valid_targets_mean": 3075.0,
|
|
"valid_targets_min": 390
|
|
},
|
|
{
|
|
"epoch": 6.180030257186082,
|
|
"grad_norm": 0.5374387361668918,
|
|
"learning_rate": 1.6549899377190448e-06,
|
|
"loss": 0.2143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11470025777816772,
|
|
"step": 4085,
|
|
"valid_targets_mean": 4053.9,
|
|
"valid_targets_min": 1257
|
|
},
|
|
{
|
|
"epoch": 6.187594553706505,
|
|
"grad_norm": 0.56172359419519,
|
|
"learning_rate": 1.6250693760761072e-06,
|
|
"loss": 0.2332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1305641233921051,
|
|
"step": 4090,
|
|
"valid_targets_mean": 3778.2,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 6.195158850226929,
|
|
"grad_norm": 0.5203313296458221,
|
|
"learning_rate": 1.5954102974611218e-06,
|
|
"loss": 0.216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11380473524332047,
|
|
"step": 4095,
|
|
"valid_targets_mean": 4015.6,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 6.202723146747353,
|
|
"grad_norm": 0.5387447293286791,
|
|
"learning_rate": 1.5660131239353037e-06,
|
|
"loss": 0.218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09115037322044373,
|
|
"step": 4100,
|
|
"valid_targets_mean": 2968.4,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 6.210287443267776,
|
|
"grad_norm": 0.6569556632196163,
|
|
"learning_rate": 1.536878273832858e-06,
|
|
"loss": 0.2301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10455656051635742,
|
|
"step": 4105,
|
|
"valid_targets_mean": 2443.0,
|
|
"valid_targets_min": 297
|
|
},
|
|
{
|
|
"epoch": 6.2178517397882,
|
|
"grad_norm": 0.5186933603852502,
|
|
"learning_rate": 1.5080061617550157e-06,
|
|
"loss": 0.2194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08145958185195923,
|
|
"step": 4110,
|
|
"valid_targets_mean": 2906.2,
|
|
"valid_targets_min": 550
|
|
},
|
|
{
|
|
"epoch": 6.225416036308624,
|
|
"grad_norm": 0.6542263151921909,
|
|
"learning_rate": 1.4793971985641298e-06,
|
|
"loss": 0.2248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10048550367355347,
|
|
"step": 4115,
|
|
"valid_targets_mean": 2127.5,
|
|
"valid_targets_min": 510
|
|
},
|
|
{
|
|
"epoch": 6.2329803328290465,
|
|
"grad_norm": 0.5787158093127518,
|
|
"learning_rate": 1.45105179137784e-06,
|
|
"loss": 0.2262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17382967472076416,
|
|
"step": 4120,
|
|
"valid_targets_mean": 5392.6,
|
|
"valid_targets_min": 787
|
|
},
|
|
{
|
|
"epoch": 6.24054462934947,
|
|
"grad_norm": 0.5460750910901433,
|
|
"learning_rate": 1.4229703435632702e-06,
|
|
"loss": 0.2261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12038914114236832,
|
|
"step": 4125,
|
|
"valid_targets_mean": 3857.6,
|
|
"valid_targets_min": 711
|
|
},
|
|
{
|
|
"epoch": 6.248108925869894,
|
|
"grad_norm": 0.595586390528161,
|
|
"learning_rate": 1.395153254731285e-06,
|
|
"loss": 0.2293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10090851038694382,
|
|
"step": 4130,
|
|
"valid_targets_mean": 3161.4,
|
|
"valid_targets_min": 417
|
|
},
|
|
{
|
|
"epoch": 6.2556732223903175,
|
|
"grad_norm": 0.6912305819088636,
|
|
"learning_rate": 1.367600920730816e-06,
|
|
"loss": 0.2201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12177952378988266,
|
|
"step": 4135,
|
|
"valid_targets_mean": 2418.4,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 6.263237518910741,
|
|
"grad_norm": 0.5117979745497702,
|
|
"learning_rate": 1.3403137336432193e-06,
|
|
"loss": 0.2194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0830102413892746,
|
|
"step": 4140,
|
|
"valid_targets_mean": 3026.8,
|
|
"valid_targets_min": 469
|
|
},
|
|
{
|
|
"epoch": 6.270801815431165,
|
|
"grad_norm": 0.5334390637422874,
|
|
"learning_rate": 1.313292081776698e-06,
|
|
"loss": 0.2215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10060387849807739,
|
|
"step": 4145,
|
|
"valid_targets_mean": 3685.6,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 6.278366111951589,
|
|
"grad_norm": 0.6228768374120917,
|
|
"learning_rate": 1.286536349660783e-06,
|
|
"loss": 0.227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08537262678146362,
|
|
"step": 4150,
|
|
"valid_targets_mean": 2022.4,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 6.285930408472012,
|
|
"grad_norm": 0.5428689742954951,
|
|
"learning_rate": 1.2600469180408403e-06,
|
|
"loss": 0.215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10087640583515167,
|
|
"step": 4155,
|
|
"valid_targets_mean": 4174.4,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 6.293494704992436,
|
|
"grad_norm": 0.5766246771078479,
|
|
"learning_rate": 1.2338241638726811e-06,
|
|
"loss": 0.2161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11865511536598206,
|
|
"step": 4160,
|
|
"valid_targets_mean": 3800.6,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 6.30105900151286,
|
|
"grad_norm": 0.5881669084542495,
|
|
"learning_rate": 1.2078684603171787e-06,
|
|
"loss": 0.2166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10526623576879501,
|
|
"step": 4165,
|
|
"valid_targets_mean": 3082.2,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 6.308623298033283,
|
|
"grad_norm": 0.6595457805649002,
|
|
"learning_rate": 1.1821801767349616e-06,
|
|
"loss": 0.2202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14501163363456726,
|
|
"step": 4170,
|
|
"valid_targets_mean": 3612.8,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 6.316187594553707,
|
|
"grad_norm": 0.5461423372428273,
|
|
"learning_rate": 1.1567596786811652e-06,
|
|
"loss": 0.2195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11895900219678879,
|
|
"step": 4175,
|
|
"valid_targets_mean": 4161.2,
|
|
"valid_targets_min": 603
|
|
},
|
|
{
|
|
"epoch": 6.32375189107413,
|
|
"grad_norm": 0.6226314943116594,
|
|
"learning_rate": 1.1316073279002172e-06,
|
|
"loss": 0.2159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11480500549077988,
|
|
"step": 4180,
|
|
"valid_targets_mean": 4005.0,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 6.331316187594553,
|
|
"grad_norm": 0.49172636471242304,
|
|
"learning_rate": 1.1067234823206951e-06,
|
|
"loss": 0.2084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12670624256134033,
|
|
"step": 4185,
|
|
"valid_targets_mean": 5089.0,
|
|
"valid_targets_min": 3021
|
|
},
|
|
{
|
|
"epoch": 6.338880484114977,
|
|
"grad_norm": 0.6547846676712972,
|
|
"learning_rate": 1.0821084960502404e-06,
|
|
"loss": 0.2217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11138174682855606,
|
|
"step": 4190,
|
|
"valid_targets_mean": 2883.0,
|
|
"valid_targets_min": 805
|
|
},
|
|
{
|
|
"epoch": 6.346444780635401,
|
|
"grad_norm": 0.6259171969089032,
|
|
"learning_rate": 1.0577627193705098e-06,
|
|
"loss": 0.225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09065258502960205,
|
|
"step": 4195,
|
|
"valid_targets_mean": 2482.1,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 6.354009077155824,
|
|
"grad_norm": 0.5342948069929456,
|
|
"learning_rate": 1.0336864987321938e-06,
|
|
"loss": 0.2121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11155544966459274,
|
|
"step": 4200,
|
|
"valid_targets_mean": 4771.8,
|
|
"valid_targets_min": 2884
|
|
},
|
|
{
|
|
"epoch": 6.361573373676248,
|
|
"grad_norm": 0.496291786324435,
|
|
"learning_rate": 1.0098801767500842e-06,
|
|
"loss": 0.2218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09988661855459213,
|
|
"step": 4205,
|
|
"valid_targets_mean": 4148.9,
|
|
"valid_targets_min": 447
|
|
},
|
|
{
|
|
"epoch": 6.369137670196672,
|
|
"grad_norm": 0.5441877407705061,
|
|
"learning_rate": 9.863440921982104e-07,
|
|
"loss": 0.2193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09365123510360718,
|
|
"step": 4210,
|
|
"valid_targets_mean": 3768.2,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 6.376701966717095,
|
|
"grad_norm": 0.6174821658982794,
|
|
"learning_rate": 9.630785800049947e-07,
|
|
"loss": 0.2252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.104268878698349,
|
|
"step": 4215,
|
|
"valid_targets_mean": 2617.1,
|
|
"valid_targets_min": 573
|
|
},
|
|
{
|
|
"epoch": 6.384266263237519,
|
|
"grad_norm": 0.613009175482621,
|
|
"learning_rate": 9.40083971248511e-07,
|
|
"loss": 0.2055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09341217577457428,
|
|
"step": 4220,
|
|
"valid_targets_mean": 2642.2,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 6.391830559757943,
|
|
"grad_norm": 0.6144656597740489,
|
|
"learning_rate": 9.173605931517526e-07,
|
|
"loss": 0.2165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07638631761074066,
|
|
"step": 4225,
|
|
"valid_targets_mean": 2078.6,
|
|
"valid_targets_min": 332
|
|
},
|
|
{
|
|
"epoch": 6.3993948562783665,
|
|
"grad_norm": 0.6211401158070364,
|
|
"learning_rate": 8.949087690780023e-07,
|
|
"loss": 0.2173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1346590220928192,
|
|
"step": 4230,
|
|
"valid_targets_mean": 3319.0,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 6.406959152798789,
|
|
"grad_norm": 0.5709969541245438,
|
|
"learning_rate": 8.727288185262029e-07,
|
|
"loss": 0.2203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0999768078327179,
|
|
"step": 4235,
|
|
"valid_targets_mean": 3049.5,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 6.414523449319213,
|
|
"grad_norm": 0.5606366072163738,
|
|
"learning_rate": 8.508210571264186e-07,
|
|
"loss": 0.2113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1318577229976654,
|
|
"step": 4240,
|
|
"valid_targets_mean": 4668.8,
|
|
"valid_targets_min": 3378
|
|
},
|
|
{
|
|
"epoch": 6.422087745839637,
|
|
"grad_norm": 0.5580967595788073,
|
|
"learning_rate": 8.291857966353545e-07,
|
|
"loss": 0.2186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09770334511995316,
|
|
"step": 4245,
|
|
"valid_targets_mean": 3659.5,
|
|
"valid_targets_min": 547
|
|
},
|
|
{
|
|
"epoch": 6.42965204236006,
|
|
"grad_norm": 0.5357390141161772,
|
|
"learning_rate": 8.078233449319128e-07,
|
|
"loss": 0.2248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10540822148323059,
|
|
"step": 4250,
|
|
"valid_targets_mean": 4526.9,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 6.437216338880484,
|
|
"grad_norm": 0.6518015178478276,
|
|
"learning_rate": 7.867340060128037e-07,
|
|
"loss": 0.2137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0827440470457077,
|
|
"step": 4255,
|
|
"valid_targets_mean": 2510.6,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 6.444780635400908,
|
|
"grad_norm": 0.5402209087188705,
|
|
"learning_rate": 7.659180799882371e-07,
|
|
"loss": 0.2183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09110354632139206,
|
|
"step": 4260,
|
|
"valid_targets_mean": 3829.9,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 6.452344931921331,
|
|
"grad_norm": 0.5338316741599194,
|
|
"learning_rate": 7.453758630776398e-07,
|
|
"loss": 0.2237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11574132740497589,
|
|
"step": 4265,
|
|
"valid_targets_mean": 4008.4,
|
|
"valid_targets_min": 491
|
|
},
|
|
{
|
|
"epoch": 6.459909228441755,
|
|
"grad_norm": 0.5163391554985158,
|
|
"learning_rate": 7.25107647605432e-07,
|
|
"loss": 0.2175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13353493809700012,
|
|
"step": 4270,
|
|
"valid_targets_mean": 3854.1,
|
|
"valid_targets_min": 837
|
|
},
|
|
{
|
|
"epoch": 6.467473524962179,
|
|
"grad_norm": 0.48892056264914774,
|
|
"learning_rate": 7.051137219968885e-07,
|
|
"loss": 0.2115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0968899056315422,
|
|
"step": 4275,
|
|
"valid_targets_mean": 3780.4,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 6.475037821482602,
|
|
"grad_norm": 0.8380846888052359,
|
|
"learning_rate": 6.853943707740218e-07,
|
|
"loss": 0.2263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11159957945346832,
|
|
"step": 4280,
|
|
"valid_targets_mean": 1500.0,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 6.482602118003026,
|
|
"grad_norm": 0.5099169912907282,
|
|
"learning_rate": 6.659498745515258e-07,
|
|
"loss": 0.2206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10653482377529144,
|
|
"step": 4285,
|
|
"valid_targets_mean": 3723.1,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 6.49016641452345,
|
|
"grad_norm": 0.4977149850673482,
|
|
"learning_rate": 6.467805100328117e-07,
|
|
"loss": 0.209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10802928358316422,
|
|
"step": 4290,
|
|
"valid_targets_mean": 4210.2,
|
|
"valid_targets_min": 1094
|
|
},
|
|
{
|
|
"epoch": 6.497730711043873,
|
|
"grad_norm": 0.6103254355663807,
|
|
"learning_rate": 6.278865500060271e-07,
|
|
"loss": 0.223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09756436198949814,
|
|
"step": 4295,
|
|
"valid_targets_mean": 2797.5,
|
|
"valid_targets_min": 374
|
|
},
|
|
{
|
|
"epoch": 6.505295007564296,
|
|
"grad_norm": 0.6094015398865752,
|
|
"learning_rate": 6.092682633402103e-07,
|
|
"loss": 0.2218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1473299264907837,
|
|
"step": 4300,
|
|
"valid_targets_mean": 4811.2,
|
|
"valid_targets_min": 398
|
|
},
|
|
{
|
|
"epoch": 6.51285930408472,
|
|
"grad_norm": 0.5737017278134778,
|
|
"learning_rate": 5.909259149814505e-07,
|
|
"loss": 0.2289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11430441588163376,
|
|
"step": 4305,
|
|
"valid_targets_mean": 3443.8,
|
|
"valid_targets_min": 200
|
|
},
|
|
{
|
|
"epoch": 6.5204236006051435,
|
|
"grad_norm": 0.5857543624726874,
|
|
"learning_rate": 5.728597659491142e-07,
|
|
"loss": 0.2325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14544573426246643,
|
|
"step": 4310,
|
|
"valid_targets_mean": 4195.5,
|
|
"valid_targets_min": 760
|
|
},
|
|
{
|
|
"epoch": 6.527987897125567,
|
|
"grad_norm": 0.5513950976392505,
|
|
"learning_rate": 5.550700733321379e-07,
|
|
"loss": 0.2295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09965312480926514,
|
|
"step": 4315,
|
|
"valid_targets_mean": 3054.6,
|
|
"valid_targets_min": 683
|
|
},
|
|
{
|
|
"epoch": 6.535552193645991,
|
|
"grad_norm": 0.507483945143279,
|
|
"learning_rate": 5.375570902853633e-07,
|
|
"loss": 0.2253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13092470169067383,
|
|
"step": 4320,
|
|
"valid_targets_mean": 4330.6,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 6.5431164901664145,
|
|
"grad_norm": 0.4966522998001979,
|
|
"learning_rate": 5.203210660259439e-07,
|
|
"loss": 0.2203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11345656961202621,
|
|
"step": 4325,
|
|
"valid_targets_mean": 4711.2,
|
|
"valid_targets_min": 2715
|
|
},
|
|
{
|
|
"epoch": 6.550680786686838,
|
|
"grad_norm": 0.5583679424343055,
|
|
"learning_rate": 5.033622458297859e-07,
|
|
"loss": 0.213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11525959521532059,
|
|
"step": 4330,
|
|
"valid_targets_mean": 4284.6,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 6.558245083207262,
|
|
"grad_norm": 0.5231524768028083,
|
|
"learning_rate": 4.866808710280691e-07,
|
|
"loss": 0.2145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09414457529783249,
|
|
"step": 4335,
|
|
"valid_targets_mean": 3437.0,
|
|
"valid_targets_min": 576
|
|
},
|
|
{
|
|
"epoch": 6.5658093797276855,
|
|
"grad_norm": 0.5220333780950523,
|
|
"learning_rate": 4.702771790038041e-07,
|
|
"loss": 0.2199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1220380961894989,
|
|
"step": 4340,
|
|
"valid_targets_mean": 4126.5,
|
|
"valid_targets_min": 430
|
|
},
|
|
{
|
|
"epoch": 6.573373676248109,
|
|
"grad_norm": 0.4989031390013792,
|
|
"learning_rate": 4.5415140318846306e-07,
|
|
"loss": 0.2222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10389334708452225,
|
|
"step": 4345,
|
|
"valid_targets_mean": 3939.1,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 6.580937972768533,
|
|
"grad_norm": 0.4720398054836142,
|
|
"learning_rate": 4.383037730586481e-07,
|
|
"loss": 0.2132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11403128504753113,
|
|
"step": 4350,
|
|
"valid_targets_mean": 4518.5,
|
|
"valid_targets_min": 3312
|
|
},
|
|
{
|
|
"epoch": 6.588502269288956,
|
|
"grad_norm": 0.5569444155863092,
|
|
"learning_rate": 4.227345141328343e-07,
|
|
"loss": 0.2221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10791186988353729,
|
|
"step": 4355,
|
|
"valid_targets_mean": 3010.2,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 6.59606656580938,
|
|
"grad_norm": 0.6614006380916511,
|
|
"learning_rate": 4.074438479681564e-07,
|
|
"loss": 0.2256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12045466154813766,
|
|
"step": 4360,
|
|
"valid_targets_mean": 3941.6,
|
|
"valid_targets_min": 763
|
|
},
|
|
{
|
|
"epoch": 6.603630862329803,
|
|
"grad_norm": 0.6603036443106318,
|
|
"learning_rate": 3.924319921572561e-07,
|
|
"loss": 0.22,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11139170080423355,
|
|
"step": 4365,
|
|
"valid_targets_mean": 2850.2,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 6.611195158850227,
|
|
"grad_norm": 0.528429966548231,
|
|
"learning_rate": 3.7769916032518227e-07,
|
|
"loss": 0.215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10204044729471207,
|
|
"step": 4370,
|
|
"valid_targets_mean": 3904.8,
|
|
"valid_targets_min": 2063
|
|
},
|
|
{
|
|
"epoch": 6.61875945537065,
|
|
"grad_norm": 0.5987414358267925,
|
|
"learning_rate": 3.63245562126362e-07,
|
|
"loss": 0.2181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11766856163740158,
|
|
"step": 4375,
|
|
"valid_targets_mean": 4619.4,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 6.626323751891074,
|
|
"grad_norm": 0.6499151112735795,
|
|
"learning_rate": 3.490714032416032e-07,
|
|
"loss": 0.2242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10644233226776123,
|
|
"step": 4380,
|
|
"valid_targets_mean": 3116.0,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 6.633888048411498,
|
|
"grad_norm": 0.5640797173421002,
|
|
"learning_rate": 3.351768853751769e-07,
|
|
"loss": 0.2121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12750622630119324,
|
|
"step": 4385,
|
|
"valid_targets_mean": 3494.1,
|
|
"valid_targets_min": 579
|
|
},
|
|
{
|
|
"epoch": 6.641452344931921,
|
|
"grad_norm": 0.5404758670600868,
|
|
"learning_rate": 3.2156220625194633e-07,
|
|
"loss": 0.2171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12458965182304382,
|
|
"step": 4390,
|
|
"valid_targets_mean": 4592.9,
|
|
"valid_targets_min": 512
|
|
},
|
|
{
|
|
"epoch": 6.649016641452345,
|
|
"grad_norm": 0.591428318019953,
|
|
"learning_rate": 3.082275596145445e-07,
|
|
"loss": 0.2163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10034077614545822,
|
|
"step": 4395,
|
|
"valid_targets_mean": 3056.5,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 6.656580937972769,
|
|
"grad_norm": 0.5049993387668485,
|
|
"learning_rate": 2.951731352206322e-07,
|
|
"loss": 0.2203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09603601694107056,
|
|
"step": 4400,
|
|
"valid_targets_mean": 4121.2,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 6.664145234493192,
|
|
"grad_norm": 0.5524051818136108,
|
|
"learning_rate": 2.8239911884018423e-07,
|
|
"loss": 0.2123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0983380377292633,
|
|
"step": 4405,
|
|
"valid_targets_mean": 3492.6,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 6.671709531013616,
|
|
"grad_norm": 0.5523594530876426,
|
|
"learning_rate": 2.69905692252852e-07,
|
|
"loss": 0.2336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10718788951635361,
|
|
"step": 4410,
|
|
"valid_targets_mean": 3477.0,
|
|
"valid_targets_min": 468
|
|
},
|
|
{
|
|
"epoch": 6.67927382753404,
|
|
"grad_norm": 0.6524973382308369,
|
|
"learning_rate": 2.576930332453742e-07,
|
|
"loss": 0.2145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11703108251094818,
|
|
"step": 4415,
|
|
"valid_targets_mean": 3019.2,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 6.6868381240544625,
|
|
"grad_norm": 0.5707802322673707,
|
|
"learning_rate": 2.4576131560905216e-07,
|
|
"loss": 0.2222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14223608374595642,
|
|
"step": 4420,
|
|
"valid_targets_mean": 3998.4,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 6.694402420574886,
|
|
"grad_norm": 0.564085415980068,
|
|
"learning_rate": 2.341107091372674e-07,
|
|
"loss": 0.2206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09779548645019531,
|
|
"step": 4425,
|
|
"valid_targets_mean": 2618.1,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 6.70196671709531,
|
|
"grad_norm": 0.5749924196450357,
|
|
"learning_rate": 2.2274137962307264e-07,
|
|
"loss": 0.2186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08815032243728638,
|
|
"step": 4430,
|
|
"valid_targets_mean": 2726.9,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 6.709531013615734,
|
|
"grad_norm": 0.5759686706741584,
|
|
"learning_rate": 2.1165348885683557e-07,
|
|
"loss": 0.2315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07966586947441101,
|
|
"step": 4435,
|
|
"valid_targets_mean": 2608.4,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 6.717095310136157,
|
|
"grad_norm": 0.5970974052036359,
|
|
"learning_rate": 2.0084719462392544e-07,
|
|
"loss": 0.2153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1390194296836853,
|
|
"step": 4440,
|
|
"valid_targets_mean": 4046.5,
|
|
"valid_targets_min": 867
|
|
},
|
|
{
|
|
"epoch": 6.724659606656581,
|
|
"grad_norm": 0.495814575793362,
|
|
"learning_rate": 1.903226507024769e-07,
|
|
"loss": 0.2185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10173652321100235,
|
|
"step": 4445,
|
|
"valid_targets_mean": 4411.0,
|
|
"valid_targets_min": 3538
|
|
},
|
|
{
|
|
"epoch": 6.732223903177005,
|
|
"grad_norm": 0.5802084968159188,
|
|
"learning_rate": 1.800800068611941e-07,
|
|
"loss": 0.2142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10299307107925415,
|
|
"step": 4450,
|
|
"valid_targets_mean": 3602.8,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 6.739788199697428,
|
|
"grad_norm": 0.5353294476685445,
|
|
"learning_rate": 1.7011940885723222e-07,
|
|
"loss": 0.2222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12711367011070251,
|
|
"step": 4455,
|
|
"valid_targets_mean": 4309.5,
|
|
"valid_targets_min": 861
|
|
},
|
|
{
|
|
"epoch": 6.747352496217852,
|
|
"grad_norm": 0.5891583042081088,
|
|
"learning_rate": 1.60440998434106e-07,
|
|
"loss": 0.2159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09453451633453369,
|
|
"step": 4460,
|
|
"valid_targets_mean": 2763.0,
|
|
"valid_targets_min": 468
|
|
},
|
|
{
|
|
"epoch": 6.754916792738276,
|
|
"grad_norm": 0.5656565934763355,
|
|
"learning_rate": 1.5104491331968674e-07,
|
|
"loss": 0.234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12100706249475479,
|
|
"step": 4465,
|
|
"valid_targets_mean": 4621.0,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 6.762481089258699,
|
|
"grad_norm": 0.5061492887335253,
|
|
"learning_rate": 1.4193128722423954e-07,
|
|
"loss": 0.2166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1394297182559967,
|
|
"step": 4470,
|
|
"valid_targets_mean": 4950.6,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 6.770045385779122,
|
|
"grad_norm": 0.5452920432576506,
|
|
"learning_rate": 1.3310024983851367e-07,
|
|
"loss": 0.2261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09219080209732056,
|
|
"step": 4475,
|
|
"valid_targets_mean": 3517.5,
|
|
"valid_targets_min": 757
|
|
},
|
|
{
|
|
"epoch": 6.777609682299547,
|
|
"grad_norm": 0.5342486698923148,
|
|
"learning_rate": 1.2455192683189955e-07,
|
|
"loss": 0.2157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11762027442455292,
|
|
"step": 4480,
|
|
"valid_targets_mean": 4365.6,
|
|
"valid_targets_min": 3497
|
|
},
|
|
{
|
|
"epoch": 6.785173978819969,
|
|
"grad_norm": 0.48480136134523494,
|
|
"learning_rate": 1.1628643985064802e-07,
|
|
"loss": 0.2106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09521681070327759,
|
|
"step": 4485,
|
|
"valid_targets_mean": 3870.9,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 6.792738275340393,
|
|
"grad_norm": 0.5738276198750686,
|
|
"learning_rate": 1.0830390651613399e-07,
|
|
"loss": 0.214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10769706219434738,
|
|
"step": 4490,
|
|
"valid_targets_mean": 5024.1,
|
|
"valid_targets_min": 569
|
|
},
|
|
{
|
|
"epoch": 6.800302571860817,
|
|
"grad_norm": 0.5939404503019104,
|
|
"learning_rate": 1.0060444042317984e-07,
|
|
"loss": 0.2144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14298078417778015,
|
|
"step": 4495,
|
|
"valid_targets_mean": 4425.4,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 6.80786686838124,
|
|
"grad_norm": 0.4787663522803771,
|
|
"learning_rate": 9.318815113843915e-08,
|
|
"loss": 0.2208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10291831195354462,
|
|
"step": 4500,
|
|
"valid_targets_mean": 3978.0,
|
|
"valid_targets_min": 1073
|
|
},
|
|
{
|
|
"epoch": 6.815431164901664,
|
|
"grad_norm": 1.1043037013720505,
|
|
"learning_rate": 8.605514419884442e-08,
|
|
"loss": 0.2247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12941566109657288,
|
|
"step": 4505,
|
|
"valid_targets_mean": 3219.4,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 6.822995461422088,
|
|
"grad_norm": 0.4253275257497347,
|
|
"learning_rate": 7.92055211100995e-08,
|
|
"loss": 0.2201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09218446910381317,
|
|
"step": 4510,
|
|
"valid_targets_mean": 4162.4,
|
|
"valid_targets_min": 3381
|
|
},
|
|
{
|
|
"epoch": 6.8305597579425115,
|
|
"grad_norm": 0.609841719134409,
|
|
"learning_rate": 7.263937934523402e-08,
|
|
"loss": 0.2168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10570907592773438,
|
|
"step": 4515,
|
|
"valid_targets_mean": 2748.0,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 6.838124054462935,
|
|
"grad_norm": 0.6797656897282006,
|
|
"learning_rate": 6.635681234321789e-08,
|
|
"loss": 0.2244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1111188530921936,
|
|
"step": 4520,
|
|
"valid_targets_mean": 3507.1,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 6.845688350983359,
|
|
"grad_norm": 0.5439892198768086,
|
|
"learning_rate": 6.035790950764008e-08,
|
|
"loss": 0.2101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10245954990386963,
|
|
"step": 4525,
|
|
"valid_targets_mean": 4070.0,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 6.8532526475037825,
|
|
"grad_norm": 0.5478065536243139,
|
|
"learning_rate": 5.464275620542081e-08,
|
|
"loss": 0.2164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09041962772607803,
|
|
"step": 4530,
|
|
"valid_targets_mean": 3234.0,
|
|
"valid_targets_min": 976
|
|
},
|
|
{
|
|
"epoch": 6.860816944024206,
|
|
"grad_norm": 0.5014030270199872,
|
|
"learning_rate": 4.921143376560355e-08,
|
|
"loss": 0.226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08557724952697754,
|
|
"step": 4535,
|
|
"valid_targets_mean": 3668.4,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 6.868381240544629,
|
|
"grad_norm": 0.6154405032316008,
|
|
"learning_rate": 4.4064019478207154e-08,
|
|
"loss": 0.2259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11303947865962982,
|
|
"step": 4540,
|
|
"valid_targets_mean": 3562.0,
|
|
"valid_targets_min": 641
|
|
},
|
|
{
|
|
"epoch": 6.875945537065053,
|
|
"grad_norm": 0.5109060520675038,
|
|
"learning_rate": 3.920058659310666e-08,
|
|
"loss": 0.2269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09950929880142212,
|
|
"step": 4545,
|
|
"valid_targets_mean": 4332.4,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 6.883509833585476,
|
|
"grad_norm": 0.5280471272177006,
|
|
"learning_rate": 3.4621204319011946e-08,
|
|
"loss": 0.2259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10823078453540802,
|
|
"step": 4550,
|
|
"valid_targets_mean": 3441.2,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 6.8910741301059,
|
|
"grad_norm": 0.5696034883956473,
|
|
"learning_rate": 3.032593782246629e-08,
|
|
"loss": 0.2219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12375147640705109,
|
|
"step": 4555,
|
|
"valid_targets_mean": 3958.9,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 6.898638426626324,
|
|
"grad_norm": 0.5495094464689326,
|
|
"learning_rate": 2.6314848226927094e-08,
|
|
"loss": 0.2242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10411810874938965,
|
|
"step": 4560,
|
|
"valid_targets_mean": 3893.0,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 6.906202723146747,
|
|
"grad_norm": 0.665757152214526,
|
|
"learning_rate": 2.258799261189326e-08,
|
|
"loss": 0.2249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1015201061964035,
|
|
"step": 4565,
|
|
"valid_targets_mean": 2847.0,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 6.913767019667171,
|
|
"grad_norm": 0.5927690112630639,
|
|
"learning_rate": 1.9145424012096957e-08,
|
|
"loss": 0.2292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0873643010854721,
|
|
"step": 4570,
|
|
"valid_targets_mean": 3379.9,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 6.921331316187595,
|
|
"grad_norm": 0.5013254033583111,
|
|
"learning_rate": 1.5987191416744208e-08,
|
|
"loss": 0.2217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09522250294685364,
|
|
"step": 4575,
|
|
"valid_targets_mean": 3212.0,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 6.928895612708018,
|
|
"grad_norm": 0.49157994211190065,
|
|
"learning_rate": 1.3113339768817679e-08,
|
|
"loss": 0.2233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0723317414522171,
|
|
"step": 4580,
|
|
"valid_targets_mean": 3322.8,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 6.936459909228442,
|
|
"grad_norm": 0.6707268729367711,
|
|
"learning_rate": 1.0523909964441636e-08,
|
|
"loss": 0.2051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10077203065156937,
|
|
"step": 4585,
|
|
"valid_targets_mean": 4096.8,
|
|
"valid_targets_min": 745
|
|
},
|
|
{
|
|
"epoch": 6.944024205748866,
|
|
"grad_norm": 0.6354810815790946,
|
|
"learning_rate": 8.218938852295744e-09,
|
|
"loss": 0.2245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09913225471973419,
|
|
"step": 4590,
|
|
"valid_targets_mean": 2324.1,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 6.9515885022692885,
|
|
"grad_norm": 0.6043623478372065,
|
|
"learning_rate": 6.1984592330954776e-09,
|
|
"loss": 0.2211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10006999969482422,
|
|
"step": 4595,
|
|
"valid_targets_mean": 2798.8,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 6.959152798789713,
|
|
"grad_norm": 0.5580024699358507,
|
|
"learning_rate": 4.4624998591191735e-09,
|
|
"loss": 0.2242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10977357625961304,
|
|
"step": 4600,
|
|
"valid_targets_mean": 3804.8,
|
|
"valid_targets_min": 1013
|
|
},
|
|
{
|
|
"epoch": 6.966717095310136,
|
|
"grad_norm": 0.65726470089038,
|
|
"learning_rate": 3.0110854337994654e-09,
|
|
"loss": 0.2251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17236720025539398,
|
|
"step": 4605,
|
|
"valid_targets_mean": 4380.5,
|
|
"valid_targets_min": 512
|
|
},
|
|
{
|
|
"epoch": 6.9742813918305595,
|
|
"grad_norm": 0.5453355542455656,
|
|
"learning_rate": 1.8442366113791132e-09,
|
|
"loss": 0.2245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10006083548069,
|
|
"step": 4610,
|
|
"valid_targets_mean": 3036.4,
|
|
"valid_targets_min": 286
|
|
},
|
|
{
|
|
"epoch": 6.981845688350983,
|
|
"grad_norm": 0.6360243381178508,
|
|
"learning_rate": 9.619699966090245e-10,
|
|
"loss": 0.2264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1032380536198616,
|
|
"step": 4615,
|
|
"valid_targets_mean": 3002.4,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 6.989409984871407,
|
|
"grad_norm": 0.5031803751572348,
|
|
"learning_rate": 3.642981445173277e-10,
|
|
"loss": 0.2188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10175298899412155,
|
|
"step": 4620,
|
|
"valid_targets_mean": 4508.6,
|
|
"valid_targets_min": 1346
|
|
},
|
|
{
|
|
"epoch": 6.9969742813918305,
|
|
"grad_norm": 0.6242880310459517,
|
|
"learning_rate": 5.1229560225074525e-11,
|
|
"loss": 0.2248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09943732619285583,
|
|
"step": 4625,
|
|
"valid_targets_mean": 2550.2,
|
|
"valid_targets_min": 547
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1307346224784851,
|
|
"step": 4627,
|
|
"total_flos": 2.2423052373794488e+18,
|
|
"train_loss": 0.2677417782024403,
|
|
"train_runtime": 57225.1316,
|
|
"train_samples_per_second": 1.294,
|
|
"train_steps_per_second": 0.081,
|
|
"valid_targets_mean": 5411.9,
|
|
"valid_targets_min": 3923
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4627,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 2.2423052373794488e+18,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|