Files
nemotron-terminal-file_oper…/trainer_state.json

2629 lines
73 KiB
JSON
Raw Normal View History

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.0,
"eval_steps": 500,
"global_step": 1176,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02982107355864811,
"grad_norm": 13.09333147683712,
"learning_rate": 1.3559322033898307e-06,
"loss": 0.9066,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3346264064311981,
"step": 5,
"valid_targets_mean": 9695.2,
"valid_targets_min": 3418
},
{
"epoch": 0.05964214711729622,
"grad_norm": 9.149937267728706,
"learning_rate": 3.0508474576271192e-06,
"loss": 0.8761,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28128373622894287,
"step": 10,
"valid_targets_mean": 9758.7,
"valid_targets_min": 2911
},
{
"epoch": 0.08946322067594434,
"grad_norm": 4.520961825372613,
"learning_rate": 4.745762711864408e-06,
"loss": 0.8155,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23452389240264893,
"step": 15,
"valid_targets_mean": 8859.2,
"valid_targets_min": 1366
},
{
"epoch": 0.11928429423459244,
"grad_norm": 2.114054619234936,
"learning_rate": 6.440677966101695e-06,
"loss": 0.7578,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24326761066913605,
"step": 20,
"valid_targets_mean": 8662.6,
"valid_targets_min": 2822
},
{
"epoch": 0.14910536779324055,
"grad_norm": 1.4393876248426487,
"learning_rate": 8.135593220338983e-06,
"loss": 0.7069,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.260776162147522,
"step": 25,
"valid_targets_mean": 10508.2,
"valid_targets_min": 3664
},
{
"epoch": 0.17892644135188868,
"grad_norm": 1.1642740315365356,
"learning_rate": 9.830508474576272e-06,
"loss": 0.6928,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2416732907295227,
"step": 30,
"valid_targets_mean": 9538.0,
"valid_targets_min": 2536
},
{
"epoch": 0.20874751491053678,
"grad_norm": 0.7766319665933883,
"learning_rate": 1.1525423728813561e-05,
"loss": 0.6532,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21398067474365234,
"step": 35,
"valid_targets_mean": 9878.9,
"valid_targets_min": 3211
},
{
"epoch": 0.23856858846918488,
"grad_norm": 0.5838308603893411,
"learning_rate": 1.3220338983050848e-05,
"loss": 0.6283,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20412425696849823,
"step": 40,
"valid_targets_mean": 9810.7,
"valid_targets_min": 2899
},
{
"epoch": 0.268389662027833,
"grad_norm": 0.5037663875608299,
"learning_rate": 1.4915254237288137e-05,
"loss": 0.605,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1884448379278183,
"step": 45,
"valid_targets_mean": 9185.9,
"valid_targets_min": 2971
},
{
"epoch": 0.2982107355864811,
"grad_norm": 0.39709002181472497,
"learning_rate": 1.6610169491525424e-05,
"loss": 0.5845,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17413000762462616,
"step": 50,
"valid_targets_mean": 8571.4,
"valid_targets_min": 2940
},
{
"epoch": 0.32803180914512925,
"grad_norm": 0.33127605925110176,
"learning_rate": 1.8305084745762713e-05,
"loss": 0.5575,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16696935892105103,
"step": 55,
"valid_targets_mean": 9093.4,
"valid_targets_min": 2520
},
{
"epoch": 0.35785288270377735,
"grad_norm": 0.3215099174730303,
"learning_rate": 2e-05,
"loss": 0.5544,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17364796996116638,
"step": 60,
"valid_targets_mean": 8888.1,
"valid_targets_min": 2491
},
{
"epoch": 0.38767395626242546,
"grad_norm": 0.2821181675694596,
"learning_rate": 2.169491525423729e-05,
"loss": 0.5342,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18019835650920868,
"step": 65,
"valid_targets_mean": 9505.2,
"valid_targets_min": 3936
},
{
"epoch": 0.41749502982107356,
"grad_norm": 0.2414187942326976,
"learning_rate": 2.338983050847458e-05,
"loss": 0.5251,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18193358182907104,
"step": 70,
"valid_targets_mean": 9565.2,
"valid_targets_min": 3319
},
{
"epoch": 0.44731610337972166,
"grad_norm": 0.22420801544120914,
"learning_rate": 2.5084745762711865e-05,
"loss": 0.512,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14556047320365906,
"step": 75,
"valid_targets_mean": 9219.1,
"valid_targets_min": 3220
},
{
"epoch": 0.47713717693836977,
"grad_norm": 0.2412726568511184,
"learning_rate": 2.6779661016949153e-05,
"loss": 0.5028,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16368263959884644,
"step": 80,
"valid_targets_mean": 8981.2,
"valid_targets_min": 3194
},
{
"epoch": 0.5069582504970179,
"grad_norm": 0.23318641532107723,
"learning_rate": 2.8474576271186442e-05,
"loss": 0.5007,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16197912395000458,
"step": 85,
"valid_targets_mean": 8739.4,
"valid_targets_min": 2389
},
{
"epoch": 0.536779324055666,
"grad_norm": 0.23052715262359902,
"learning_rate": 3.016949152542373e-05,
"loss": 0.4809,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1737665832042694,
"step": 90,
"valid_targets_mean": 10664.4,
"valid_targets_min": 1536
},
{
"epoch": 0.5666003976143141,
"grad_norm": 0.22528756572218692,
"learning_rate": 3.186440677966102e-05,
"loss": 0.4788,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1719246357679367,
"step": 95,
"valid_targets_mean": 10288.0,
"valid_targets_min": 2629
},
{
"epoch": 0.5964214711729622,
"grad_norm": 0.2601607078156621,
"learning_rate": 3.355932203389831e-05,
"loss": 0.4687,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1524590253829956,
"step": 100,
"valid_targets_mean": 9881.0,
"valid_targets_min": 2893
},
{
"epoch": 0.6262425447316103,
"grad_norm": 0.22675199525629694,
"learning_rate": 3.52542372881356e-05,
"loss": 0.4565,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15666724741458893,
"step": 105,
"valid_targets_mean": 10118.0,
"valid_targets_min": 3280
},
{
"epoch": 0.6560636182902585,
"grad_norm": 0.23545944466896276,
"learning_rate": 3.6949152542372886e-05,
"loss": 0.4635,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15132704377174377,
"step": 110,
"valid_targets_mean": 9088.4,
"valid_targets_min": 1751
},
{
"epoch": 0.6858846918489065,
"grad_norm": 0.2552945140336154,
"learning_rate": 3.8644067796610175e-05,
"loss": 0.4624,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15736186504364014,
"step": 115,
"valid_targets_mean": 9703.8,
"valid_targets_min": 2496
},
{
"epoch": 0.7157057654075547,
"grad_norm": 0.2661696526978554,
"learning_rate": 3.999991182852808e-05,
"loss": 0.4518,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14347058534622192,
"step": 120,
"valid_targets_mean": 8487.7,
"valid_targets_min": 2297
},
{
"epoch": 0.7455268389662028,
"grad_norm": 0.24265446044028421,
"learning_rate": 3.999682590863935e-05,
"loss": 0.4401,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15881270170211792,
"step": 125,
"valid_targets_mean": 10650.6,
"valid_targets_min": 4557
},
{
"epoch": 0.7753479125248509,
"grad_norm": 0.3477779737132978,
"learning_rate": 3.9989332192544725e-05,
"loss": 0.4454,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1669636368751526,
"step": 130,
"valid_targets_mean": 10482.2,
"valid_targets_min": 3995
},
{
"epoch": 0.805168986083499,
"grad_norm": 0.2688791723605399,
"learning_rate": 3.997743233204502e-05,
"loss": 0.4463,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1437249481678009,
"step": 135,
"valid_targets_mean": 9449.9,
"valid_targets_min": 2652
},
{
"epoch": 0.8349900596421471,
"grad_norm": 0.27803453281851986,
"learning_rate": 3.996112895016452e-05,
"loss": 0.4377,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13932648301124573,
"step": 140,
"valid_targets_mean": 9696.8,
"valid_targets_min": 3542
},
{
"epoch": 0.8648111332007953,
"grad_norm": 0.2616493730811242,
"learning_rate": 3.994042564057279e-05,
"loss": 0.4405,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14841462671756744,
"step": 145,
"valid_targets_mean": 9823.4,
"valid_targets_min": 3035
},
{
"epoch": 0.8946322067594433,
"grad_norm": 0.2763712349418631,
"learning_rate": 3.9915326966792555e-05,
"loss": 0.4393,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14706194400787354,
"step": 150,
"valid_targets_mean": 9098.7,
"valid_targets_min": 2668
},
{
"epoch": 0.9244532803180915,
"grad_norm": 0.29517511116566714,
"learning_rate": 3.9885838461193794e-05,
"loss": 0.4316,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1425623744726181,
"step": 155,
"valid_targets_mean": 9000.5,
"valid_targets_min": 3371
},
{
"epoch": 0.9542743538767395,
"grad_norm": 0.304747766618288,
"learning_rate": 3.985196662377424e-05,
"loss": 0.4372,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1514747142791748,
"step": 160,
"valid_targets_mean": 9558.1,
"valid_targets_min": 3439
},
{
"epoch": 0.9840954274353877,
"grad_norm": 0.25971095278000167,
"learning_rate": 3.981371892072661e-05,
"loss": 0.4321,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14394722878932953,
"step": 165,
"valid_targets_mean": 9277.1,
"valid_targets_min": 3851
},
{
"epoch": 1.0119284294234592,
"grad_norm": 0.2900070928223118,
"learning_rate": 3.9771103782792956e-05,
"loss": 0.428,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1467021405696869,
"step": 170,
"valid_targets_mean": 9804.5,
"valid_targets_min": 3955
},
{
"epoch": 1.0417495029821073,
"grad_norm": 0.2562110209083835,
"learning_rate": 3.9724130603406204e-05,
"loss": 0.4214,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15313297510147095,
"step": 175,
"valid_targets_mean": 10062.1,
"valid_targets_min": 2682
},
{
"epoch": 1.0715705765407555,
"grad_norm": 0.26570170838169876,
"learning_rate": 3.9672809736619684e-05,
"loss": 0.426,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1393103003501892,
"step": 180,
"valid_targets_mean": 9271.5,
"valid_targets_min": 1624
},
{
"epoch": 1.1013916500994037,
"grad_norm": 0.2757881608821532,
"learning_rate": 3.961715249482482e-05,
"loss": 0.4128,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14483334124088287,
"step": 185,
"valid_targets_mean": 9806.2,
"valid_targets_min": 2567
},
{
"epoch": 1.1312127236580518,
"grad_norm": 0.2593086099940217,
"learning_rate": 3.95571711462576e-05,
"loss": 0.4201,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15444408357143402,
"step": 190,
"valid_targets_mean": 10145.6,
"valid_targets_min": 3190
},
{
"epoch": 1.1610337972166997,
"grad_norm": 0.3074440806567765,
"learning_rate": 3.9492878912294345e-05,
"loss": 0.4246,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13667774200439453,
"step": 195,
"valid_targets_mean": 8876.5,
"valid_targets_min": 2465
},
{
"epoch": 1.190854870775348,
"grad_norm": 0.26202363702381226,
"learning_rate": 3.942428996453741e-05,
"loss": 0.4141,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1346900910139084,
"step": 200,
"valid_targets_mean": 9183.1,
"valid_targets_min": 3115
},
{
"epoch": 1.220675944333996,
"grad_norm": 0.23482546143374838,
"learning_rate": 3.935141942169138e-05,
"loss": 0.4196,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14526072144508362,
"step": 205,
"valid_targets_mean": 10055.5,
"valid_targets_min": 1170
},
{
"epoch": 1.250497017892644,
"grad_norm": 0.24783213003782373,
"learning_rate": 3.927428334623054e-05,
"loss": 0.4251,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13255912065505981,
"step": 210,
"valid_targets_mean": 10057.3,
"valid_targets_min": 4200
},
{
"epoch": 1.2803180914512922,
"grad_norm": 0.3356314182794496,
"learning_rate": 3.919289874085837e-05,
"loss": 0.4219,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14556168019771576,
"step": 215,
"valid_targets_mean": 9935.3,
"valid_targets_min": 4601
},
{
"epoch": 1.3101391650099403,
"grad_norm": 0.3205331125262116,
"learning_rate": 3.910728354475961e-05,
"loss": 0.4156,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14186373353004456,
"step": 220,
"valid_targets_mean": 10047.3,
"valid_targets_min": 3586
},
{
"epoch": 1.3399602385685885,
"grad_norm": 0.24942595965959885,
"learning_rate": 3.9017456629646126e-05,
"loss": 0.4047,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1405375897884369,
"step": 225,
"valid_targets_mean": 10060.8,
"valid_targets_min": 3242
},
{
"epoch": 1.3697813121272366,
"grad_norm": 0.2584612306483155,
"learning_rate": 3.8923437795597056e-05,
"loss": 0.4182,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11859870702028275,
"step": 230,
"valid_targets_mean": 8309.6,
"valid_targets_min": 3444
},
{
"epoch": 1.3996023856858848,
"grad_norm": 0.25784491517610686,
"learning_rate": 3.882524776669442e-05,
"loss": 0.415,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13979320228099823,
"step": 235,
"valid_targets_mean": 9624.1,
"valid_targets_min": 2944
},
{
"epoch": 1.4294234592445327,
"grad_norm": 0.261391607331903,
"learning_rate": 3.872290818645497e-05,
"loss": 0.4121,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1375332772731781,
"step": 240,
"valid_targets_mean": 9274.7,
"valid_targets_min": 2578
},
{
"epoch": 1.459244532803181,
"grad_norm": 0.26508343196177364,
"learning_rate": 3.861644161305948e-05,
"loss": 0.4094,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1376476287841797,
"step": 245,
"valid_targets_mean": 9600.3,
"valid_targets_min": 3024
},
{
"epoch": 1.489065606361829,
"grad_norm": 0.25905924292751825,
"learning_rate": 3.850587151438031e-05,
"loss": 0.4075,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12836024165153503,
"step": 250,
"valid_targets_mean": 8532.4,
"valid_targets_min": 2314
},
{
"epoch": 1.518886679920477,
"grad_norm": 0.24769087290084157,
"learning_rate": 3.839122226280854e-05,
"loss": 0.4106,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1362186223268509,
"step": 255,
"valid_targets_mean": 9983.8,
"valid_targets_min": 3384
},
{
"epoch": 1.5487077534791251,
"grad_norm": 0.24871240968489136,
"learning_rate": 3.8272519129881696e-05,
"loss": 0.4046,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1329842209815979,
"step": 260,
"valid_targets_mean": 9754.4,
"valid_targets_min": 3902
},
{
"epoch": 1.5785288270377733,
"grad_norm": 0.2574497272355019,
"learning_rate": 3.814978828071325e-05,
"loss": 0.4081,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14275474846363068,
"step": 265,
"valid_targets_mean": 9707.1,
"valid_targets_min": 2847
},
{
"epoch": 1.6083499005964215,
"grad_norm": 0.26745189858092433,
"learning_rate": 3.802305676822517e-05,
"loss": 0.4032,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13096818327903748,
"step": 270,
"valid_targets_mean": 9074.9,
"valid_targets_min": 2634
},
{
"epoch": 1.6381709741550696,
"grad_norm": 0.2801974592140141,
"learning_rate": 3.789235252718484e-05,
"loss": 0.4099,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14643187820911407,
"step": 275,
"valid_targets_mean": 9043.5,
"valid_targets_min": 3736
},
{
"epoch": 1.6679920477137178,
"grad_norm": 0.24615159428763864,
"learning_rate": 3.775770436804751e-05,
"loss": 0.4088,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1417016237974167,
"step": 280,
"valid_targets_mean": 10256.2,
"valid_targets_min": 1765
},
{
"epoch": 1.697813121272366,
"grad_norm": 0.255233286604699,
"learning_rate": 3.761914197060573e-05,
"loss": 0.4005,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12331944704055786,
"step": 285,
"valid_targets_mean": 9173.6,
"valid_targets_min": 3254
},
{
"epoch": 1.7276341948310139,
"grad_norm": 0.2779216978110516,
"learning_rate": 3.747669587744723e-05,
"loss": 0.4007,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13669756054878235,
"step": 290,
"valid_targets_mean": 9882.7,
"valid_targets_min": 2297
},
{
"epoch": 1.757455268389662,
"grad_norm": 0.26422478087504886,
"learning_rate": 3.733039748722258e-05,
"loss": 0.4053,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14321698248386383,
"step": 295,
"valid_targets_mean": 9899.4,
"valid_targets_min": 2409
},
{
"epoch": 1.78727634194831,
"grad_norm": 0.3959250627738573,
"learning_rate": 3.718027904772412e-05,
"loss": 0.4021,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15091611444950104,
"step": 300,
"valid_targets_mean": 9824.9,
"valid_targets_min": 1650
},
{
"epoch": 1.8170974155069581,
"grad_norm": 0.28379938678387157,
"learning_rate": 3.702637364877776e-05,
"loss": 0.4095,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1386471688747406,
"step": 305,
"valid_targets_mean": 9847.7,
"valid_targets_min": 3711
},
{
"epoch": 1.8469184890656063,
"grad_norm": 0.2889207311661245,
"learning_rate": 3.686871521494915e-05,
"loss": 0.4033,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14630842208862305,
"step": 310,
"valid_targets_mean": 9958.7,
"valid_targets_min": 2990
},
{
"epoch": 1.8767395626242545,
"grad_norm": 0.2561333348175739,
"learning_rate": 3.67073384980659e-05,
"loss": 0.395,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12660804390907288,
"step": 315,
"valid_targets_mean": 9167.2,
"valid_targets_min": 1410
},
{
"epoch": 1.9065606361829026,
"grad_norm": 0.2914965872986488,
"learning_rate": 3.654227906955737e-05,
"loss": 0.3981,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14069530367851257,
"step": 320,
"valid_targets_mean": 9996.4,
"valid_targets_min": 2710
},
{
"epoch": 1.9363817097415508,
"grad_norm": 0.2543075725463023,
"learning_rate": 3.6373573312613874e-05,
"loss": 0.3984,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1340501606464386,
"step": 325,
"valid_targets_mean": 9937.8,
"valid_targets_min": 3155
},
{
"epoch": 1.966202783300199,
"grad_norm": 0.2646619167637183,
"learning_rate": 3.620125841416692e-05,
"loss": 0.3987,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13946759700775146,
"step": 330,
"valid_targets_mean": 9728.8,
"valid_targets_min": 2807
},
{
"epoch": 1.9960238568588469,
"grad_norm": 0.294581414903317,
"learning_rate": 3.602537235669228e-05,
"loss": 0.3966,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12473136186599731,
"step": 335,
"valid_targets_mean": 9149.7,
"valid_targets_min": 3235
},
{
"epoch": 2.0238568588469183,
"grad_norm": 0.27850055295898757,
"learning_rate": 3.5845953909837716e-05,
"loss": 0.393,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12054517865180969,
"step": 340,
"valid_targets_mean": 8854.7,
"valid_targets_min": 1949
},
{
"epoch": 2.0536779324055665,
"grad_norm": 0.26244144180768225,
"learning_rate": 3.566304262187718e-05,
"loss": 0.3938,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12201672792434692,
"step": 345,
"valid_targets_mean": 8900.0,
"valid_targets_min": 2330
},
{
"epoch": 2.0834990059642147,
"grad_norm": 0.2675354283095,
"learning_rate": 3.547667881099341e-05,
"loss": 0.3931,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1361660212278366,
"step": 350,
"valid_targets_mean": 10345.9,
"valid_targets_min": 5477
},
{
"epoch": 2.113320079522863,
"grad_norm": 0.27333374666170746,
"learning_rate": 3.528690355639079e-05,
"loss": 0.4028,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1383921355009079,
"step": 355,
"valid_targets_mean": 10431.5,
"valid_targets_min": 2495
},
{
"epoch": 2.143141153081511,
"grad_norm": 0.2515801080189176,
"learning_rate": 3.509375868924048e-05,
"loss": 0.3857,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11818955838680267,
"step": 360,
"valid_targets_mean": 8793.7,
"valid_targets_min": 3129
},
{
"epoch": 2.172962226640159,
"grad_norm": 0.2625179819877921,
"learning_rate": 3.489728678345978e-05,
"loss": 0.3938,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13788491487503052,
"step": 365,
"valid_targets_mean": 10044.4,
"valid_targets_min": 4449
},
{
"epoch": 2.2027833001988073,
"grad_norm": 0.2964649252713314,
"learning_rate": 3.46975311463278e-05,
"loss": 0.3843,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13454961776733398,
"step": 370,
"valid_targets_mean": 9236.4,
"valid_targets_min": 2752
},
{
"epoch": 2.2326043737574555,
"grad_norm": 0.2710375300470546,
"learning_rate": 3.449453580893945e-05,
"loss": 0.3968,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12816333770751953,
"step": 375,
"valid_targets_mean": 9797.3,
"valid_targets_min": 3511
},
{
"epoch": 2.2624254473161036,
"grad_norm": 0.25606918527553785,
"learning_rate": 3.428834551649989e-05,
"loss": 0.387,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13535423576831818,
"step": 380,
"valid_targets_mean": 9259.8,
"valid_targets_min": 1987
},
{
"epoch": 2.2922465208747513,
"grad_norm": 0.25758666339894315,
"learning_rate": 3.4079005718461596e-05,
"loss": 0.3935,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1293325573205948,
"step": 385,
"valid_targets_mean": 8947.5,
"valid_targets_min": 2799
},
{
"epoch": 2.3220675944333995,
"grad_norm": 0.24234110541082965,
"learning_rate": 3.386656255850617e-05,
"loss": 0.3904,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1355832815170288,
"step": 390,
"valid_targets_mean": 10094.5,
"valid_targets_min": 1932
},
{
"epoch": 2.3518886679920477,
"grad_norm": 0.25695722708253604,
"learning_rate": 3.365106286437309e-05,
"loss": 0.3874,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12625740468502045,
"step": 395,
"valid_targets_mean": 9291.4,
"valid_targets_min": 3611
},
{
"epoch": 2.381709741550696,
"grad_norm": 0.2563116812129519,
"learning_rate": 3.3432554137537764e-05,
"loss": 0.3895,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11663530766963959,
"step": 400,
"valid_targets_mean": 9260.6,
"valid_targets_min": 2691
},
{
"epoch": 2.411530815109344,
"grad_norm": 0.2572513815596599,
"learning_rate": 3.321108454274103e-05,
"loss": 0.3907,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12803307175636292,
"step": 405,
"valid_targets_mean": 9468.8,
"valid_targets_min": 3154
},
{
"epoch": 2.441351888667992,
"grad_norm": 0.2607707850998344,
"learning_rate": 3.29867028973724e-05,
"loss": 0.3859,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1312410533428192,
"step": 410,
"valid_targets_mean": 8711.6,
"valid_targets_min": 3918
},
{
"epoch": 2.4711729622266403,
"grad_norm": 0.23731121331277513,
"learning_rate": 3.275945866070955e-05,
"loss": 0.3864,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14013691246509552,
"step": 415,
"valid_targets_mean": 10685.4,
"valid_targets_min": 2421
},
{
"epoch": 2.500994035785288,
"grad_norm": 0.24444398626626032,
"learning_rate": 3.252940192301624e-05,
"loss": 0.3864,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12714995443820953,
"step": 420,
"valid_targets_mean": 10293.2,
"valid_targets_min": 3706
},
{
"epoch": 2.530815109343936,
"grad_norm": 0.2405776417574965,
"learning_rate": 3.229658339450119e-05,
"loss": 0.3903,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13615553081035614,
"step": 425,
"valid_targets_mean": 10000.6,
"valid_targets_min": 3407
},
{
"epoch": 2.5606361829025843,
"grad_norm": 0.24845003130000037,
"learning_rate": 3.2061054394140285e-05,
"loss": 0.3918,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1352383941411972,
"step": 430,
"valid_targets_mean": 10112.7,
"valid_targets_min": 3079
},
{
"epoch": 2.5904572564612325,
"grad_norm": 0.2562994770469392,
"learning_rate": 3.182286683836461e-05,
"loss": 0.3854,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12299023568630219,
"step": 435,
"valid_targets_mean": 8783.9,
"valid_targets_min": 1946
},
{
"epoch": 2.6202783300198806,
"grad_norm": 0.2591727396961362,
"learning_rate": 3.158207322961678e-05,
"loss": 0.389,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12027916312217712,
"step": 440,
"valid_targets_mean": 8550.8,
"valid_targets_min": 1304
},
{
"epoch": 2.650099403578529,
"grad_norm": 0.23343608939765326,
"learning_rate": 3.1338726644778084e-05,
"loss": 0.3798,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12307856976985931,
"step": 445,
"valid_targets_mean": 9197.8,
"valid_targets_min": 3880
},
{
"epoch": 2.679920477137177,
"grad_norm": 0.2608548923226282,
"learning_rate": 3.109288072346904e-05,
"loss": 0.3846,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13536040484905243,
"step": 450,
"valid_targets_mean": 9562.6,
"valid_targets_min": 1005
},
{
"epoch": 2.709741550695825,
"grad_norm": 0.2674341383904537,
"learning_rate": 3.084458965622591e-05,
"loss": 0.3854,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1220935583114624,
"step": 455,
"valid_targets_mean": 9237.2,
"valid_targets_min": 3261
},
{
"epoch": 2.7395626242544733,
"grad_norm": 0.24113612336638285,
"learning_rate": 3.0593908172555696e-05,
"loss": 0.3829,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12890732288360596,
"step": 460,
"valid_targets_mean": 9338.5,
"valid_targets_min": 1751
},
{
"epoch": 2.7693836978131214,
"grad_norm": 0.23504577264959453,
"learning_rate": 3.0340891528872503e-05,
"loss": 0.3861,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1365663856267929,
"step": 465,
"valid_targets_mean": 10284.9,
"valid_targets_min": 3324
},
{
"epoch": 2.7992047713717696,
"grad_norm": 0.23542940739467466,
"learning_rate": 3.0085595496317558e-05,
"loss": 0.386,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12416007369756699,
"step": 470,
"valid_targets_mean": 9443.1,
"valid_targets_min": 2297
},
{
"epoch": 2.8290258449304178,
"grad_norm": 0.2520271616790775,
"learning_rate": 2.9828076348465913e-05,
"loss": 0.3887,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12400713562965393,
"step": 475,
"valid_targets_mean": 8372.7,
"valid_targets_min": 3158
},
{
"epoch": 2.8588469184890655,
"grad_norm": 0.2617214720802655,
"learning_rate": 2.956839084892235e-05,
"loss": 0.3823,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13921988010406494,
"step": 480,
"valid_targets_mean": 10316.4,
"valid_targets_min": 2117
},
{
"epoch": 2.8886679920477136,
"grad_norm": 0.23989440057199513,
"learning_rate": 2.9306596238809292e-05,
"loss": 0.3826,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12483088672161102,
"step": 485,
"valid_targets_mean": 9510.9,
"valid_targets_min": 1113
},
{
"epoch": 2.918489065606362,
"grad_norm": 0.2619100232438116,
"learning_rate": 2.9042750224149396e-05,
"loss": 0.384,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1344929337501526,
"step": 490,
"valid_targets_mean": 10448.7,
"valid_targets_min": 4344
},
{
"epoch": 2.94831013916501,
"grad_norm": 0.24604640536880984,
"learning_rate": 2.877691096314576e-05,
"loss": 0.3878,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13883237540721893,
"step": 495,
"valid_targets_mean": 10006.5,
"valid_targets_min": 3781
},
{
"epoch": 2.978131212723658,
"grad_norm": 0.24449981627934345,
"learning_rate": 2.850913705336238e-05,
"loss": 0.3883,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12503878772258759,
"step": 500,
"valid_targets_mean": 9295.5,
"valid_targets_min": 2761
},
{
"epoch": 3.00596421471173,
"grad_norm": 0.25973568404266545,
"learning_rate": 2.8239487518807816e-05,
"loss": 0.3787,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11682064831256866,
"step": 505,
"valid_targets_mean": 8969.0,
"valid_targets_min": 2105
},
{
"epoch": 3.0357852882703775,
"grad_norm": 0.22127882526750678,
"learning_rate": 2.7968021796924834e-05,
"loss": 0.376,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10894481837749481,
"step": 510,
"valid_targets_mean": 8102.7,
"valid_targets_min": 4194
},
{
"epoch": 3.0656063618290257,
"grad_norm": 0.2568154982874112,
"learning_rate": 2.76947997254889e-05,
"loss": 0.3794,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1269792914390564,
"step": 515,
"valid_targets_mean": 9722.5,
"valid_targets_min": 3461
},
{
"epoch": 3.095427435387674,
"grad_norm": 0.2418191676681759,
"learning_rate": 2.741988152941849e-05,
"loss": 0.3789,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1313607394695282,
"step": 520,
"valid_targets_mean": 10292.7,
"valid_targets_min": 2701
},
{
"epoch": 3.125248508946322,
"grad_norm": 0.26137529392846076,
"learning_rate": 2.714332780749997e-05,
"loss": 0.3727,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12172074615955353,
"step": 525,
"valid_targets_mean": 9764.4,
"valid_targets_min": 2498
},
{
"epoch": 3.15506958250497,
"grad_norm": 0.24623727496182754,
"learning_rate": 2.6865199519030178e-05,
"loss": 0.3779,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12355610728263855,
"step": 530,
"valid_targets_mean": 9638.3,
"valid_targets_min": 3022
},
{
"epoch": 3.1848906560636183,
"grad_norm": 0.24954885423234177,
"learning_rate": 2.658555797037945e-05,
"loss": 0.3787,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12461437284946442,
"step": 535,
"valid_targets_mean": 8588.2,
"valid_targets_min": 1886
},
{
"epoch": 3.2147117296222665,
"grad_norm": 0.21483912308106684,
"learning_rate": 2.6304464801478177e-05,
"loss": 0.3762,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1340673416852951,
"step": 540,
"valid_targets_mean": 10699.2,
"valid_targets_min": 2852
},
{
"epoch": 3.2445328031809146,
"grad_norm": 0.253090712355003,
"learning_rate": 2.6021981972229852e-05,
"loss": 0.3796,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1394907385110855,
"step": 545,
"valid_targets_mean": 9860.4,
"valid_targets_min": 2323
},
{
"epoch": 3.274353876739563,
"grad_norm": 0.22081826679193847,
"learning_rate": 2.5738171748853552e-05,
"loss": 0.3805,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1250452846288681,
"step": 550,
"valid_targets_mean": 10036.3,
"valid_targets_min": 1623
},
{
"epoch": 3.3041749502982105,
"grad_norm": 0.25504537067123745,
"learning_rate": 2.545309669015895e-05,
"loss": 0.3771,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12748511135578156,
"step": 555,
"valid_targets_mean": 10318.6,
"valid_targets_min": 4635
},
{
"epoch": 3.3339960238568587,
"grad_norm": 0.23504480529865782,
"learning_rate": 2.5166819633756746e-05,
"loss": 0.3781,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12984226644039154,
"step": 560,
"valid_targets_mean": 9720.7,
"valid_targets_min": 2968
},
{
"epoch": 3.363817097415507,
"grad_norm": 0.22141055145170896,
"learning_rate": 2.4879403682207775e-05,
"loss": 0.3784,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1256609559059143,
"step": 565,
"valid_targets_mean": 9638.0,
"valid_targets_min": 2492
},
{
"epoch": 3.393638170974155,
"grad_norm": 0.24107550315856965,
"learning_rate": 2.4590912189113575e-05,
"loss": 0.3759,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13189947605133057,
"step": 570,
"valid_targets_mean": 9626.9,
"valid_targets_min": 2284
},
{
"epoch": 3.423459244532803,
"grad_norm": 0.23868274137862344,
"learning_rate": 2.430140874515171e-05,
"loss": 0.3742,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12700285017490387,
"step": 575,
"valid_targets_mean": 9807.8,
"valid_targets_min": 3978
},
{
"epoch": 3.4532803180914513,
"grad_norm": 0.22377331028220349,
"learning_rate": 2.4010957164058803e-05,
"loss": 0.3751,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1289224922657013,
"step": 580,
"valid_targets_mean": 9460.9,
"valid_targets_min": 2662
},
{
"epoch": 3.4831013916500995,
"grad_norm": 0.24019351233889105,
"learning_rate": 2.3719621468564416e-05,
"loss": 0.3763,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12393779307603836,
"step": 585,
"valid_targets_mean": 9742.5,
"valid_targets_min": 2693
},
{
"epoch": 3.5129224652087476,
"grad_norm": 0.24701426386806288,
"learning_rate": 2.3427465876278843e-05,
"loss": 0.3697,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12272713333368301,
"step": 590,
"valid_targets_mean": 10039.6,
"valid_targets_min": 2944
},
{
"epoch": 3.542743538767396,
"grad_norm": 0.25447639049380394,
"learning_rate": 2.3134554785537943e-05,
"loss": 0.3778,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1295909881591797,
"step": 595,
"valid_targets_mean": 9521.8,
"valid_targets_min": 2744
},
{
"epoch": 3.572564612326044,
"grad_norm": 0.21876380474118456,
"learning_rate": 2.284095276120818e-05,
"loss": 0.376,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11200149357318878,
"step": 600,
"valid_targets_mean": 9297.1,
"valid_targets_min": 4191
},
{
"epoch": 3.602385685884692,
"grad_norm": 0.2876203923550237,
"learning_rate": 2.2546724520454916e-05,
"loss": 0.3784,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13492657244205475,
"step": 605,
"valid_targets_mean": 9474.2,
"valid_targets_min": 2691
},
{
"epoch": 3.63220675944334,
"grad_norm": 0.3109728940450909,
"learning_rate": 2.2251934918477126e-05,
"loss": 0.3764,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1116630882024765,
"step": 610,
"valid_targets_mean": 8435.2,
"valid_targets_min": 2827
},
{
"epoch": 3.662027833001988,
"grad_norm": 0.23811609842596262,
"learning_rate": 2.1956648934211717e-05,
"loss": 0.3772,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11188781261444092,
"step": 615,
"valid_targets_mean": 9120.6,
"valid_targets_min": 2188
},
{
"epoch": 3.691848906560636,
"grad_norm": 0.22411995085803113,
"learning_rate": 2.1660931656010568e-05,
"loss": 0.3708,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11489223688840866,
"step": 620,
"valid_targets_mean": 8820.9,
"valid_targets_min": 3292
},
{
"epoch": 3.7216699801192843,
"grad_norm": 0.22528057105116367,
"learning_rate": 2.1364848267293424e-05,
"loss": 0.3748,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11487048119306564,
"step": 625,
"valid_targets_mean": 8887.9,
"valid_targets_min": 2649
},
{
"epoch": 3.7514910536779325,
"grad_norm": 0.23210531928235775,
"learning_rate": 2.106846403217987e-05,
"loss": 0.3711,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12289533764123917,
"step": 630,
"valid_targets_mean": 9040.5,
"valid_targets_min": 2927
},
{
"epoch": 3.7813121272365806,
"grad_norm": 0.22420265923284372,
"learning_rate": 2.0771844281103503e-05,
"loss": 0.3832,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1272115558385849,
"step": 635,
"valid_targets_mean": 9784.5,
"valid_targets_min": 3904
},
{
"epoch": 3.8111332007952288,
"grad_norm": 0.2463614011353854,
"learning_rate": 2.0475054396411464e-05,
"loss": 0.3737,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13237622380256653,
"step": 640,
"valid_targets_mean": 10127.7,
"valid_targets_min": 2225
},
{
"epoch": 3.8409542743538765,
"grad_norm": 0.23705284154886855,
"learning_rate": 2.017815979795257e-05,
"loss": 0.3771,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1224151998758316,
"step": 645,
"valid_targets_mean": 8962.2,
"valid_targets_min": 2634
},
{
"epoch": 3.8707753479125246,
"grad_norm": 0.27393497557546315,
"learning_rate": 1.9881225928657132e-05,
"loss": 0.3755,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11714585870504379,
"step": 650,
"valid_targets_mean": 9084.2,
"valid_targets_min": 3314
},
{
"epoch": 3.900596421471173,
"grad_norm": 0.24064359347617442,
"learning_rate": 1.958431824011176e-05,
"loss": 0.3773,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11896838247776031,
"step": 655,
"valid_targets_mean": 9061.0,
"valid_targets_min": 3267
},
{
"epoch": 3.930417495029821,
"grad_norm": 0.23848724567684518,
"learning_rate": 1.928750217813214e-05,
"loss": 0.372,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12432240694761276,
"step": 660,
"valid_targets_mean": 9445.7,
"valid_targets_min": 3656
},
{
"epoch": 3.960238568588469,
"grad_norm": 0.23633084097635995,
"learning_rate": 1.899084316833722e-05,
"loss": 0.3726,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12451080232858658,
"step": 665,
"valid_targets_mean": 9713.7,
"valid_targets_min": 3239
},
{
"epoch": 3.9900596421471173,
"grad_norm": 0.24802678596076022,
"learning_rate": 1.869440660172774e-05,
"loss": 0.3711,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1269044578075409,
"step": 670,
"valid_targets_mean": 9837.6,
"valid_targets_min": 2370
},
{
"epoch": 4.0178926441351885,
"grad_norm": 0.23067166950312643,
"learning_rate": 1.8398257820272438e-05,
"loss": 0.3711,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13049767911434174,
"step": 675,
"valid_targets_mean": 9200.5,
"valid_targets_min": 3730
},
{
"epoch": 4.047713717693837,
"grad_norm": 0.22687025778414033,
"learning_rate": 1.8102462102505096e-05,
"loss": 0.3664,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11435438692569733,
"step": 680,
"valid_targets_mean": 9556.8,
"valid_targets_min": 1170
},
{
"epoch": 4.077534791252485,
"grad_norm": 0.23492036510934328,
"learning_rate": 1.7807084649135473e-05,
"loss": 0.3686,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11235421150922775,
"step": 685,
"valid_targets_mean": 8562.3,
"valid_targets_min": 2218
},
{
"epoch": 4.107355864811133,
"grad_norm": 0.22232507076273023,
"learning_rate": 1.751219056867751e-05,
"loss": 0.3649,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12534525990486145,
"step": 690,
"valid_targets_mean": 9685.4,
"valid_targets_min": 3439
},
{
"epoch": 4.137176938369781,
"grad_norm": 0.2160815037269669,
"learning_rate": 1.7217844863097774e-05,
"loss": 0.3704,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12021255493164062,
"step": 695,
"valid_targets_mean": 9724.7,
"valid_targets_min": 4045
},
{
"epoch": 4.166998011928429,
"grad_norm": 0.22504176113051863,
"learning_rate": 1.6924112413487382e-05,
"loss": 0.3711,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12961643934249878,
"step": 700,
"valid_targets_mean": 10400.0,
"valid_targets_min": 3902
},
{
"epoch": 4.1968190854870775,
"grad_norm": 0.2239146324338242,
"learning_rate": 1.6631057965760674e-05,
"loss": 0.3784,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1256789118051529,
"step": 705,
"valid_targets_mean": 10124.2,
"valid_targets_min": 2297
},
{
"epoch": 4.226640159045726,
"grad_norm": 0.24637631808051266,
"learning_rate": 1.633874611638353e-05,
"loss": 0.3695,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11531466245651245,
"step": 710,
"valid_targets_mean": 9248.9,
"valid_targets_min": 3223
},
{
"epoch": 4.256461232604374,
"grad_norm": 0.2436999109413977,
"learning_rate": 1.6047241298134767e-05,
"loss": 0.3664,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12499426305294037,
"step": 715,
"valid_targets_mean": 8669.4,
"valid_targets_min": 2360
},
{
"epoch": 4.286282306163022,
"grad_norm": 0.23915655247210107,
"learning_rate": 1.5756607765903525e-05,
"loss": 0.3673,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13046535849571228,
"step": 720,
"valid_targets_mean": 9301.4,
"valid_targets_min": 3807
},
{
"epoch": 4.31610337972167,
"grad_norm": 0.27408527428068585,
"learning_rate": 1.5466909582525893e-05,
"loss": 0.3664,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10745998471975327,
"step": 725,
"valid_targets_mean": 8807.9,
"valid_targets_min": 2492
},
{
"epoch": 4.345924453280318,
"grad_norm": 0.22513801096410269,
"learning_rate": 1.51782106046639e-05,
"loss": 0.3633,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12577703595161438,
"step": 730,
"valid_targets_mean": 9537.7,
"valid_targets_min": 2496
},
{
"epoch": 4.3757455268389664,
"grad_norm": 0.22189885599414932,
"learning_rate": 1.4890574468729893e-05,
"loss": 0.3671,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11032432317733765,
"step": 735,
"valid_targets_mean": 8715.6,
"valid_targets_min": 2174
},
{
"epoch": 4.405566600397615,
"grad_norm": 0.2262677009022974,
"learning_rate": 1.4604064576859513e-05,
"loss": 0.3695,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12237927317619324,
"step": 740,
"valid_targets_mean": 8992.1,
"valid_targets_min": 4707
},
{
"epoch": 4.435387673956263,
"grad_norm": 0.21753810116362765,
"learning_rate": 1.43187440829363e-05,
"loss": 0.3681,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13011434674263,
"step": 745,
"valid_targets_mean": 9628.3,
"valid_targets_min": 1150
},
{
"epoch": 4.465208747514911,
"grad_norm": 0.23166763660646592,
"learning_rate": 1.4034675878670964e-05,
"loss": 0.366,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11755146086215973,
"step": 750,
"valid_targets_mean": 8975.1,
"valid_targets_min": 3581
},
{
"epoch": 4.495029821073558,
"grad_norm": 0.24948530398746774,
"learning_rate": 1.3751922579738566e-05,
"loss": 0.3671,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1294461041688919,
"step": 755,
"valid_targets_mean": 9031.1,
"valid_targets_min": 2432
},
{
"epoch": 4.524850894632207,
"grad_norm": 0.22634991751036648,
"learning_rate": 1.3470546511976395e-05,
"loss": 0.3653,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13205093145370483,
"step": 760,
"valid_targets_mean": 10131.7,
"valid_targets_min": 3011
},
{
"epoch": 4.5546719681908545,
"grad_norm": 0.2407875833801288,
"learning_rate": 1.3190609697645882e-05,
"loss": 0.3698,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11704811453819275,
"step": 765,
"valid_targets_mean": 8992.2,
"valid_targets_min": 2746
},
{
"epoch": 4.584493041749503,
"grad_norm": 0.2232911058573211,
"learning_rate": 1.2912173841761288e-05,
"loss": 0.3648,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12442412227392197,
"step": 770,
"valid_targets_mean": 10025.8,
"valid_targets_min": 4098
},
{
"epoch": 4.614314115308151,
"grad_norm": 0.21682935538971534,
"learning_rate": 1.2635300318488426e-05,
"loss": 0.3668,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12467293441295624,
"step": 775,
"valid_targets_mean": 9998.5,
"valid_targets_min": 3071
},
{
"epoch": 4.644135188866799,
"grad_norm": 0.22390557119853882,
"learning_rate": 1.236005015761629e-05,
"loss": 0.3761,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1269168108701706,
"step": 780,
"valid_targets_mean": 10062.8,
"valid_targets_min": 2395
},
{
"epoch": 4.673956262425447,
"grad_norm": 0.24000520155840155,
"learning_rate": 1.2086484031104515e-05,
"loss": 0.3719,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11352889239788055,
"step": 785,
"valid_targets_mean": 8199.0,
"valid_targets_min": 3394
},
{
"epoch": 4.703777335984095,
"grad_norm": 0.21139692598361529,
"learning_rate": 1.1814662239709851e-05,
"loss": 0.3745,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12804041802883148,
"step": 790,
"valid_targets_mean": 10180.8,
"valid_targets_min": 3169
},
{
"epoch": 4.7335984095427435,
"grad_norm": 0.23419559497822282,
"learning_rate": 1.1544644699694307e-05,
"loss": 0.3631,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1193591058254242,
"step": 795,
"valid_targets_mean": 9056.3,
"valid_targets_min": 3674
},
{
"epoch": 4.763419483101392,
"grad_norm": 0.24646021525100137,
"learning_rate": 1.1276490929618177e-05,
"loss": 0.3653,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11579230427742004,
"step": 800,
"valid_targets_mean": 9170.7,
"valid_targets_min": 2159
},
{
"epoch": 4.79324055666004,
"grad_norm": 0.22174275700505441,
"learning_rate": 1.1010260037220643e-05,
"loss": 0.3688,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12448688596487045,
"step": 805,
"valid_targets_mean": 9580.0,
"valid_targets_min": 1459
},
{
"epoch": 4.823061630218688,
"grad_norm": 0.22272875692529412,
"learning_rate": 1.0746010706390981e-05,
"loss": 0.3717,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12802085280418396,
"step": 810,
"valid_targets_mean": 9987.5,
"valid_targets_min": 2232
},
{
"epoch": 4.852882703777336,
"grad_norm": 0.2138007940278265,
"learning_rate": 1.048380118423316e-05,
"loss": 0.3742,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12118125706911087,
"step": 815,
"valid_targets_mean": 9264.9,
"valid_targets_min": 2934
},
{
"epoch": 4.882703777335984,
"grad_norm": 0.22079166609200498,
"learning_rate": 1.0223689268226754e-05,
"loss": 0.3699,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12725740671157837,
"step": 820,
"valid_targets_mean": 10555.1,
"valid_targets_min": 993
},
{
"epoch": 4.912524850894632,
"grad_norm": 0.21264177919293614,
"learning_rate": 9.965732293486929e-06,
"loss": 0.3639,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13087576627731323,
"step": 825,
"valid_targets_mean": 10235.8,
"valid_targets_min": 2786
},
{
"epoch": 4.942345924453281,
"grad_norm": 0.24182464066253673,
"learning_rate": 9.709987120126371e-06,
"loss": 0.3657,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12286906689405441,
"step": 830,
"valid_targets_mean": 8107.4,
"valid_targets_min": 2343
},
{
"epoch": 4.972166998011929,
"grad_norm": 0.2328727528842246,
"learning_rate": 9.456510120721911e-06,
"loss": 0.3713,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11418526619672775,
"step": 835,
"valid_targets_mean": 8587.7,
"valid_targets_min": 2879
},
{
"epoch": 5.0,
"grad_norm": 0.28105201212007624,
"learning_rate": 9.205357167888595e-06,
"loss": 0.3688,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17316186428070068,
"step": 840,
"valid_targets_mean": 9005.3,
"valid_targets_min": 3954
},
{
"epoch": 5.029821073558648,
"grad_norm": 0.30217719030448426,
"learning_rate": 8.956583621963996e-06,
"loss": 0.3667,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11961351335048676,
"step": 845,
"valid_targets_mean": 9328.7,
"valid_targets_min": 2441
},
{
"epoch": 5.059642147117296,
"grad_norm": 0.2636234100864308,
"learning_rate": 8.710244318805406e-06,
"loss": 0.3641,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11540426313877106,
"step": 850,
"valid_targets_mean": 9178.5,
"valid_targets_min": 2654
},
{
"epoch": 5.0894632206759445,
"grad_norm": 0.2243454259706472,
"learning_rate": 8.466393557702659e-06,
"loss": 0.3598,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11149504780769348,
"step": 855,
"valid_targets_mean": 9037.4,
"valid_targets_min": 2353
},
{
"epoch": 5.119284294234593,
"grad_norm": 0.2149403201957784,
"learning_rate": 8.225085089409231e-06,
"loss": 0.3652,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1426597237586975,
"step": 860,
"valid_targets_mean": 10396.4,
"valid_targets_min": 3269
},
{
"epoch": 5.149105367793241,
"grad_norm": 0.22198093615836403,
"learning_rate": 7.98637210429422e-06,
"loss": 0.3676,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12441132217645645,
"step": 865,
"valid_targets_mean": 10199.0,
"valid_targets_min": 2953
},
{
"epoch": 5.178926441351889,
"grad_norm": 0.21985018694532593,
"learning_rate": 7.750307220617892e-06,
"loss": 0.3606,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12127161026000977,
"step": 870,
"valid_targets_mean": 9964.5,
"valid_targets_min": 5004
},
{
"epoch": 5.208747514910537,
"grad_norm": 0.21440275235872072,
"learning_rate": 7.5169424729333e-06,
"loss": 0.3626,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12604345381259918,
"step": 875,
"valid_targets_mean": 9950.9,
"valid_targets_min": 3104
},
{
"epoch": 5.238568588469185,
"grad_norm": 0.22878390162081763,
"learning_rate": 7.286329300616575e-06,
"loss": 0.3708,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13689112663269043,
"step": 880,
"valid_targets_mean": 10335.2,
"valid_targets_min": 3606
},
{
"epoch": 5.2683896620278325,
"grad_norm": 0.2218645567270629,
"learning_rate": 7.058518536528427e-06,
"loss": 0.3711,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12371527403593063,
"step": 885,
"valid_targets_mean": 9315.7,
"valid_targets_min": 3386
},
{
"epoch": 5.298210735586481,
"grad_norm": 0.20525056421525675,
"learning_rate": 6.833560395809307e-06,
"loss": 0.3666,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12798547744750977,
"step": 890,
"valid_targets_mean": 9637.3,
"valid_targets_min": 2280
},
{
"epoch": 5.328031809145129,
"grad_norm": 0.22438108847516308,
"learning_rate": 6.611504464810754e-06,
"loss": 0.3633,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12381379306316376,
"step": 895,
"valid_targets_mean": 9930.8,
"valid_targets_min": 3306
},
{
"epoch": 5.357852882703777,
"grad_norm": 0.2014227570589018,
"learning_rate": 6.392399690165328e-06,
"loss": 0.3586,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10803110897541046,
"step": 900,
"valid_targets_mean": 8890.2,
"valid_targets_min": 2494
},
{
"epoch": 5.387673956262425,
"grad_norm": 0.20843428620879534,
"learning_rate": 6.176294367997564e-06,
"loss": 0.3643,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1383419781923294,
"step": 905,
"valid_targets_mean": 10496.9,
"valid_targets_min": 2517
},
{
"epoch": 5.417495029821073,
"grad_norm": 0.2279795640183649,
"learning_rate": 5.9632361332783075e-06,
"loss": 0.3638,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12038109451532364,
"step": 910,
"valid_targets_mean": 9726.5,
"valid_targets_min": 3070
},
{
"epoch": 5.4473161033797215,
"grad_norm": 0.21438762423589458,
"learning_rate": 5.753271949324779e-06,
"loss": 0.3678,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11860641837120056,
"step": 915,
"valid_targets_mean": 8619.2,
"valid_targets_min": 3587
},
{
"epoch": 5.47713717693837,
"grad_norm": 0.2042751340473231,
"learning_rate": 5.546448097448709e-06,
"loss": 0.3617,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12780383229255676,
"step": 920,
"valid_targets_mean": 10348.6,
"valid_targets_min": 3762
},
{
"epoch": 5.506958250497018,
"grad_norm": 0.20548277545029267,
"learning_rate": 5.342810166754773e-06,
"loss": 0.3635,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10209870338439941,
"step": 925,
"valid_targets_mean": 8156.8,
"valid_targets_min": 2395
},
{
"epoch": 5.536779324055666,
"grad_norm": 0.21298707779747966,
"learning_rate": 5.142403044091635e-06,
"loss": 0.3635,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12702594697475433,
"step": 930,
"valid_targets_mean": 10764.0,
"valid_targets_min": 4226
},
{
"epoch": 5.566600397614314,
"grad_norm": 0.21096182884472378,
"learning_rate": 4.945270904157766e-06,
"loss": 0.3693,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1142885610461235,
"step": 935,
"valid_targets_mean": 8712.3,
"valid_targets_min": 3123
},
{
"epoch": 5.596421471172962,
"grad_norm": 0.2137029350386158,
"learning_rate": 4.751457199764249e-06,
"loss": 0.3619,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12835736572742462,
"step": 940,
"valid_targets_mean": 9765.0,
"valid_targets_min": 2831
},
{
"epoch": 5.6262425447316105,
"grad_norm": 0.22355702289752202,
"learning_rate": 4.5610046522567e-06,
"loss": 0.3718,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12691718339920044,
"step": 945,
"valid_targets_mean": 9729.5,
"valid_targets_min": 3585
},
{
"epoch": 5.656063618290259,
"grad_norm": 0.21492851974551722,
"learning_rate": 4.373955242098427e-06,
"loss": 0.3683,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11885521560907364,
"step": 950,
"valid_targets_mean": 9381.0,
"valid_targets_min": 4095
},
{
"epoch": 5.685884691848907,
"grad_norm": 0.20233385939777984,
"learning_rate": 4.190350199616888e-06,
"loss": 0.3672,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12096765637397766,
"step": 955,
"valid_targets_mean": 10414.7,
"valid_targets_min": 3631
},
{
"epoch": 5.715705765407555,
"grad_norm": 0.22049322690289747,
"learning_rate": 4.01022999591552e-06,
"loss": 0.3625,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13061478734016418,
"step": 960,
"valid_targets_mean": 9200.2,
"valid_targets_min": 2105
},
{
"epoch": 5.745526838966203,
"grad_norm": 0.20901029363729942,
"learning_rate": 3.833634333952882e-06,
"loss": 0.3584,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1363891214132309,
"step": 965,
"valid_targets_mean": 9776.2,
"valid_targets_min": 3927
},
{
"epoch": 5.775347912524851,
"grad_norm": 0.26991051093015467,
"learning_rate": 3.6606021397911605e-06,
"loss": 0.3651,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12989675998687744,
"step": 970,
"valid_targets_mean": 10042.0,
"valid_targets_min": 3138
},
{
"epoch": 5.805168986083499,
"grad_norm": 0.19237954867107937,
"learning_rate": 3.491171554015886e-06,
"loss": 0.3661,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10952335596084595,
"step": 975,
"valid_targets_mean": 9541.4,
"valid_targets_min": 2110
},
{
"epoch": 5.834990059642147,
"grad_norm": 0.2239480154769859,
"learning_rate": 3.3253799233288064e-06,
"loss": 0.3686,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12480639666318893,
"step": 980,
"valid_targets_mean": 9702.5,
"valid_targets_min": 3851
},
{
"epoch": 5.864811133200796,
"grad_norm": 0.19907388113430682,
"learning_rate": 3.1632637923157517e-06,
"loss": 0.3629,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11497035622596741,
"step": 985,
"valid_targets_mean": 9566.5,
"valid_targets_min": 2619
},
{
"epoch": 5.894632206759443,
"grad_norm": 0.20959374164502262,
"learning_rate": 3.004858895391294e-06,
"loss": 0.3634,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11463524401187897,
"step": 990,
"valid_targets_mean": 8378.8,
"valid_targets_min": 2114
},
{
"epoch": 5.924453280318091,
"grad_norm": 0.1941612766228534,
"learning_rate": 2.8502001489220067e-06,
"loss": 0.3623,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11086033284664154,
"step": 995,
"valid_targets_mean": 8698.3,
"valid_targets_min": 353
},
{
"epoch": 5.954274353876739,
"grad_norm": 0.19209598607356373,
"learning_rate": 2.6993216435300194e-06,
"loss": 0.3578,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1245051771402359,
"step": 1000,
"valid_targets_mean": 9976.9,
"valid_targets_min": 2131
},
{
"epoch": 5.9840954274353875,
"grad_norm": 0.1970281152353345,
"learning_rate": 2.5522566365786094e-06,
"loss": 0.3568,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12308112531900406,
"step": 1005,
"valid_targets_mean": 10133.8,
"valid_targets_min": 4146
},
{
"epoch": 6.01192842942346,
"grad_norm": 0.20447659959488607,
"learning_rate": 2.4090375448414505e-06,
"loss": 0.3636,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12563642859458923,
"step": 1010,
"valid_targets_mean": 9465.2,
"valid_targets_min": 2441
},
{
"epoch": 6.041749502982108,
"grad_norm": 0.20437038455108572,
"learning_rate": 2.26969593735715e-06,
"loss": 0.3597,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11522137373685837,
"step": 1015,
"valid_targets_mean": 9140.0,
"valid_targets_min": 2534
},
{
"epoch": 6.071570576540755,
"grad_norm": 0.22257426260166877,
"learning_rate": 2.1342625284706565e-06,
"loss": 0.364,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1390356570482254,
"step": 1020,
"valid_targets_mean": 10661.7,
"valid_targets_min": 4246
},
{
"epoch": 6.101391650099403,
"grad_norm": 0.19365813495938394,
"learning_rate": 2.002767171063047e-06,
"loss": 0.3585,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11969684064388275,
"step": 1025,
"valid_targets_mean": 9768.3,
"valid_targets_min": 5211
},
{
"epoch": 6.131212723658051,
"grad_norm": 0.20378104515618026,
"learning_rate": 1.875238849971226e-06,
"loss": 0.3634,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1135728657245636,
"step": 1030,
"valid_targets_mean": 9446.8,
"valid_targets_min": 3712
},
{
"epoch": 6.1610337972166995,
"grad_norm": 0.239141322851671,
"learning_rate": 1.7517056755989336e-06,
"loss": 0.3607,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12127785384654999,
"step": 1035,
"valid_targets_mean": 8748.2,
"valid_targets_min": 2842
},
{
"epoch": 6.190854870775348,
"grad_norm": 0.19410931594859926,
"learning_rate": 1.6321948777205232e-06,
"loss": 0.3594,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11646410822868347,
"step": 1040,
"valid_targets_mean": 8943.3,
"valid_targets_min": 2508
},
{
"epoch": 6.220675944333996,
"grad_norm": 0.19578149244049262,
"learning_rate": 1.5167327994788484e-06,
"loss": 0.3653,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13031047582626343,
"step": 1045,
"valid_targets_mean": 11259.7,
"valid_targets_min": 4629
},
{
"epoch": 6.250497017892644,
"grad_norm": 0.1934858659957491,
"learning_rate": 1.405344891578566e-06,
"loss": 0.3558,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11782944202423096,
"step": 1050,
"valid_targets_mean": 10102.9,
"valid_targets_min": 5316
},
{
"epoch": 6.280318091451292,
"grad_norm": 0.19646734609934835,
"learning_rate": 1.2980557066761912e-06,
"loss": 0.364,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1253916323184967,
"step": 1055,
"valid_targets_mean": 10088.6,
"valid_targets_min": 1946
},
{
"epoch": 6.31013916500994,
"grad_norm": 0.20590729415474335,
"learning_rate": 1.1948888939680647e-06,
"loss": 0.366,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12973612546920776,
"step": 1060,
"valid_targets_mean": 10507.1,
"valid_targets_min": 2498
},
{
"epoch": 6.3399602385685885,
"grad_norm": 0.194011111497666,
"learning_rate": 1.0958671939774935e-06,
"loss": 0.3563,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12194149196147919,
"step": 1065,
"valid_targets_mean": 9570.9,
"valid_targets_min": 2391
},
{
"epoch": 6.369781312127237,
"grad_norm": 0.20139731344154319,
"learning_rate": 1.0010124335421722e-06,
"loss": 0.3626,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11091062426567078,
"step": 1070,
"valid_targets_mean": 9067.2,
"valid_targets_min": 2482
},
{
"epoch": 6.399602385685885,
"grad_norm": 0.19867046434302968,
"learning_rate": 9.103455210030066e-07,
"loss": 0.3616,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11365881562232971,
"step": 1075,
"valid_targets_mean": 8556.3,
"valid_targets_min": 2374
},
{
"epoch": 6.429423459244533,
"grad_norm": 0.1853582529262351,
"learning_rate": 8.238864415954029e-07,
"loss": 0.3577,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1302383840084076,
"step": 1080,
"valid_targets_mean": 9907.8,
"valid_targets_min": 2467
},
{
"epoch": 6.459244532803181,
"grad_norm": 0.20238865724155874,
"learning_rate": 7.416542530440174e-07,
"loss": 0.3685,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1176309734582901,
"step": 1085,
"valid_targets_mean": 9380.4,
"valid_targets_min": 2382
},
{
"epoch": 6.489065606361829,
"grad_norm": 0.18362044771096164,
"learning_rate": 6.636670813619584e-07,
"loss": 0.3614,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11380837857723236,
"step": 1090,
"valid_targets_mean": 10026.3,
"valid_targets_min": 4177
},
{
"epoch": 6.518886679920477,
"grad_norm": 0.20523149374890035,
"learning_rate": 5.899421168553887e-07,
"loss": 0.3669,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12352012097835541,
"step": 1095,
"valid_targets_mean": 9891.7,
"valid_targets_min": 2916
},
{
"epoch": 6.548707753479126,
"grad_norm": 0.20207344241537156,
"learning_rate": 5.204956103343217e-07,
"loss": 0.3696,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12412673979997635,
"step": 1100,
"valid_targets_mean": 9182.8,
"valid_targets_min": 2008
},
{
"epoch": 6.578528827037774,
"grad_norm": 0.2681278098515819,
"learning_rate": 4.5534286953056617e-07,
"loss": 0.3614,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12740249931812286,
"step": 1105,
"valid_targets_mean": 10408.3,
"valid_targets_min": 3345
},
{
"epoch": 6.608349900596421,
"grad_norm": 0.2219640884288412,
"learning_rate": 3.9449825572350777e-07,
"loss": 0.3632,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13584564626216888,
"step": 1110,
"valid_targets_mean": 10947.2,
"valid_targets_min": 2619
},
{
"epoch": 6.63817097415507,
"grad_norm": 0.20125432861092377,
"learning_rate": 3.379751805745257e-07,
"loss": 0.3617,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12210185825824738,
"step": 1115,
"valid_targets_mean": 9717.5,
"valid_targets_min": 3124
},
{
"epoch": 6.667992047713717,
"grad_norm": 0.1994889274316608,
"learning_rate": 2.857861031707532e-07,
"loss": 0.3618,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11578874289989471,
"step": 1120,
"valid_targets_mean": 8496.7,
"valid_targets_min": 2884
},
{
"epoch": 6.6978131212723655,
"grad_norm": 0.19330788740500873,
"learning_rate": 2.3794252727875611e-07,
"loss": 0.3562,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11735907196998596,
"step": 1125,
"valid_targets_mean": 9695.8,
"valid_targets_min": 2649
},
{
"epoch": 6.727634194831014,
"grad_norm": 0.21484500199290843,
"learning_rate": 1.9445499880883067e-07,
"loss": 0.3638,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12110140174627304,
"step": 1130,
"valid_targets_mean": 9903.2,
"valid_targets_min": 2360
},
{
"epoch": 6.757455268389662,
"grad_norm": 0.1965552602542897,
"learning_rate": 1.553331034904293e-07,
"loss": 0.3603,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12987342476844788,
"step": 1135,
"valid_targets_mean": 10510.9,
"valid_targets_min": 4514
},
{
"epoch": 6.78727634194831,
"grad_norm": 0.19572567045219616,
"learning_rate": 1.2058546475921305e-07,
"loss": 0.3674,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12360957264900208,
"step": 1140,
"valid_targets_mean": 9654.7,
"valid_targets_min": 2551
},
{
"epoch": 6.817097415506958,
"grad_norm": 0.20329684782457239,
"learning_rate": 9.021974185625004e-08,
"loss": 0.3644,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12088020890951157,
"step": 1145,
"valid_targets_mean": 9744.9,
"valid_targets_min": 2669
},
{
"epoch": 6.846918489065606,
"grad_norm": 0.19353349526148012,
"learning_rate": 6.424262813971904e-08,
"loss": 0.3656,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11435583233833313,
"step": 1150,
"valid_targets_mean": 9121.9,
"valid_targets_min": 1150
},
{
"epoch": 6.8767395626242545,
"grad_norm": 0.1849991984465579,
"learning_rate": 4.2659849609520966e-08,
"loss": 0.3632,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12063400447368622,
"step": 1155,
"valid_targets_mean": 10022.1,
"valid_targets_min": 2233
},
{
"epoch": 6.906560636182903,
"grad_norm": 0.19007497403166831,
"learning_rate": 2.5476163645143936e-08,
"loss": 0.3687,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11399763822555542,
"step": 1160,
"valid_targets_mean": 9254.8,
"valid_targets_min": 3517
},
{
"epoch": 6.936381709741551,
"grad_norm": 0.1948261449647107,
"learning_rate": 1.2695357957002163e-08,
"loss": 0.3579,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09775931388139725,
"step": 1165,
"valid_targets_mean": 8721.5,
"valid_targets_min": 2682
},
{
"epoch": 6.966202783300199,
"grad_norm": 0.21107993966883115,
"learning_rate": 4.32024975154155e-09,
"loss": 0.3643,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11073368787765503,
"step": 1170,
"valid_targets_mean": 8784.6,
"valid_targets_min": 2911
},
{
"epoch": 6.996023856858847,
"grad_norm": 0.19162092330786054,
"learning_rate": 3.5268511025421393e-10,
"loss": 0.3673,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11796814203262329,
"step": 1175,
"valid_targets_mean": 9922.6,
"valid_targets_min": 3370
},
{
"epoch": 7.0,
"step": 1176,
"total_flos": 4.68228205146905e+18,
"train_loss": 0.0,
"train_runtime": 1.3887,
"train_samples_per_second": 81133.704,
"train_steps_per_second": 846.823
}
],
"logging_steps": 5,
"max_steps": 1176,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 300,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.68228205146905e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}