{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "eval_steps": 500, "global_step": 1176, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02982107355864811, "grad_norm": 13.09333147683712, "learning_rate": 1.3559322033898307e-06, "loss": 0.9066, "loss_nan_ranks": 0, "loss_rank_avg": 0.3346264064311981, "step": 5, "valid_targets_mean": 9695.2, "valid_targets_min": 3418 }, { "epoch": 0.05964214711729622, "grad_norm": 9.149937267728706, "learning_rate": 3.0508474576271192e-06, "loss": 0.8761, "loss_nan_ranks": 0, "loss_rank_avg": 0.28128373622894287, "step": 10, "valid_targets_mean": 9758.7, "valid_targets_min": 2911 }, { "epoch": 0.08946322067594434, "grad_norm": 4.520961825372613, "learning_rate": 4.745762711864408e-06, "loss": 0.8155, "loss_nan_ranks": 0, "loss_rank_avg": 0.23452389240264893, "step": 15, "valid_targets_mean": 8859.2, "valid_targets_min": 1366 }, { "epoch": 0.11928429423459244, "grad_norm": 2.114054619234936, "learning_rate": 6.440677966101695e-06, "loss": 0.7578, "loss_nan_ranks": 0, "loss_rank_avg": 0.24326761066913605, "step": 20, "valid_targets_mean": 8662.6, "valid_targets_min": 2822 }, { "epoch": 0.14910536779324055, "grad_norm": 1.4393876248426487, "learning_rate": 8.135593220338983e-06, "loss": 0.7069, "loss_nan_ranks": 0, "loss_rank_avg": 0.260776162147522, "step": 25, "valid_targets_mean": 10508.2, "valid_targets_min": 3664 }, { "epoch": 0.17892644135188868, "grad_norm": 1.1642740315365356, "learning_rate": 9.830508474576272e-06, "loss": 0.6928, "loss_nan_ranks": 0, "loss_rank_avg": 0.2416732907295227, "step": 30, "valid_targets_mean": 9538.0, "valid_targets_min": 2536 }, { "epoch": 0.20874751491053678, "grad_norm": 0.7766319665933883, "learning_rate": 1.1525423728813561e-05, "loss": 0.6532, "loss_nan_ranks": 0, "loss_rank_avg": 0.21398067474365234, "step": 35, "valid_targets_mean": 9878.9, "valid_targets_min": 3211 }, { "epoch": 0.23856858846918488, "grad_norm": 0.5838308603893411, "learning_rate": 1.3220338983050848e-05, "loss": 0.6283, "loss_nan_ranks": 0, "loss_rank_avg": 0.20412425696849823, "step": 40, "valid_targets_mean": 9810.7, "valid_targets_min": 2899 }, { "epoch": 0.268389662027833, "grad_norm": 0.5037663875608299, "learning_rate": 1.4915254237288137e-05, "loss": 0.605, "loss_nan_ranks": 0, "loss_rank_avg": 0.1884448379278183, "step": 45, "valid_targets_mean": 9185.9, "valid_targets_min": 2971 }, { "epoch": 0.2982107355864811, "grad_norm": 0.39709002181472497, "learning_rate": 1.6610169491525424e-05, "loss": 0.5845, "loss_nan_ranks": 0, "loss_rank_avg": 0.17413000762462616, "step": 50, "valid_targets_mean": 8571.4, "valid_targets_min": 2940 }, { "epoch": 0.32803180914512925, "grad_norm": 0.33127605925110176, "learning_rate": 1.8305084745762713e-05, "loss": 0.5575, "loss_nan_ranks": 0, "loss_rank_avg": 0.16696935892105103, "step": 55, "valid_targets_mean": 9093.4, "valid_targets_min": 2520 }, { "epoch": 0.35785288270377735, "grad_norm": 0.3215099174730303, "learning_rate": 2e-05, "loss": 0.5544, "loss_nan_ranks": 0, "loss_rank_avg": 0.17364796996116638, "step": 60, "valid_targets_mean": 8888.1, "valid_targets_min": 2491 }, { "epoch": 0.38767395626242546, "grad_norm": 0.2821181675694596, "learning_rate": 2.169491525423729e-05, "loss": 0.5342, "loss_nan_ranks": 0, "loss_rank_avg": 0.18019835650920868, "step": 65, "valid_targets_mean": 9505.2, "valid_targets_min": 3936 }, { "epoch": 0.41749502982107356, "grad_norm": 0.2414187942326976, "learning_rate": 2.338983050847458e-05, "loss": 0.5251, "loss_nan_ranks": 0, "loss_rank_avg": 0.18193358182907104, "step": 70, "valid_targets_mean": 9565.2, "valid_targets_min": 3319 }, { "epoch": 0.44731610337972166, "grad_norm": 0.22420801544120914, "learning_rate": 2.5084745762711865e-05, "loss": 0.512, "loss_nan_ranks": 0, "loss_rank_avg": 0.14556047320365906, "step": 75, "valid_targets_mean": 9219.1, "valid_targets_min": 3220 }, { "epoch": 0.47713717693836977, "grad_norm": 0.2412726568511184, "learning_rate": 2.6779661016949153e-05, "loss": 0.5028, "loss_nan_ranks": 0, "loss_rank_avg": 0.16368263959884644, "step": 80, "valid_targets_mean": 8981.2, "valid_targets_min": 3194 }, { "epoch": 0.5069582504970179, "grad_norm": 0.23318641532107723, "learning_rate": 2.8474576271186442e-05, "loss": 0.5007, "loss_nan_ranks": 0, "loss_rank_avg": 0.16197912395000458, "step": 85, "valid_targets_mean": 8739.4, "valid_targets_min": 2389 }, { "epoch": 0.536779324055666, "grad_norm": 0.23052715262359902, "learning_rate": 3.016949152542373e-05, "loss": 0.4809, "loss_nan_ranks": 0, "loss_rank_avg": 0.1737665832042694, "step": 90, "valid_targets_mean": 10664.4, "valid_targets_min": 1536 }, { "epoch": 0.5666003976143141, "grad_norm": 0.22528756572218692, "learning_rate": 3.186440677966102e-05, "loss": 0.4788, "loss_nan_ranks": 0, "loss_rank_avg": 0.1719246357679367, "step": 95, "valid_targets_mean": 10288.0, "valid_targets_min": 2629 }, { "epoch": 0.5964214711729622, "grad_norm": 0.2601607078156621, "learning_rate": 3.355932203389831e-05, "loss": 0.4687, "loss_nan_ranks": 0, "loss_rank_avg": 0.1524590253829956, "step": 100, "valid_targets_mean": 9881.0, "valid_targets_min": 2893 }, { "epoch": 0.6262425447316103, "grad_norm": 0.22675199525629694, "learning_rate": 3.52542372881356e-05, "loss": 0.4565, "loss_nan_ranks": 0, "loss_rank_avg": 0.15666724741458893, "step": 105, "valid_targets_mean": 10118.0, "valid_targets_min": 3280 }, { "epoch": 0.6560636182902585, "grad_norm": 0.23545944466896276, "learning_rate": 3.6949152542372886e-05, "loss": 0.4635, "loss_nan_ranks": 0, "loss_rank_avg": 0.15132704377174377, "step": 110, "valid_targets_mean": 9088.4, "valid_targets_min": 1751 }, { "epoch": 0.6858846918489065, "grad_norm": 0.2552945140336154, "learning_rate": 3.8644067796610175e-05, "loss": 0.4624, "loss_nan_ranks": 0, "loss_rank_avg": 0.15736186504364014, "step": 115, "valid_targets_mean": 9703.8, "valid_targets_min": 2496 }, { "epoch": 0.7157057654075547, "grad_norm": 0.2661696526978554, "learning_rate": 3.999991182852808e-05, "loss": 0.4518, "loss_nan_ranks": 0, "loss_rank_avg": 0.14347058534622192, "step": 120, "valid_targets_mean": 8487.7, "valid_targets_min": 2297 }, { "epoch": 0.7455268389662028, "grad_norm": 0.24265446044028421, "learning_rate": 3.999682590863935e-05, "loss": 0.4401, "loss_nan_ranks": 0, "loss_rank_avg": 0.15881270170211792, "step": 125, "valid_targets_mean": 10650.6, "valid_targets_min": 4557 }, { "epoch": 0.7753479125248509, "grad_norm": 0.3477779737132978, "learning_rate": 3.9989332192544725e-05, "loss": 0.4454, "loss_nan_ranks": 0, "loss_rank_avg": 0.1669636368751526, "step": 130, "valid_targets_mean": 10482.2, "valid_targets_min": 3995 }, { "epoch": 0.805168986083499, "grad_norm": 0.2688791723605399, "learning_rate": 3.997743233204502e-05, "loss": 0.4463, "loss_nan_ranks": 0, "loss_rank_avg": 0.1437249481678009, "step": 135, "valid_targets_mean": 9449.9, "valid_targets_min": 2652 }, { "epoch": 0.8349900596421471, "grad_norm": 0.27803453281851986, "learning_rate": 3.996112895016452e-05, "loss": 0.4377, "loss_nan_ranks": 0, "loss_rank_avg": 0.13932648301124573, "step": 140, "valid_targets_mean": 9696.8, "valid_targets_min": 3542 }, { "epoch": 0.8648111332007953, "grad_norm": 0.2616493730811242, "learning_rate": 3.994042564057279e-05, "loss": 0.4405, "loss_nan_ranks": 0, "loss_rank_avg": 0.14841462671756744, "step": 145, "valid_targets_mean": 9823.4, "valid_targets_min": 3035 }, { "epoch": 0.8946322067594433, "grad_norm": 0.2763712349418631, "learning_rate": 3.9915326966792555e-05, "loss": 0.4393, "loss_nan_ranks": 0, "loss_rank_avg": 0.14706194400787354, "step": 150, "valid_targets_mean": 9098.7, "valid_targets_min": 2668 }, { "epoch": 0.9244532803180915, "grad_norm": 0.29517511116566714, "learning_rate": 3.9885838461193794e-05, "loss": 0.4316, "loss_nan_ranks": 0, "loss_rank_avg": 0.1425623744726181, "step": 155, "valid_targets_mean": 9000.5, "valid_targets_min": 3371 }, { "epoch": 0.9542743538767395, "grad_norm": 0.304747766618288, "learning_rate": 3.985196662377424e-05, "loss": 0.4372, "loss_nan_ranks": 0, "loss_rank_avg": 0.1514747142791748, "step": 160, "valid_targets_mean": 9558.1, "valid_targets_min": 3439 }, { "epoch": 0.9840954274353877, "grad_norm": 0.25971095278000167, "learning_rate": 3.981371892072661e-05, "loss": 0.4321, "loss_nan_ranks": 0, "loss_rank_avg": 0.14394722878932953, "step": 165, "valid_targets_mean": 9277.1, "valid_targets_min": 3851 }, { "epoch": 1.0119284294234592, "grad_norm": 0.2900070928223118, "learning_rate": 3.9771103782792956e-05, "loss": 0.428, "loss_nan_ranks": 0, "loss_rank_avg": 0.1467021405696869, "step": 170, "valid_targets_mean": 9804.5, "valid_targets_min": 3955 }, { "epoch": 1.0417495029821073, "grad_norm": 0.2562110209083835, "learning_rate": 3.9724130603406204e-05, "loss": 0.4214, "loss_nan_ranks": 0, "loss_rank_avg": 0.15313297510147095, "step": 175, "valid_targets_mean": 10062.1, "valid_targets_min": 2682 }, { "epoch": 1.0715705765407555, "grad_norm": 0.26570170838169876, "learning_rate": 3.9672809736619684e-05, "loss": 0.426, "loss_nan_ranks": 0, "loss_rank_avg": 0.1393103003501892, "step": 180, "valid_targets_mean": 9271.5, "valid_targets_min": 1624 }, { "epoch": 1.1013916500994037, "grad_norm": 0.2757881608821532, "learning_rate": 3.961715249482482e-05, "loss": 0.4128, "loss_nan_ranks": 0, "loss_rank_avg": 0.14483334124088287, "step": 185, "valid_targets_mean": 9806.2, "valid_targets_min": 2567 }, { "epoch": 1.1312127236580518, "grad_norm": 0.2593086099940217, "learning_rate": 3.95571711462576e-05, "loss": 0.4201, "loss_nan_ranks": 0, "loss_rank_avg": 0.15444408357143402, "step": 190, "valid_targets_mean": 10145.6, "valid_targets_min": 3190 }, { "epoch": 1.1610337972166997, "grad_norm": 0.3074440806567765, "learning_rate": 3.9492878912294345e-05, "loss": 0.4246, "loss_nan_ranks": 0, "loss_rank_avg": 0.13667774200439453, "step": 195, "valid_targets_mean": 8876.5, "valid_targets_min": 2465 }, { "epoch": 1.190854870775348, "grad_norm": 0.26202363702381226, "learning_rate": 3.942428996453741e-05, "loss": 0.4141, "loss_nan_ranks": 0, "loss_rank_avg": 0.1346900910139084, "step": 200, "valid_targets_mean": 9183.1, "valid_targets_min": 3115 }, { "epoch": 1.220675944333996, "grad_norm": 0.23482546143374838, "learning_rate": 3.935141942169138e-05, "loss": 0.4196, "loss_nan_ranks": 0, "loss_rank_avg": 0.14526072144508362, "step": 205, "valid_targets_mean": 10055.5, "valid_targets_min": 1170 }, { "epoch": 1.250497017892644, "grad_norm": 0.24783213003782373, "learning_rate": 3.927428334623054e-05, "loss": 0.4251, "loss_nan_ranks": 0, "loss_rank_avg": 0.13255912065505981, "step": 210, "valid_targets_mean": 10057.3, "valid_targets_min": 4200 }, { "epoch": 1.2803180914512922, "grad_norm": 0.3356314182794496, "learning_rate": 3.919289874085837e-05, "loss": 0.4219, "loss_nan_ranks": 0, "loss_rank_avg": 0.14556168019771576, "step": 215, "valid_targets_mean": 9935.3, "valid_targets_min": 4601 }, { "epoch": 1.3101391650099403, "grad_norm": 0.3205331125262116, "learning_rate": 3.910728354475961e-05, "loss": 0.4156, "loss_nan_ranks": 0, "loss_rank_avg": 0.14186373353004456, "step": 220, "valid_targets_mean": 10047.3, "valid_targets_min": 3586 }, { "epoch": 1.3399602385685885, "grad_norm": 0.24942595965959885, "learning_rate": 3.9017456629646126e-05, "loss": 0.4047, "loss_nan_ranks": 0, "loss_rank_avg": 0.1405375897884369, "step": 225, "valid_targets_mean": 10060.8, "valid_targets_min": 3242 }, { "epoch": 1.3697813121272366, "grad_norm": 0.2584612306483155, "learning_rate": 3.8923437795597056e-05, "loss": 0.4182, "loss_nan_ranks": 0, "loss_rank_avg": 0.11859870702028275, "step": 230, "valid_targets_mean": 8309.6, "valid_targets_min": 3444 }, { "epoch": 1.3996023856858848, "grad_norm": 0.25784491517610686, "learning_rate": 3.882524776669442e-05, "loss": 0.415, "loss_nan_ranks": 0, "loss_rank_avg": 0.13979320228099823, "step": 235, "valid_targets_mean": 9624.1, "valid_targets_min": 2944 }, { "epoch": 1.4294234592445327, "grad_norm": 0.261391607331903, "learning_rate": 3.872290818645497e-05, "loss": 0.4121, "loss_nan_ranks": 0, "loss_rank_avg": 0.1375332772731781, "step": 240, "valid_targets_mean": 9274.7, "valid_targets_min": 2578 }, { "epoch": 1.459244532803181, "grad_norm": 0.26508343196177364, "learning_rate": 3.861644161305948e-05, "loss": 0.4094, "loss_nan_ranks": 0, "loss_rank_avg": 0.1376476287841797, "step": 245, "valid_targets_mean": 9600.3, "valid_targets_min": 3024 }, { "epoch": 1.489065606361829, "grad_norm": 0.25905924292751825, "learning_rate": 3.850587151438031e-05, "loss": 0.4075, "loss_nan_ranks": 0, "loss_rank_avg": 0.12836024165153503, "step": 250, "valid_targets_mean": 8532.4, "valid_targets_min": 2314 }, { "epoch": 1.518886679920477, "grad_norm": 0.24769087290084157, "learning_rate": 3.839122226280854e-05, "loss": 0.4106, "loss_nan_ranks": 0, "loss_rank_avg": 0.1362186223268509, "step": 255, "valid_targets_mean": 9983.8, "valid_targets_min": 3384 }, { "epoch": 1.5487077534791251, "grad_norm": 0.24871240968489136, "learning_rate": 3.8272519129881696e-05, "loss": 0.4046, "loss_nan_ranks": 0, "loss_rank_avg": 0.1329842209815979, "step": 260, "valid_targets_mean": 9754.4, "valid_targets_min": 3902 }, { "epoch": 1.5785288270377733, "grad_norm": 0.2574497272355019, "learning_rate": 3.814978828071325e-05, "loss": 0.4081, "loss_nan_ranks": 0, "loss_rank_avg": 0.14275474846363068, "step": 265, "valid_targets_mean": 9707.1, "valid_targets_min": 2847 }, { "epoch": 1.6083499005964215, "grad_norm": 0.26745189858092433, "learning_rate": 3.802305676822517e-05, "loss": 0.4032, "loss_nan_ranks": 0, "loss_rank_avg": 0.13096818327903748, "step": 270, "valid_targets_mean": 9074.9, "valid_targets_min": 2634 }, { "epoch": 1.6381709741550696, "grad_norm": 0.2801974592140141, "learning_rate": 3.789235252718484e-05, "loss": 0.4099, "loss_nan_ranks": 0, "loss_rank_avg": 0.14643187820911407, "step": 275, "valid_targets_mean": 9043.5, "valid_targets_min": 3736 }, { "epoch": 1.6679920477137178, "grad_norm": 0.24615159428763864, "learning_rate": 3.775770436804751e-05, "loss": 0.4088, "loss_nan_ranks": 0, "loss_rank_avg": 0.1417016237974167, "step": 280, "valid_targets_mean": 10256.2, "valid_targets_min": 1765 }, { "epoch": 1.697813121272366, "grad_norm": 0.255233286604699, "learning_rate": 3.761914197060573e-05, "loss": 0.4005, "loss_nan_ranks": 0, "loss_rank_avg": 0.12331944704055786, "step": 285, "valid_targets_mean": 9173.6, "valid_targets_min": 3254 }, { "epoch": 1.7276341948310139, "grad_norm": 0.2779216978110516, "learning_rate": 3.747669587744723e-05, "loss": 0.4007, "loss_nan_ranks": 0, "loss_rank_avg": 0.13669756054878235, "step": 290, "valid_targets_mean": 9882.7, "valid_targets_min": 2297 }, { "epoch": 1.757455268389662, "grad_norm": 0.26422478087504886, "learning_rate": 3.733039748722258e-05, "loss": 0.4053, "loss_nan_ranks": 0, "loss_rank_avg": 0.14321698248386383, "step": 295, "valid_targets_mean": 9899.4, "valid_targets_min": 2409 }, { "epoch": 1.78727634194831, "grad_norm": 0.3959250627738573, "learning_rate": 3.718027904772412e-05, "loss": 0.4021, "loss_nan_ranks": 0, "loss_rank_avg": 0.15091611444950104, "step": 300, "valid_targets_mean": 9824.9, "valid_targets_min": 1650 }, { "epoch": 1.8170974155069581, "grad_norm": 0.28379938678387157, "learning_rate": 3.702637364877776e-05, "loss": 0.4095, "loss_nan_ranks": 0, "loss_rank_avg": 0.1386471688747406, "step": 305, "valid_targets_mean": 9847.7, "valid_targets_min": 3711 }, { "epoch": 1.8469184890656063, "grad_norm": 0.2889207311661245, "learning_rate": 3.686871521494915e-05, "loss": 0.4033, "loss_nan_ranks": 0, "loss_rank_avg": 0.14630842208862305, "step": 310, "valid_targets_mean": 9958.7, "valid_targets_min": 2990 }, { "epoch": 1.8767395626242545, "grad_norm": 0.2561333348175739, "learning_rate": 3.67073384980659e-05, "loss": 0.395, "loss_nan_ranks": 0, "loss_rank_avg": 0.12660804390907288, "step": 315, "valid_targets_mean": 9167.2, "valid_targets_min": 1410 }, { "epoch": 1.9065606361829026, "grad_norm": 0.2914965872986488, "learning_rate": 3.654227906955737e-05, "loss": 0.3981, "loss_nan_ranks": 0, "loss_rank_avg": 0.14069530367851257, "step": 320, "valid_targets_mean": 9996.4, "valid_targets_min": 2710 }, { "epoch": 1.9363817097415508, "grad_norm": 0.2543075725463023, "learning_rate": 3.6373573312613874e-05, "loss": 0.3984, "loss_nan_ranks": 0, "loss_rank_avg": 0.1340501606464386, "step": 325, "valid_targets_mean": 9937.8, "valid_targets_min": 3155 }, { "epoch": 1.966202783300199, "grad_norm": 0.2646619167637183, "learning_rate": 3.620125841416692e-05, "loss": 0.3987, "loss_nan_ranks": 0, "loss_rank_avg": 0.13946759700775146, "step": 330, "valid_targets_mean": 9728.8, "valid_targets_min": 2807 }, { "epoch": 1.9960238568588469, "grad_norm": 0.294581414903317, "learning_rate": 3.602537235669228e-05, "loss": 0.3966, "loss_nan_ranks": 0, "loss_rank_avg": 0.12473136186599731, "step": 335, "valid_targets_mean": 9149.7, "valid_targets_min": 3235 }, { "epoch": 2.0238568588469183, "grad_norm": 0.27850055295898757, "learning_rate": 3.5845953909837716e-05, "loss": 0.393, "loss_nan_ranks": 0, "loss_rank_avg": 0.12054517865180969, "step": 340, "valid_targets_mean": 8854.7, "valid_targets_min": 1949 }, { "epoch": 2.0536779324055665, "grad_norm": 0.26244144180768225, "learning_rate": 3.566304262187718e-05, "loss": 0.3938, "loss_nan_ranks": 0, "loss_rank_avg": 0.12201672792434692, "step": 345, "valid_targets_mean": 8900.0, "valid_targets_min": 2330 }, { "epoch": 2.0834990059642147, "grad_norm": 0.2675354283095, "learning_rate": 3.547667881099341e-05, "loss": 0.3931, "loss_nan_ranks": 0, "loss_rank_avg": 0.1361660212278366, "step": 350, "valid_targets_mean": 10345.9, "valid_targets_min": 5477 }, { "epoch": 2.113320079522863, "grad_norm": 0.27333374666170746, "learning_rate": 3.528690355639079e-05, "loss": 0.4028, "loss_nan_ranks": 0, "loss_rank_avg": 0.1383921355009079, "step": 355, "valid_targets_mean": 10431.5, "valid_targets_min": 2495 }, { "epoch": 2.143141153081511, "grad_norm": 0.2515801080189176, "learning_rate": 3.509375868924048e-05, "loss": 0.3857, "loss_nan_ranks": 0, "loss_rank_avg": 0.11818955838680267, "step": 360, "valid_targets_mean": 8793.7, "valid_targets_min": 3129 }, { "epoch": 2.172962226640159, "grad_norm": 0.2625179819877921, "learning_rate": 3.489728678345978e-05, "loss": 0.3938, "loss_nan_ranks": 0, "loss_rank_avg": 0.13788491487503052, "step": 365, "valid_targets_mean": 10044.4, "valid_targets_min": 4449 }, { "epoch": 2.2027833001988073, "grad_norm": 0.2964649252713314, "learning_rate": 3.46975311463278e-05, "loss": 0.3843, "loss_nan_ranks": 0, "loss_rank_avg": 0.13454961776733398, "step": 370, "valid_targets_mean": 9236.4, "valid_targets_min": 2752 }, { "epoch": 2.2326043737574555, "grad_norm": 0.2710375300470546, "learning_rate": 3.449453580893945e-05, "loss": 0.3968, "loss_nan_ranks": 0, "loss_rank_avg": 0.12816333770751953, "step": 375, "valid_targets_mean": 9797.3, "valid_targets_min": 3511 }, { "epoch": 2.2624254473161036, "grad_norm": 0.25606918527553785, "learning_rate": 3.428834551649989e-05, "loss": 0.387, "loss_nan_ranks": 0, "loss_rank_avg": 0.13535423576831818, "step": 380, "valid_targets_mean": 9259.8, "valid_targets_min": 1987 }, { "epoch": 2.2922465208747513, "grad_norm": 0.25758666339894315, "learning_rate": 3.4079005718461596e-05, "loss": 0.3935, "loss_nan_ranks": 0, "loss_rank_avg": 0.1293325573205948, "step": 385, "valid_targets_mean": 8947.5, "valid_targets_min": 2799 }, { "epoch": 2.3220675944333995, "grad_norm": 0.24234110541082965, "learning_rate": 3.386656255850617e-05, "loss": 0.3904, "loss_nan_ranks": 0, "loss_rank_avg": 0.1355832815170288, "step": 390, "valid_targets_mean": 10094.5, "valid_targets_min": 1932 }, { "epoch": 2.3518886679920477, "grad_norm": 0.25695722708253604, "learning_rate": 3.365106286437309e-05, "loss": 0.3874, "loss_nan_ranks": 0, "loss_rank_avg": 0.12625740468502045, "step": 395, "valid_targets_mean": 9291.4, "valid_targets_min": 3611 }, { "epoch": 2.381709741550696, "grad_norm": 0.2563116812129519, "learning_rate": 3.3432554137537764e-05, "loss": 0.3895, "loss_nan_ranks": 0, "loss_rank_avg": 0.11663530766963959, "step": 400, "valid_targets_mean": 9260.6, "valid_targets_min": 2691 }, { "epoch": 2.411530815109344, "grad_norm": 0.2572513815596599, "learning_rate": 3.321108454274103e-05, "loss": 0.3907, "loss_nan_ranks": 0, "loss_rank_avg": 0.12803307175636292, "step": 405, "valid_targets_mean": 9468.8, "valid_targets_min": 3154 }, { "epoch": 2.441351888667992, "grad_norm": 0.2607707850998344, "learning_rate": 3.29867028973724e-05, "loss": 0.3859, "loss_nan_ranks": 0, "loss_rank_avg": 0.1312410533428192, "step": 410, "valid_targets_mean": 8711.6, "valid_targets_min": 3918 }, { "epoch": 2.4711729622266403, "grad_norm": 0.23731121331277513, "learning_rate": 3.275945866070955e-05, "loss": 0.3864, "loss_nan_ranks": 0, "loss_rank_avg": 0.14013691246509552, "step": 415, "valid_targets_mean": 10685.4, "valid_targets_min": 2421 }, { "epoch": 2.500994035785288, "grad_norm": 0.24444398626626032, "learning_rate": 3.252940192301624e-05, "loss": 0.3864, "loss_nan_ranks": 0, "loss_rank_avg": 0.12714995443820953, "step": 420, "valid_targets_mean": 10293.2, "valid_targets_min": 3706 }, { "epoch": 2.530815109343936, "grad_norm": 0.2405776417574965, "learning_rate": 3.229658339450119e-05, "loss": 0.3903, "loss_nan_ranks": 0, "loss_rank_avg": 0.13615553081035614, "step": 425, "valid_targets_mean": 10000.6, "valid_targets_min": 3407 }, { "epoch": 2.5606361829025843, "grad_norm": 0.24845003130000037, "learning_rate": 3.2061054394140285e-05, "loss": 0.3918, "loss_nan_ranks": 0, "loss_rank_avg": 0.1352383941411972, "step": 430, "valid_targets_mean": 10112.7, "valid_targets_min": 3079 }, { "epoch": 2.5904572564612325, "grad_norm": 0.2562994770469392, "learning_rate": 3.182286683836461e-05, "loss": 0.3854, "loss_nan_ranks": 0, "loss_rank_avg": 0.12299023568630219, "step": 435, "valid_targets_mean": 8783.9, "valid_targets_min": 1946 }, { "epoch": 2.6202783300198806, "grad_norm": 0.2591727396961362, "learning_rate": 3.158207322961678e-05, "loss": 0.389, "loss_nan_ranks": 0, "loss_rank_avg": 0.12027916312217712, "step": 440, "valid_targets_mean": 8550.8, "valid_targets_min": 1304 }, { "epoch": 2.650099403578529, "grad_norm": 0.23343608939765326, "learning_rate": 3.1338726644778084e-05, "loss": 0.3798, "loss_nan_ranks": 0, "loss_rank_avg": 0.12307856976985931, "step": 445, "valid_targets_mean": 9197.8, "valid_targets_min": 3880 }, { "epoch": 2.679920477137177, "grad_norm": 0.2608548923226282, "learning_rate": 3.109288072346904e-05, "loss": 0.3846, "loss_nan_ranks": 0, "loss_rank_avg": 0.13536040484905243, "step": 450, "valid_targets_mean": 9562.6, "valid_targets_min": 1005 }, { "epoch": 2.709741550695825, "grad_norm": 0.2674341383904537, "learning_rate": 3.084458965622591e-05, "loss": 0.3854, "loss_nan_ranks": 0, "loss_rank_avg": 0.1220935583114624, "step": 455, "valid_targets_mean": 9237.2, "valid_targets_min": 3261 }, { "epoch": 2.7395626242544733, "grad_norm": 0.24113612336638285, "learning_rate": 3.0593908172555696e-05, "loss": 0.3829, "loss_nan_ranks": 0, "loss_rank_avg": 0.12890732288360596, "step": 460, "valid_targets_mean": 9338.5, "valid_targets_min": 1751 }, { "epoch": 2.7693836978131214, "grad_norm": 0.23504577264959453, "learning_rate": 3.0340891528872503e-05, "loss": 0.3861, "loss_nan_ranks": 0, "loss_rank_avg": 0.1365663856267929, "step": 465, "valid_targets_mean": 10284.9, "valid_targets_min": 3324 }, { "epoch": 2.7992047713717696, "grad_norm": 0.23542940739467466, "learning_rate": 3.0085595496317558e-05, "loss": 0.386, "loss_nan_ranks": 0, "loss_rank_avg": 0.12416007369756699, "step": 470, "valid_targets_mean": 9443.1, "valid_targets_min": 2297 }, { "epoch": 2.8290258449304178, "grad_norm": 0.2520271616790775, "learning_rate": 2.9828076348465913e-05, "loss": 0.3887, "loss_nan_ranks": 0, "loss_rank_avg": 0.12400713562965393, "step": 475, "valid_targets_mean": 8372.7, "valid_targets_min": 3158 }, { "epoch": 2.8588469184890655, "grad_norm": 0.2617214720802655, "learning_rate": 2.956839084892235e-05, "loss": 0.3823, "loss_nan_ranks": 0, "loss_rank_avg": 0.13921988010406494, "step": 480, "valid_targets_mean": 10316.4, "valid_targets_min": 2117 }, { "epoch": 2.8886679920477136, "grad_norm": 0.23989440057199513, "learning_rate": 2.9306596238809292e-05, "loss": 0.3826, "loss_nan_ranks": 0, "loss_rank_avg": 0.12483088672161102, "step": 485, "valid_targets_mean": 9510.9, "valid_targets_min": 1113 }, { "epoch": 2.918489065606362, "grad_norm": 0.2619100232438116, "learning_rate": 2.9042750224149396e-05, "loss": 0.384, "loss_nan_ranks": 0, "loss_rank_avg": 0.1344929337501526, "step": 490, "valid_targets_mean": 10448.7, "valid_targets_min": 4344 }, { "epoch": 2.94831013916501, "grad_norm": 0.24604640536880984, "learning_rate": 2.877691096314576e-05, "loss": 0.3878, "loss_nan_ranks": 0, "loss_rank_avg": 0.13883237540721893, "step": 495, "valid_targets_mean": 10006.5, "valid_targets_min": 3781 }, { "epoch": 2.978131212723658, "grad_norm": 0.24449981627934345, "learning_rate": 2.850913705336238e-05, "loss": 0.3883, "loss_nan_ranks": 0, "loss_rank_avg": 0.12503878772258759, "step": 500, "valid_targets_mean": 9295.5, "valid_targets_min": 2761 }, { "epoch": 3.00596421471173, "grad_norm": 0.25973568404266545, "learning_rate": 2.8239487518807816e-05, "loss": 0.3787, "loss_nan_ranks": 0, "loss_rank_avg": 0.11682064831256866, "step": 505, "valid_targets_mean": 8969.0, "valid_targets_min": 2105 }, { "epoch": 3.0357852882703775, "grad_norm": 0.22127882526750678, "learning_rate": 2.7968021796924834e-05, "loss": 0.376, "loss_nan_ranks": 0, "loss_rank_avg": 0.10894481837749481, "step": 510, "valid_targets_mean": 8102.7, "valid_targets_min": 4194 }, { "epoch": 3.0656063618290257, "grad_norm": 0.2568154982874112, "learning_rate": 2.76947997254889e-05, "loss": 0.3794, "loss_nan_ranks": 0, "loss_rank_avg": 0.1269792914390564, "step": 515, "valid_targets_mean": 9722.5, "valid_targets_min": 3461 }, { "epoch": 3.095427435387674, "grad_norm": 0.2418191676681759, "learning_rate": 2.741988152941849e-05, "loss": 0.3789, "loss_nan_ranks": 0, "loss_rank_avg": 0.1313607394695282, "step": 520, "valid_targets_mean": 10292.7, "valid_targets_min": 2701 }, { "epoch": 3.125248508946322, "grad_norm": 0.26137529392846076, "learning_rate": 2.714332780749997e-05, "loss": 0.3727, "loss_nan_ranks": 0, "loss_rank_avg": 0.12172074615955353, "step": 525, "valid_targets_mean": 9764.4, "valid_targets_min": 2498 }, { "epoch": 3.15506958250497, "grad_norm": 0.24623727496182754, "learning_rate": 2.6865199519030178e-05, "loss": 0.3779, "loss_nan_ranks": 0, "loss_rank_avg": 0.12355610728263855, "step": 530, "valid_targets_mean": 9638.3, "valid_targets_min": 3022 }, { "epoch": 3.1848906560636183, "grad_norm": 0.24954885423234177, "learning_rate": 2.658555797037945e-05, "loss": 0.3787, "loss_nan_ranks": 0, "loss_rank_avg": 0.12461437284946442, "step": 535, "valid_targets_mean": 8588.2, "valid_targets_min": 1886 }, { "epoch": 3.2147117296222665, "grad_norm": 0.21483912308106684, "learning_rate": 2.6304464801478177e-05, "loss": 0.3762, "loss_nan_ranks": 0, "loss_rank_avg": 0.1340673416852951, "step": 540, "valid_targets_mean": 10699.2, "valid_targets_min": 2852 }, { "epoch": 3.2445328031809146, "grad_norm": 0.253090712355003, "learning_rate": 2.6021981972229852e-05, "loss": 0.3796, "loss_nan_ranks": 0, "loss_rank_avg": 0.1394907385110855, "step": 545, "valid_targets_mean": 9860.4, "valid_targets_min": 2323 }, { "epoch": 3.274353876739563, "grad_norm": 0.22081826679193847, "learning_rate": 2.5738171748853552e-05, "loss": 0.3805, "loss_nan_ranks": 0, "loss_rank_avg": 0.1250452846288681, "step": 550, "valid_targets_mean": 10036.3, "valid_targets_min": 1623 }, { "epoch": 3.3041749502982105, "grad_norm": 0.25504537067123745, "learning_rate": 2.545309669015895e-05, "loss": 0.3771, "loss_nan_ranks": 0, "loss_rank_avg": 0.12748511135578156, "step": 555, "valid_targets_mean": 10318.6, "valid_targets_min": 4635 }, { "epoch": 3.3339960238568587, "grad_norm": 0.23504480529865782, "learning_rate": 2.5166819633756746e-05, "loss": 0.3781, "loss_nan_ranks": 0, "loss_rank_avg": 0.12984226644039154, "step": 560, "valid_targets_mean": 9720.7, "valid_targets_min": 2968 }, { "epoch": 3.363817097415507, "grad_norm": 0.22141055145170896, "learning_rate": 2.4879403682207775e-05, "loss": 0.3784, "loss_nan_ranks": 0, "loss_rank_avg": 0.1256609559059143, "step": 565, "valid_targets_mean": 9638.0, "valid_targets_min": 2492 }, { "epoch": 3.393638170974155, "grad_norm": 0.24107550315856965, "learning_rate": 2.4590912189113575e-05, "loss": 0.3759, "loss_nan_ranks": 0, "loss_rank_avg": 0.13189947605133057, "step": 570, "valid_targets_mean": 9626.9, "valid_targets_min": 2284 }, { "epoch": 3.423459244532803, "grad_norm": 0.23868274137862344, "learning_rate": 2.430140874515171e-05, "loss": 0.3742, "loss_nan_ranks": 0, "loss_rank_avg": 0.12700285017490387, "step": 575, "valid_targets_mean": 9807.8, "valid_targets_min": 3978 }, { "epoch": 3.4532803180914513, "grad_norm": 0.22377331028220349, "learning_rate": 2.4010957164058803e-05, "loss": 0.3751, "loss_nan_ranks": 0, "loss_rank_avg": 0.1289224922657013, "step": 580, "valid_targets_mean": 9460.9, "valid_targets_min": 2662 }, { "epoch": 3.4831013916500995, "grad_norm": 0.24019351233889105, "learning_rate": 2.3719621468564416e-05, "loss": 0.3763, "loss_nan_ranks": 0, "loss_rank_avg": 0.12393779307603836, "step": 585, "valid_targets_mean": 9742.5, "valid_targets_min": 2693 }, { "epoch": 3.5129224652087476, "grad_norm": 0.24701426386806288, "learning_rate": 2.3427465876278843e-05, "loss": 0.3697, "loss_nan_ranks": 0, "loss_rank_avg": 0.12272713333368301, "step": 590, "valid_targets_mean": 10039.6, "valid_targets_min": 2944 }, { "epoch": 3.542743538767396, "grad_norm": 0.25447639049380394, "learning_rate": 2.3134554785537943e-05, "loss": 0.3778, "loss_nan_ranks": 0, "loss_rank_avg": 0.1295909881591797, "step": 595, "valid_targets_mean": 9521.8, "valid_targets_min": 2744 }, { "epoch": 3.572564612326044, "grad_norm": 0.21876380474118456, "learning_rate": 2.284095276120818e-05, "loss": 0.376, "loss_nan_ranks": 0, "loss_rank_avg": 0.11200149357318878, "step": 600, "valid_targets_mean": 9297.1, "valid_targets_min": 4191 }, { "epoch": 3.602385685884692, "grad_norm": 0.2876203923550237, "learning_rate": 2.2546724520454916e-05, "loss": 0.3784, "loss_nan_ranks": 0, "loss_rank_avg": 0.13492657244205475, "step": 605, "valid_targets_mean": 9474.2, "valid_targets_min": 2691 }, { "epoch": 3.63220675944334, "grad_norm": 0.3109728940450909, "learning_rate": 2.2251934918477126e-05, "loss": 0.3764, "loss_nan_ranks": 0, "loss_rank_avg": 0.1116630882024765, "step": 610, "valid_targets_mean": 8435.2, "valid_targets_min": 2827 }, { "epoch": 3.662027833001988, "grad_norm": 0.23811609842596262, "learning_rate": 2.1956648934211717e-05, "loss": 0.3772, "loss_nan_ranks": 0, "loss_rank_avg": 0.11188781261444092, "step": 615, "valid_targets_mean": 9120.6, "valid_targets_min": 2188 }, { "epoch": 3.691848906560636, "grad_norm": 0.22411995085803113, "learning_rate": 2.1660931656010568e-05, "loss": 0.3708, "loss_nan_ranks": 0, "loss_rank_avg": 0.11489223688840866, "step": 620, "valid_targets_mean": 8820.9, "valid_targets_min": 3292 }, { "epoch": 3.7216699801192843, "grad_norm": 0.22528057105116367, "learning_rate": 2.1364848267293424e-05, "loss": 0.3748, "loss_nan_ranks": 0, "loss_rank_avg": 0.11487048119306564, "step": 625, "valid_targets_mean": 8887.9, "valid_targets_min": 2649 }, { "epoch": 3.7514910536779325, "grad_norm": 0.23210531928235775, "learning_rate": 2.106846403217987e-05, "loss": 0.3711, "loss_nan_ranks": 0, "loss_rank_avg": 0.12289533764123917, "step": 630, "valid_targets_mean": 9040.5, "valid_targets_min": 2927 }, { "epoch": 3.7813121272365806, "grad_norm": 0.22420265923284372, "learning_rate": 2.0771844281103503e-05, "loss": 0.3832, "loss_nan_ranks": 0, "loss_rank_avg": 0.1272115558385849, "step": 635, "valid_targets_mean": 9784.5, "valid_targets_min": 3904 }, { "epoch": 3.8111332007952288, "grad_norm": 0.2463614011353854, "learning_rate": 2.0475054396411464e-05, "loss": 0.3737, "loss_nan_ranks": 0, "loss_rank_avg": 0.13237622380256653, "step": 640, "valid_targets_mean": 10127.7, "valid_targets_min": 2225 }, { "epoch": 3.8409542743538765, "grad_norm": 0.23705284154886855, "learning_rate": 2.017815979795257e-05, "loss": 0.3771, "loss_nan_ranks": 0, "loss_rank_avg": 0.1224151998758316, "step": 645, "valid_targets_mean": 8962.2, "valid_targets_min": 2634 }, { "epoch": 3.8707753479125246, "grad_norm": 0.27393497557546315, "learning_rate": 1.9881225928657132e-05, "loss": 0.3755, "loss_nan_ranks": 0, "loss_rank_avg": 0.11714585870504379, "step": 650, "valid_targets_mean": 9084.2, "valid_targets_min": 3314 }, { "epoch": 3.900596421471173, "grad_norm": 0.24064359347617442, "learning_rate": 1.958431824011176e-05, "loss": 0.3773, "loss_nan_ranks": 0, "loss_rank_avg": 0.11896838247776031, "step": 655, "valid_targets_mean": 9061.0, "valid_targets_min": 3267 }, { "epoch": 3.930417495029821, "grad_norm": 0.23848724567684518, "learning_rate": 1.928750217813214e-05, "loss": 0.372, "loss_nan_ranks": 0, "loss_rank_avg": 0.12432240694761276, "step": 660, "valid_targets_mean": 9445.7, "valid_targets_min": 3656 }, { "epoch": 3.960238568588469, "grad_norm": 0.23633084097635995, "learning_rate": 1.899084316833722e-05, "loss": 0.3726, "loss_nan_ranks": 0, "loss_rank_avg": 0.12451080232858658, "step": 665, "valid_targets_mean": 9713.7, "valid_targets_min": 3239 }, { "epoch": 3.9900596421471173, "grad_norm": 0.24802678596076022, "learning_rate": 1.869440660172774e-05, "loss": 0.3711, "loss_nan_ranks": 0, "loss_rank_avg": 0.1269044578075409, "step": 670, "valid_targets_mean": 9837.6, "valid_targets_min": 2370 }, { "epoch": 4.0178926441351885, "grad_norm": 0.23067166950312643, "learning_rate": 1.8398257820272438e-05, "loss": 0.3711, "loss_nan_ranks": 0, "loss_rank_avg": 0.13049767911434174, "step": 675, "valid_targets_mean": 9200.5, "valid_targets_min": 3730 }, { "epoch": 4.047713717693837, "grad_norm": 0.22687025778414033, "learning_rate": 1.8102462102505096e-05, "loss": 0.3664, "loss_nan_ranks": 0, "loss_rank_avg": 0.11435438692569733, "step": 680, "valid_targets_mean": 9556.8, "valid_targets_min": 1170 }, { "epoch": 4.077534791252485, "grad_norm": 0.23492036510934328, "learning_rate": 1.7807084649135473e-05, "loss": 0.3686, "loss_nan_ranks": 0, "loss_rank_avg": 0.11235421150922775, "step": 685, "valid_targets_mean": 8562.3, "valid_targets_min": 2218 }, { "epoch": 4.107355864811133, "grad_norm": 0.22232507076273023, "learning_rate": 1.751219056867751e-05, "loss": 0.3649, "loss_nan_ranks": 0, "loss_rank_avg": 0.12534525990486145, "step": 690, "valid_targets_mean": 9685.4, "valid_targets_min": 3439 }, { "epoch": 4.137176938369781, "grad_norm": 0.2160815037269669, "learning_rate": 1.7217844863097774e-05, "loss": 0.3704, "loss_nan_ranks": 0, "loss_rank_avg": 0.12021255493164062, "step": 695, "valid_targets_mean": 9724.7, "valid_targets_min": 4045 }, { "epoch": 4.166998011928429, "grad_norm": 0.22504176113051863, "learning_rate": 1.6924112413487382e-05, "loss": 0.3711, "loss_nan_ranks": 0, "loss_rank_avg": 0.12961643934249878, "step": 700, "valid_targets_mean": 10400.0, "valid_targets_min": 3902 }, { "epoch": 4.1968190854870775, "grad_norm": 0.2239146324338242, "learning_rate": 1.6631057965760674e-05, "loss": 0.3784, "loss_nan_ranks": 0, "loss_rank_avg": 0.1256789118051529, "step": 705, "valid_targets_mean": 10124.2, "valid_targets_min": 2297 }, { "epoch": 4.226640159045726, "grad_norm": 0.24637631808051266, "learning_rate": 1.633874611638353e-05, "loss": 0.3695, "loss_nan_ranks": 0, "loss_rank_avg": 0.11531466245651245, "step": 710, "valid_targets_mean": 9248.9, "valid_targets_min": 3223 }, { "epoch": 4.256461232604374, "grad_norm": 0.2436999109413977, "learning_rate": 1.6047241298134767e-05, "loss": 0.3664, "loss_nan_ranks": 0, "loss_rank_avg": 0.12499426305294037, "step": 715, "valid_targets_mean": 8669.4, "valid_targets_min": 2360 }, { "epoch": 4.286282306163022, "grad_norm": 0.23915655247210107, "learning_rate": 1.5756607765903525e-05, "loss": 0.3673, "loss_nan_ranks": 0, "loss_rank_avg": 0.13046535849571228, "step": 720, "valid_targets_mean": 9301.4, "valid_targets_min": 3807 }, { "epoch": 4.31610337972167, "grad_norm": 0.27408527428068585, "learning_rate": 1.5466909582525893e-05, "loss": 0.3664, "loss_nan_ranks": 0, "loss_rank_avg": 0.10745998471975327, "step": 725, "valid_targets_mean": 8807.9, "valid_targets_min": 2492 }, { "epoch": 4.345924453280318, "grad_norm": 0.22513801096410269, "learning_rate": 1.51782106046639e-05, "loss": 0.3633, "loss_nan_ranks": 0, "loss_rank_avg": 0.12577703595161438, "step": 730, "valid_targets_mean": 9537.7, "valid_targets_min": 2496 }, { "epoch": 4.3757455268389664, "grad_norm": 0.22189885599414932, "learning_rate": 1.4890574468729893e-05, "loss": 0.3671, "loss_nan_ranks": 0, "loss_rank_avg": 0.11032432317733765, "step": 735, "valid_targets_mean": 8715.6, "valid_targets_min": 2174 }, { "epoch": 4.405566600397615, "grad_norm": 0.2262677009022974, "learning_rate": 1.4604064576859513e-05, "loss": 0.3695, "loss_nan_ranks": 0, "loss_rank_avg": 0.12237927317619324, "step": 740, "valid_targets_mean": 8992.1, "valid_targets_min": 4707 }, { "epoch": 4.435387673956263, "grad_norm": 0.21753810116362765, "learning_rate": 1.43187440829363e-05, "loss": 0.3681, "loss_nan_ranks": 0, "loss_rank_avg": 0.13011434674263, "step": 745, "valid_targets_mean": 9628.3, "valid_targets_min": 1150 }, { "epoch": 4.465208747514911, "grad_norm": 0.23166763660646592, "learning_rate": 1.4034675878670964e-05, "loss": 0.366, "loss_nan_ranks": 0, "loss_rank_avg": 0.11755146086215973, "step": 750, "valid_targets_mean": 8975.1, "valid_targets_min": 3581 }, { "epoch": 4.495029821073558, "grad_norm": 0.24948530398746774, "learning_rate": 1.3751922579738566e-05, "loss": 0.3671, "loss_nan_ranks": 0, "loss_rank_avg": 0.1294461041688919, "step": 755, "valid_targets_mean": 9031.1, "valid_targets_min": 2432 }, { "epoch": 4.524850894632207, "grad_norm": 0.22634991751036648, "learning_rate": 1.3470546511976395e-05, "loss": 0.3653, "loss_nan_ranks": 0, "loss_rank_avg": 0.13205093145370483, "step": 760, "valid_targets_mean": 10131.7, "valid_targets_min": 3011 }, { "epoch": 4.5546719681908545, "grad_norm": 0.2407875833801288, "learning_rate": 1.3190609697645882e-05, "loss": 0.3698, "loss_nan_ranks": 0, "loss_rank_avg": 0.11704811453819275, "step": 765, "valid_targets_mean": 8992.2, "valid_targets_min": 2746 }, { "epoch": 4.584493041749503, "grad_norm": 0.2232911058573211, "learning_rate": 1.2912173841761288e-05, "loss": 0.3648, "loss_nan_ranks": 0, "loss_rank_avg": 0.12442412227392197, "step": 770, "valid_targets_mean": 10025.8, "valid_targets_min": 4098 }, { "epoch": 4.614314115308151, "grad_norm": 0.21682935538971534, "learning_rate": 1.2635300318488426e-05, "loss": 0.3668, "loss_nan_ranks": 0, "loss_rank_avg": 0.12467293441295624, "step": 775, "valid_targets_mean": 9998.5, "valid_targets_min": 3071 }, { "epoch": 4.644135188866799, "grad_norm": 0.22390557119853882, "learning_rate": 1.236005015761629e-05, "loss": 0.3761, "loss_nan_ranks": 0, "loss_rank_avg": 0.1269168108701706, "step": 780, "valid_targets_mean": 10062.8, "valid_targets_min": 2395 }, { "epoch": 4.673956262425447, "grad_norm": 0.24000520155840155, "learning_rate": 1.2086484031104515e-05, "loss": 0.3719, "loss_nan_ranks": 0, "loss_rank_avg": 0.11352889239788055, "step": 785, "valid_targets_mean": 8199.0, "valid_targets_min": 3394 }, { "epoch": 4.703777335984095, "grad_norm": 0.21139692598361529, "learning_rate": 1.1814662239709851e-05, "loss": 0.3745, "loss_nan_ranks": 0, "loss_rank_avg": 0.12804041802883148, "step": 790, "valid_targets_mean": 10180.8, "valid_targets_min": 3169 }, { "epoch": 4.7335984095427435, "grad_norm": 0.23419559497822282, "learning_rate": 1.1544644699694307e-05, "loss": 0.3631, "loss_nan_ranks": 0, "loss_rank_avg": 0.1193591058254242, "step": 795, "valid_targets_mean": 9056.3, "valid_targets_min": 3674 }, { "epoch": 4.763419483101392, "grad_norm": 0.24646021525100137, "learning_rate": 1.1276490929618177e-05, "loss": 0.3653, "loss_nan_ranks": 0, "loss_rank_avg": 0.11579230427742004, "step": 800, "valid_targets_mean": 9170.7, "valid_targets_min": 2159 }, { "epoch": 4.79324055666004, "grad_norm": 0.22174275700505441, "learning_rate": 1.1010260037220643e-05, "loss": 0.3688, "loss_nan_ranks": 0, "loss_rank_avg": 0.12448688596487045, "step": 805, "valid_targets_mean": 9580.0, "valid_targets_min": 1459 }, { "epoch": 4.823061630218688, "grad_norm": 0.22272875692529412, "learning_rate": 1.0746010706390981e-05, "loss": 0.3717, "loss_nan_ranks": 0, "loss_rank_avg": 0.12802085280418396, "step": 810, "valid_targets_mean": 9987.5, "valid_targets_min": 2232 }, { "epoch": 4.852882703777336, "grad_norm": 0.2138007940278265, "learning_rate": 1.048380118423316e-05, "loss": 0.3742, "loss_nan_ranks": 0, "loss_rank_avg": 0.12118125706911087, "step": 815, "valid_targets_mean": 9264.9, "valid_targets_min": 2934 }, { "epoch": 4.882703777335984, "grad_norm": 0.22079166609200498, "learning_rate": 1.0223689268226754e-05, "loss": 0.3699, "loss_nan_ranks": 0, "loss_rank_avg": 0.12725740671157837, "step": 820, "valid_targets_mean": 10555.1, "valid_targets_min": 993 }, { "epoch": 4.912524850894632, "grad_norm": 0.21264177919293614, "learning_rate": 9.965732293486929e-06, "loss": 0.3639, "loss_nan_ranks": 0, "loss_rank_avg": 0.13087576627731323, "step": 825, "valid_targets_mean": 10235.8, "valid_targets_min": 2786 }, { "epoch": 4.942345924453281, "grad_norm": 0.24182464066253673, "learning_rate": 9.709987120126371e-06, "loss": 0.3657, "loss_nan_ranks": 0, "loss_rank_avg": 0.12286906689405441, "step": 830, "valid_targets_mean": 8107.4, "valid_targets_min": 2343 }, { "epoch": 4.972166998011929, "grad_norm": 0.2328727528842246, "learning_rate": 9.456510120721911e-06, "loss": 0.3713, "loss_nan_ranks": 0, "loss_rank_avg": 0.11418526619672775, "step": 835, "valid_targets_mean": 8587.7, "valid_targets_min": 2879 }, { "epoch": 5.0, "grad_norm": 0.28105201212007624, "learning_rate": 9.205357167888595e-06, "loss": 0.3688, "loss_nan_ranks": 0, "loss_rank_avg": 0.17316186428070068, "step": 840, "valid_targets_mean": 9005.3, "valid_targets_min": 3954 }, { "epoch": 5.029821073558648, "grad_norm": 0.30217719030448426, "learning_rate": 8.956583621963996e-06, "loss": 0.3667, "loss_nan_ranks": 0, "loss_rank_avg": 0.11961351335048676, "step": 845, "valid_targets_mean": 9328.7, "valid_targets_min": 2441 }, { "epoch": 5.059642147117296, "grad_norm": 0.2636234100864308, "learning_rate": 8.710244318805406e-06, "loss": 0.3641, "loss_nan_ranks": 0, "loss_rank_avg": 0.11540426313877106, "step": 850, "valid_targets_mean": 9178.5, "valid_targets_min": 2654 }, { "epoch": 5.0894632206759445, "grad_norm": 0.2243454259706472, "learning_rate": 8.466393557702659e-06, "loss": 0.3598, "loss_nan_ranks": 0, "loss_rank_avg": 0.11149504780769348, "step": 855, "valid_targets_mean": 9037.4, "valid_targets_min": 2353 }, { "epoch": 5.119284294234593, "grad_norm": 0.2149403201957784, "learning_rate": 8.225085089409231e-06, "loss": 0.3652, "loss_nan_ranks": 0, "loss_rank_avg": 0.1426597237586975, "step": 860, "valid_targets_mean": 10396.4, "valid_targets_min": 3269 }, { "epoch": 5.149105367793241, "grad_norm": 0.22198093615836403, "learning_rate": 7.98637210429422e-06, "loss": 0.3676, "loss_nan_ranks": 0, "loss_rank_avg": 0.12441132217645645, "step": 865, "valid_targets_mean": 10199.0, "valid_targets_min": 2953 }, { "epoch": 5.178926441351889, "grad_norm": 0.21985018694532593, "learning_rate": 7.750307220617892e-06, "loss": 0.3606, "loss_nan_ranks": 0, "loss_rank_avg": 0.12127161026000977, "step": 870, "valid_targets_mean": 9964.5, "valid_targets_min": 5004 }, { "epoch": 5.208747514910537, "grad_norm": 0.21440275235872072, "learning_rate": 7.5169424729333e-06, "loss": 0.3626, "loss_nan_ranks": 0, "loss_rank_avg": 0.12604345381259918, "step": 875, "valid_targets_mean": 9950.9, "valid_targets_min": 3104 }, { "epoch": 5.238568588469185, "grad_norm": 0.22878390162081763, "learning_rate": 7.286329300616575e-06, "loss": 0.3708, "loss_nan_ranks": 0, "loss_rank_avg": 0.13689112663269043, "step": 880, "valid_targets_mean": 10335.2, "valid_targets_min": 3606 }, { "epoch": 5.2683896620278325, "grad_norm": 0.2218645567270629, "learning_rate": 7.058518536528427e-06, "loss": 0.3711, "loss_nan_ranks": 0, "loss_rank_avg": 0.12371527403593063, "step": 885, "valid_targets_mean": 9315.7, "valid_targets_min": 3386 }, { "epoch": 5.298210735586481, "grad_norm": 0.20525056421525675, "learning_rate": 6.833560395809307e-06, "loss": 0.3666, "loss_nan_ranks": 0, "loss_rank_avg": 0.12798547744750977, "step": 890, "valid_targets_mean": 9637.3, "valid_targets_min": 2280 }, { "epoch": 5.328031809145129, "grad_norm": 0.22438108847516308, "learning_rate": 6.611504464810754e-06, "loss": 0.3633, "loss_nan_ranks": 0, "loss_rank_avg": 0.12381379306316376, "step": 895, "valid_targets_mean": 9930.8, "valid_targets_min": 3306 }, { "epoch": 5.357852882703777, "grad_norm": 0.2014227570589018, "learning_rate": 6.392399690165328e-06, "loss": 0.3586, "loss_nan_ranks": 0, "loss_rank_avg": 0.10803110897541046, "step": 900, "valid_targets_mean": 8890.2, "valid_targets_min": 2494 }, { "epoch": 5.387673956262425, "grad_norm": 0.20843428620879534, "learning_rate": 6.176294367997564e-06, "loss": 0.3643, "loss_nan_ranks": 0, "loss_rank_avg": 0.1383419781923294, "step": 905, "valid_targets_mean": 10496.9, "valid_targets_min": 2517 }, { "epoch": 5.417495029821073, "grad_norm": 0.2279795640183649, "learning_rate": 5.9632361332783075e-06, "loss": 0.3638, "loss_nan_ranks": 0, "loss_rank_avg": 0.12038109451532364, "step": 910, "valid_targets_mean": 9726.5, "valid_targets_min": 3070 }, { "epoch": 5.4473161033797215, "grad_norm": 0.21438762423589458, "learning_rate": 5.753271949324779e-06, "loss": 0.3678, "loss_nan_ranks": 0, "loss_rank_avg": 0.11860641837120056, "step": 915, "valid_targets_mean": 8619.2, "valid_targets_min": 3587 }, { "epoch": 5.47713717693837, "grad_norm": 0.2042751340473231, "learning_rate": 5.546448097448709e-06, "loss": 0.3617, "loss_nan_ranks": 0, "loss_rank_avg": 0.12780383229255676, "step": 920, "valid_targets_mean": 10348.6, "valid_targets_min": 3762 }, { "epoch": 5.506958250497018, "grad_norm": 0.20548277545029267, "learning_rate": 5.342810166754773e-06, "loss": 0.3635, "loss_nan_ranks": 0, "loss_rank_avg": 0.10209870338439941, "step": 925, "valid_targets_mean": 8156.8, "valid_targets_min": 2395 }, { "epoch": 5.536779324055666, "grad_norm": 0.21298707779747966, "learning_rate": 5.142403044091635e-06, "loss": 0.3635, "loss_nan_ranks": 0, "loss_rank_avg": 0.12702594697475433, "step": 930, "valid_targets_mean": 10764.0, "valid_targets_min": 4226 }, { "epoch": 5.566600397614314, "grad_norm": 0.21096182884472378, "learning_rate": 4.945270904157766e-06, "loss": 0.3693, "loss_nan_ranks": 0, "loss_rank_avg": 0.1142885610461235, "step": 935, "valid_targets_mean": 8712.3, "valid_targets_min": 3123 }, { "epoch": 5.596421471172962, "grad_norm": 0.2137029350386158, "learning_rate": 4.751457199764249e-06, "loss": 0.3619, "loss_nan_ranks": 0, "loss_rank_avg": 0.12835736572742462, "step": 940, "valid_targets_mean": 9765.0, "valid_targets_min": 2831 }, { "epoch": 5.6262425447316105, "grad_norm": 0.22355702289752202, "learning_rate": 4.5610046522567e-06, "loss": 0.3718, "loss_nan_ranks": 0, "loss_rank_avg": 0.12691718339920044, "step": 945, "valid_targets_mean": 9729.5, "valid_targets_min": 3585 }, { "epoch": 5.656063618290259, "grad_norm": 0.21492851974551722, "learning_rate": 4.373955242098427e-06, "loss": 0.3683, "loss_nan_ranks": 0, "loss_rank_avg": 0.11885521560907364, "step": 950, "valid_targets_mean": 9381.0, "valid_targets_min": 4095 }, { "epoch": 5.685884691848907, "grad_norm": 0.20233385939777984, "learning_rate": 4.190350199616888e-06, "loss": 0.3672, "loss_nan_ranks": 0, "loss_rank_avg": 0.12096765637397766, "step": 955, "valid_targets_mean": 10414.7, "valid_targets_min": 3631 }, { "epoch": 5.715705765407555, "grad_norm": 0.22049322690289747, "learning_rate": 4.01022999591552e-06, "loss": 0.3625, "loss_nan_ranks": 0, "loss_rank_avg": 0.13061478734016418, "step": 960, "valid_targets_mean": 9200.2, "valid_targets_min": 2105 }, { "epoch": 5.745526838966203, "grad_norm": 0.20901029363729942, "learning_rate": 3.833634333952882e-06, "loss": 0.3584, "loss_nan_ranks": 0, "loss_rank_avg": 0.1363891214132309, "step": 965, "valid_targets_mean": 9776.2, "valid_targets_min": 3927 }, { "epoch": 5.775347912524851, "grad_norm": 0.26991051093015467, "learning_rate": 3.6606021397911605e-06, "loss": 0.3651, "loss_nan_ranks": 0, "loss_rank_avg": 0.12989675998687744, "step": 970, "valid_targets_mean": 10042.0, "valid_targets_min": 3138 }, { "epoch": 5.805168986083499, "grad_norm": 0.19237954867107937, "learning_rate": 3.491171554015886e-06, "loss": 0.3661, "loss_nan_ranks": 0, "loss_rank_avg": 0.10952335596084595, "step": 975, "valid_targets_mean": 9541.4, "valid_targets_min": 2110 }, { "epoch": 5.834990059642147, "grad_norm": 0.2239480154769859, "learning_rate": 3.3253799233288064e-06, "loss": 0.3686, "loss_nan_ranks": 0, "loss_rank_avg": 0.12480639666318893, "step": 980, "valid_targets_mean": 9702.5, "valid_targets_min": 3851 }, { "epoch": 5.864811133200796, "grad_norm": 0.19907388113430682, "learning_rate": 3.1632637923157517e-06, "loss": 0.3629, "loss_nan_ranks": 0, "loss_rank_avg": 0.11497035622596741, "step": 985, "valid_targets_mean": 9566.5, "valid_targets_min": 2619 }, { "epoch": 5.894632206759443, "grad_norm": 0.20959374164502262, "learning_rate": 3.004858895391294e-06, "loss": 0.3634, "loss_nan_ranks": 0, "loss_rank_avg": 0.11463524401187897, "step": 990, "valid_targets_mean": 8378.8, "valid_targets_min": 2114 }, { "epoch": 5.924453280318091, "grad_norm": 0.1941612766228534, "learning_rate": 2.8502001489220067e-06, "loss": 0.3623, "loss_nan_ranks": 0, "loss_rank_avg": 0.11086033284664154, "step": 995, "valid_targets_mean": 8698.3, "valid_targets_min": 353 }, { "epoch": 5.954274353876739, "grad_norm": 0.19209598607356373, "learning_rate": 2.6993216435300194e-06, "loss": 0.3578, "loss_nan_ranks": 0, "loss_rank_avg": 0.1245051771402359, "step": 1000, "valid_targets_mean": 9976.9, "valid_targets_min": 2131 }, { "epoch": 5.9840954274353875, "grad_norm": 0.1970281152353345, "learning_rate": 2.5522566365786094e-06, "loss": 0.3568, "loss_nan_ranks": 0, "loss_rank_avg": 0.12308112531900406, "step": 1005, "valid_targets_mean": 10133.8, "valid_targets_min": 4146 }, { "epoch": 6.01192842942346, "grad_norm": 0.20447659959488607, "learning_rate": 2.4090375448414505e-06, "loss": 0.3636, "loss_nan_ranks": 0, "loss_rank_avg": 0.12563642859458923, "step": 1010, "valid_targets_mean": 9465.2, "valid_targets_min": 2441 }, { "epoch": 6.041749502982108, "grad_norm": 0.20437038455108572, "learning_rate": 2.26969593735715e-06, "loss": 0.3597, "loss_nan_ranks": 0, "loss_rank_avg": 0.11522137373685837, "step": 1015, "valid_targets_mean": 9140.0, "valid_targets_min": 2534 }, { "epoch": 6.071570576540755, "grad_norm": 0.22257426260166877, "learning_rate": 2.1342625284706565e-06, "loss": 0.364, "loss_nan_ranks": 0, "loss_rank_avg": 0.1390356570482254, "step": 1020, "valid_targets_mean": 10661.7, "valid_targets_min": 4246 }, { "epoch": 6.101391650099403, "grad_norm": 0.19365813495938394, "learning_rate": 2.002767171063047e-06, "loss": 0.3585, "loss_nan_ranks": 0, "loss_rank_avg": 0.11969684064388275, "step": 1025, "valid_targets_mean": 9768.3, "valid_targets_min": 5211 }, { "epoch": 6.131212723658051, "grad_norm": 0.20378104515618026, "learning_rate": 1.875238849971226e-06, "loss": 0.3634, "loss_nan_ranks": 0, "loss_rank_avg": 0.1135728657245636, "step": 1030, "valid_targets_mean": 9446.8, "valid_targets_min": 3712 }, { "epoch": 6.1610337972166995, "grad_norm": 0.239141322851671, "learning_rate": 1.7517056755989336e-06, "loss": 0.3607, "loss_nan_ranks": 0, "loss_rank_avg": 0.12127785384654999, "step": 1035, "valid_targets_mean": 8748.2, "valid_targets_min": 2842 }, { "epoch": 6.190854870775348, "grad_norm": 0.19410931594859926, "learning_rate": 1.6321948777205232e-06, "loss": 0.3594, "loss_nan_ranks": 0, "loss_rank_avg": 0.11646410822868347, "step": 1040, "valid_targets_mean": 8943.3, "valid_targets_min": 2508 }, { "epoch": 6.220675944333996, "grad_norm": 0.19578149244049262, "learning_rate": 1.5167327994788484e-06, "loss": 0.3653, "loss_nan_ranks": 0, "loss_rank_avg": 0.13031047582626343, "step": 1045, "valid_targets_mean": 11259.7, "valid_targets_min": 4629 }, { "epoch": 6.250497017892644, "grad_norm": 0.1934858659957491, "learning_rate": 1.405344891578566e-06, "loss": 0.3558, "loss_nan_ranks": 0, "loss_rank_avg": 0.11782944202423096, "step": 1050, "valid_targets_mean": 10102.9, "valid_targets_min": 5316 }, { "epoch": 6.280318091451292, "grad_norm": 0.19646734609934835, "learning_rate": 1.2980557066761912e-06, "loss": 0.364, "loss_nan_ranks": 0, "loss_rank_avg": 0.1253916323184967, "step": 1055, "valid_targets_mean": 10088.6, "valid_targets_min": 1946 }, { "epoch": 6.31013916500994, "grad_norm": 0.20590729415474335, "learning_rate": 1.1948888939680647e-06, "loss": 0.366, "loss_nan_ranks": 0, "loss_rank_avg": 0.12973612546920776, "step": 1060, "valid_targets_mean": 10507.1, "valid_targets_min": 2498 }, { "epoch": 6.3399602385685885, "grad_norm": 0.194011111497666, "learning_rate": 1.0958671939774935e-06, "loss": 0.3563, "loss_nan_ranks": 0, "loss_rank_avg": 0.12194149196147919, "step": 1065, "valid_targets_mean": 9570.9, "valid_targets_min": 2391 }, { "epoch": 6.369781312127237, "grad_norm": 0.20139731344154319, "learning_rate": 1.0010124335421722e-06, "loss": 0.3626, "loss_nan_ranks": 0, "loss_rank_avg": 0.11091062426567078, "step": 1070, "valid_targets_mean": 9067.2, "valid_targets_min": 2482 }, { "epoch": 6.399602385685885, "grad_norm": 0.19867046434302968, "learning_rate": 9.103455210030066e-07, "loss": 0.3616, "loss_nan_ranks": 0, "loss_rank_avg": 0.11365881562232971, "step": 1075, "valid_targets_mean": 8556.3, "valid_targets_min": 2374 }, { "epoch": 6.429423459244533, "grad_norm": 0.1853582529262351, "learning_rate": 8.238864415954029e-07, "loss": 0.3577, "loss_nan_ranks": 0, "loss_rank_avg": 0.1302383840084076, "step": 1080, "valid_targets_mean": 9907.8, "valid_targets_min": 2467 }, { "epoch": 6.459244532803181, "grad_norm": 0.20238865724155874, "learning_rate": 7.416542530440174e-07, "loss": 0.3685, "loss_nan_ranks": 0, "loss_rank_avg": 0.1176309734582901, "step": 1085, "valid_targets_mean": 9380.4, "valid_targets_min": 2382 }, { "epoch": 6.489065606361829, "grad_norm": 0.18362044771096164, "learning_rate": 6.636670813619584e-07, "loss": 0.3614, "loss_nan_ranks": 0, "loss_rank_avg": 0.11380837857723236, "step": 1090, "valid_targets_mean": 10026.3, "valid_targets_min": 4177 }, { "epoch": 6.518886679920477, "grad_norm": 0.20523149374890035, "learning_rate": 5.899421168553887e-07, "loss": 0.3669, "loss_nan_ranks": 0, "loss_rank_avg": 0.12352012097835541, "step": 1095, "valid_targets_mean": 9891.7, "valid_targets_min": 2916 }, { "epoch": 6.548707753479126, "grad_norm": 0.20207344241537156, "learning_rate": 5.204956103343217e-07, "loss": 0.3696, "loss_nan_ranks": 0, "loss_rank_avg": 0.12412673979997635, "step": 1100, "valid_targets_mean": 9182.8, "valid_targets_min": 2008 }, { "epoch": 6.578528827037774, "grad_norm": 0.2681278098515819, "learning_rate": 4.5534286953056617e-07, "loss": 0.3614, "loss_nan_ranks": 0, "loss_rank_avg": 0.12740249931812286, "step": 1105, "valid_targets_mean": 10408.3, "valid_targets_min": 3345 }, { "epoch": 6.608349900596421, "grad_norm": 0.2219640884288412, "learning_rate": 3.9449825572350777e-07, "loss": 0.3632, "loss_nan_ranks": 0, "loss_rank_avg": 0.13584564626216888, "step": 1110, "valid_targets_mean": 10947.2, "valid_targets_min": 2619 }, { "epoch": 6.63817097415507, "grad_norm": 0.20125432861092377, "learning_rate": 3.379751805745257e-07, "loss": 0.3617, "loss_nan_ranks": 0, "loss_rank_avg": 0.12210185825824738, "step": 1115, "valid_targets_mean": 9717.5, "valid_targets_min": 3124 }, { "epoch": 6.667992047713717, "grad_norm": 0.1994889274316608, "learning_rate": 2.857861031707532e-07, "loss": 0.3618, "loss_nan_ranks": 0, "loss_rank_avg": 0.11578874289989471, "step": 1120, "valid_targets_mean": 8496.7, "valid_targets_min": 2884 }, { "epoch": 6.6978131212723655, "grad_norm": 0.19330788740500873, "learning_rate": 2.3794252727875611e-07, "loss": 0.3562, "loss_nan_ranks": 0, "loss_rank_avg": 0.11735907196998596, "step": 1125, "valid_targets_mean": 9695.8, "valid_targets_min": 2649 }, { "epoch": 6.727634194831014, "grad_norm": 0.21484500199290843, "learning_rate": 1.9445499880883067e-07, "loss": 0.3638, "loss_nan_ranks": 0, "loss_rank_avg": 0.12110140174627304, "step": 1130, "valid_targets_mean": 9903.2, "valid_targets_min": 2360 }, { "epoch": 6.757455268389662, "grad_norm": 0.1965552602542897, "learning_rate": 1.553331034904293e-07, "loss": 0.3603, "loss_nan_ranks": 0, "loss_rank_avg": 0.12987342476844788, "step": 1135, "valid_targets_mean": 10510.9, "valid_targets_min": 4514 }, { "epoch": 6.78727634194831, "grad_norm": 0.19572567045219616, "learning_rate": 1.2058546475921305e-07, "loss": 0.3674, "loss_nan_ranks": 0, "loss_rank_avg": 0.12360957264900208, "step": 1140, "valid_targets_mean": 9654.7, "valid_targets_min": 2551 }, { "epoch": 6.817097415506958, "grad_norm": 0.20329684782457239, "learning_rate": 9.021974185625004e-08, "loss": 0.3644, "loss_nan_ranks": 0, "loss_rank_avg": 0.12088020890951157, "step": 1145, "valid_targets_mean": 9744.9, "valid_targets_min": 2669 }, { "epoch": 6.846918489065606, "grad_norm": 0.19353349526148012, "learning_rate": 6.424262813971904e-08, "loss": 0.3656, "loss_nan_ranks": 0, "loss_rank_avg": 0.11435583233833313, "step": 1150, "valid_targets_mean": 9121.9, "valid_targets_min": 1150 }, { "epoch": 6.8767395626242545, "grad_norm": 0.1849991984465579, "learning_rate": 4.2659849609520966e-08, "loss": 0.3632, "loss_nan_ranks": 0, "loss_rank_avg": 0.12063400447368622, "step": 1155, "valid_targets_mean": 10022.1, "valid_targets_min": 2233 }, { "epoch": 6.906560636182903, "grad_norm": 0.19007497403166831, "learning_rate": 2.5476163645143936e-08, "loss": 0.3687, "loss_nan_ranks": 0, "loss_rank_avg": 0.11399763822555542, "step": 1160, "valid_targets_mean": 9254.8, "valid_targets_min": 3517 }, { "epoch": 6.936381709741551, "grad_norm": 0.1948261449647107, "learning_rate": 1.2695357957002163e-08, "loss": 0.3579, "loss_nan_ranks": 0, "loss_rank_avg": 0.09775931388139725, "step": 1165, "valid_targets_mean": 8721.5, "valid_targets_min": 2682 }, { "epoch": 6.966202783300199, "grad_norm": 0.21107993966883115, "learning_rate": 4.32024975154155e-09, "loss": 0.3643, "loss_nan_ranks": 0, "loss_rank_avg": 0.11073368787765503, "step": 1170, "valid_targets_mean": 8784.6, "valid_targets_min": 2911 }, { "epoch": 6.996023856858847, "grad_norm": 0.19162092330786054, "learning_rate": 3.5268511025421393e-10, "loss": 0.3673, "loss_nan_ranks": 0, "loss_rank_avg": 0.11796814203262329, "step": 1175, "valid_targets_mean": 9922.6, "valid_targets_min": 3370 }, { "epoch": 7.0, "step": 1176, "total_flos": 4.68228205146905e+18, "train_loss": 0.0, "train_runtime": 1.3887, "train_samples_per_second": 81133.704, "train_steps_per_second": 846.823 } ], "logging_steps": 5, "max_steps": 1176, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.68228205146905e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }