{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "eval_steps": 500, "global_step": 1687, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02074688796680498, "grad_norm": 41.397027140234286, "learning_rate": 9.467455621301776e-07, "loss": 3.331, "loss_nan_ranks": 0, "loss_rank_avg": 2.9795444011688232, "step": 5, "valid_targets_mean": 134.4, "valid_targets_min": 41 }, { "epoch": 0.04149377593360996, "grad_norm": 35.61947240445416, "learning_rate": 2.1301775147929e-06, "loss": 2.722, "loss_nan_ranks": 0, "loss_rank_avg": 2.5642552375793457, "step": 10, "valid_targets_mean": 161.3, "valid_targets_min": 48 }, { "epoch": 0.06224066390041494, "grad_norm": 52.370048634783274, "learning_rate": 3.313609467455622e-06, "loss": 3.1562, "loss_nan_ranks": 0, "loss_rank_avg": 3.396860122680664, "step": 15, "valid_targets_mean": 97.2, "valid_targets_min": 66 }, { "epoch": 0.08298755186721991, "grad_norm": 25.358721084807172, "learning_rate": 4.497041420118343e-06, "loss": 2.9616, "loss_nan_ranks": 0, "loss_rank_avg": 3.084906578063965, "step": 20, "valid_targets_mean": 104.8, "valid_targets_min": 68 }, { "epoch": 0.1037344398340249, "grad_norm": 14.282450323196407, "learning_rate": 5.680473372781066e-06, "loss": 2.4025, "loss_nan_ranks": 0, "loss_rank_avg": 2.6719250679016113, "step": 25, "valid_targets_mean": 96.6, "valid_targets_min": 54 }, { "epoch": 0.12448132780082988, "grad_norm": 6.589012266835091, "learning_rate": 6.863905325443787e-06, "loss": 2.3199, "loss_nan_ranks": 0, "loss_rank_avg": 1.578616976737976, "step": 30, "valid_targets_mean": 189.2, "valid_targets_min": 59 }, { "epoch": 0.14522821576763487, "grad_norm": 8.720894250976365, "learning_rate": 8.04733727810651e-06, "loss": 2.0466, "loss_nan_ranks": 0, "loss_rank_avg": 2.2473416328430176, "step": 35, "valid_targets_mean": 108.4, "valid_targets_min": 59 }, { "epoch": 0.16597510373443983, "grad_norm": 6.572593557764174, "learning_rate": 9.230769230769232e-06, "loss": 2.0725, "loss_nan_ranks": 0, "loss_rank_avg": 2.0674421787261963, "step": 40, "valid_targets_mean": 113.9, "valid_targets_min": 58 }, { "epoch": 0.18672199170124482, "grad_norm": 5.530699449121817, "learning_rate": 1.0414201183431953e-05, "loss": 1.7968, "loss_nan_ranks": 0, "loss_rank_avg": 1.739060878753662, "step": 45, "valid_targets_mean": 115.5, "valid_targets_min": 62 }, { "epoch": 0.2074688796680498, "grad_norm": 4.37749124574813, "learning_rate": 1.1597633136094675e-05, "loss": 1.6917, "loss_nan_ranks": 0, "loss_rank_avg": 1.5368684530258179, "step": 50, "valid_targets_mean": 139.1, "valid_targets_min": 57 }, { "epoch": 0.22821576763485477, "grad_norm": 4.295701199410857, "learning_rate": 1.2781065088757399e-05, "loss": 1.5291, "loss_nan_ranks": 0, "loss_rank_avg": 1.4562065601348877, "step": 55, "valid_targets_mean": 132.5, "valid_targets_min": 55 }, { "epoch": 0.24896265560165975, "grad_norm": 4.084731189779167, "learning_rate": 1.396449704142012e-05, "loss": 1.5082, "loss_nan_ranks": 0, "loss_rank_avg": 1.390439510345459, "step": 60, "valid_targets_mean": 141.6, "valid_targets_min": 47 }, { "epoch": 0.2697095435684647, "grad_norm": 4.96787833573562, "learning_rate": 1.5147928994082842e-05, "loss": 1.3951, "loss_nan_ranks": 0, "loss_rank_avg": 1.4382585287094116, "step": 65, "valid_targets_mean": 94.7, "valid_targets_min": 64 }, { "epoch": 0.29045643153526973, "grad_norm": 4.26457221140469, "learning_rate": 1.6331360946745562e-05, "loss": 1.4802, "loss_nan_ranks": 0, "loss_rank_avg": 1.4862592220306396, "step": 70, "valid_targets_mean": 133.1, "valid_targets_min": 75 }, { "epoch": 0.3112033195020747, "grad_norm": 4.762762510462149, "learning_rate": 1.7514792899408286e-05, "loss": 1.4573, "loss_nan_ranks": 0, "loss_rank_avg": 1.6193333864212036, "step": 75, "valid_targets_mean": 95.4, "valid_targets_min": 62 }, { "epoch": 0.33195020746887965, "grad_norm": 5.211587363436592, "learning_rate": 1.8698224852071007e-05, "loss": 1.3158, "loss_nan_ranks": 0, "loss_rank_avg": 1.5069483518600464, "step": 80, "valid_targets_mean": 85.4, "valid_targets_min": 51 }, { "epoch": 0.35269709543568467, "grad_norm": 5.167018764133475, "learning_rate": 1.9881656804733727e-05, "loss": 1.3183, "loss_nan_ranks": 0, "loss_rank_avg": 1.5408308506011963, "step": 85, "valid_targets_mean": 111.9, "valid_targets_min": 78 }, { "epoch": 0.37344398340248963, "grad_norm": 4.19223324707348, "learning_rate": 2.106508875739645e-05, "loss": 1.1263, "loss_nan_ranks": 0, "loss_rank_avg": 1.0536243915557861, "step": 90, "valid_targets_mean": 109.9, "valid_targets_min": 54 }, { "epoch": 0.3941908713692946, "grad_norm": 4.327426360795543, "learning_rate": 2.224852071005917e-05, "loss": 1.2161, "loss_nan_ranks": 0, "loss_rank_avg": 1.2340021133422852, "step": 95, "valid_targets_mean": 108.8, "valid_targets_min": 47 }, { "epoch": 0.4149377593360996, "grad_norm": 4.407589788387759, "learning_rate": 2.3431952662721896e-05, "loss": 1.179, "loss_nan_ranks": 0, "loss_rank_avg": 1.28630530834198, "step": 100, "valid_targets_mean": 123.1, "valid_targets_min": 78 }, { "epoch": 0.43568464730290457, "grad_norm": 4.476057603578654, "learning_rate": 2.461538461538462e-05, "loss": 1.2358, "loss_nan_ranks": 0, "loss_rank_avg": 1.2336297035217285, "step": 105, "valid_targets_mean": 96.8, "valid_targets_min": 61 }, { "epoch": 0.45643153526970953, "grad_norm": 4.275689963533272, "learning_rate": 2.5798816568047337e-05, "loss": 1.2222, "loss_nan_ranks": 0, "loss_rank_avg": 1.3048899173736572, "step": 110, "valid_targets_mean": 119.9, "valid_targets_min": 60 }, { "epoch": 0.47717842323651455, "grad_norm": 4.114686647723986, "learning_rate": 2.698224852071006e-05, "loss": 1.1238, "loss_nan_ranks": 0, "loss_rank_avg": 1.1282589435577393, "step": 115, "valid_targets_mean": 119.6, "valid_targets_min": 57 }, { "epoch": 0.4979253112033195, "grad_norm": 2.848802193178965, "learning_rate": 2.8165680473372784e-05, "loss": 1.1292, "loss_nan_ranks": 0, "loss_rank_avg": 0.9970792531967163, "step": 120, "valid_targets_mean": 197.2, "valid_targets_min": 70 }, { "epoch": 0.5186721991701245, "grad_norm": 3.917339058459324, "learning_rate": 2.9349112426035505e-05, "loss": 1.2792, "loss_nan_ranks": 0, "loss_rank_avg": 1.0999289751052856, "step": 125, "valid_targets_mean": 106.1, "valid_targets_min": 50 }, { "epoch": 0.5394190871369294, "grad_norm": 4.973393373968101, "learning_rate": 3.0532544378698226e-05, "loss": 1.2547, "loss_nan_ranks": 0, "loss_rank_avg": 1.5572994947433472, "step": 130, "valid_targets_mean": 93.6, "valid_targets_min": 48 }, { "epoch": 0.5601659751037344, "grad_norm": 4.093849799808453, "learning_rate": 3.171597633136095e-05, "loss": 1.1989, "loss_nan_ranks": 0, "loss_rank_avg": 1.289677381515503, "step": 135, "valid_targets_mean": 117.5, "valid_targets_min": 56 }, { "epoch": 0.5809128630705395, "grad_norm": 4.130110837411625, "learning_rate": 3.289940828402367e-05, "loss": 1.291, "loss_nan_ranks": 0, "loss_rank_avg": 1.284778356552124, "step": 140, "valid_targets_mean": 104.9, "valid_targets_min": 59 }, { "epoch": 0.6016597510373444, "grad_norm": 5.236925712153894, "learning_rate": 3.40828402366864e-05, "loss": 1.2244, "loss_nan_ranks": 0, "loss_rank_avg": 1.3275096416473389, "step": 145, "valid_targets_mean": 93.9, "valid_targets_min": 52 }, { "epoch": 0.6224066390041494, "grad_norm": 4.0627237296792185, "learning_rate": 3.5266272189349114e-05, "loss": 1.1273, "loss_nan_ranks": 0, "loss_rank_avg": 1.3301905393600464, "step": 150, "valid_targets_mean": 118.0, "valid_targets_min": 82 }, { "epoch": 0.6431535269709544, "grad_norm": 3.6554328860688985, "learning_rate": 3.644970414201184e-05, "loss": 1.0308, "loss_nan_ranks": 0, "loss_rank_avg": 1.0490658283233643, "step": 155, "valid_targets_mean": 149.6, "valid_targets_min": 53 }, { "epoch": 0.6639004149377593, "grad_norm": 4.388884052961649, "learning_rate": 3.763313609467456e-05, "loss": 0.9736, "loss_nan_ranks": 0, "loss_rank_avg": 1.3347870111465454, "step": 160, "valid_targets_mean": 87.1, "valid_targets_min": 46 }, { "epoch": 0.6846473029045643, "grad_norm": 4.283544953396232, "learning_rate": 3.881656804733728e-05, "loss": 1.166, "loss_nan_ranks": 0, "loss_rank_avg": 1.3210680484771729, "step": 165, "valid_targets_mean": 119.6, "valid_targets_min": 61 }, { "epoch": 0.7053941908713693, "grad_norm": 4.3275598749896504, "learning_rate": 4e-05, "loss": 1.0946, "loss_nan_ranks": 0, "loss_rank_avg": 1.173753023147583, "step": 170, "valid_targets_mean": 94.1, "valid_targets_min": 63 }, { "epoch": 0.7261410788381742, "grad_norm": 3.133490340424578, "learning_rate": 3.999892923951514e-05, "loss": 1.0991, "loss_nan_ranks": 0, "loss_rank_avg": 0.9916005730628967, "step": 175, "valid_targets_mean": 175.0, "valid_targets_min": 65 }, { "epoch": 0.7468879668049793, "grad_norm": 3.820724592316591, "learning_rate": 3.999571707271335e-05, "loss": 1.1184, "loss_nan_ranks": 0, "loss_rank_avg": 1.162503719329834, "step": 180, "valid_targets_mean": 116.2, "valid_targets_min": 66 }, { "epoch": 0.7676348547717843, "grad_norm": 3.1153583861325584, "learning_rate": 3.999036384354076e-05, "loss": 1.0885, "loss_nan_ranks": 0, "loss_rank_avg": 1.132462501525879, "step": 185, "valid_targets_mean": 155.9, "valid_targets_min": 55 }, { "epoch": 0.7883817427385892, "grad_norm": 3.2268572394739867, "learning_rate": 3.99828701252e-05, "loss": 1.1776, "loss_nan_ranks": 0, "loss_rank_avg": 1.0295939445495605, "step": 190, "valid_targets_mean": 146.7, "valid_targets_min": 68 }, { "epoch": 0.8091286307053942, "grad_norm": 4.014232602275577, "learning_rate": 3.997323672008881e-05, "loss": 1.1762, "loss_nan_ranks": 0, "loss_rank_avg": 1.3584688901901245, "step": 195, "valid_targets_mean": 106.2, "valid_targets_min": 56 }, { "epoch": 0.8298755186721992, "grad_norm": 2.894292449308229, "learning_rate": 3.9961464659714154e-05, "loss": 1.0519, "loss_nan_ranks": 0, "loss_rank_avg": 0.8366074562072754, "step": 200, "valid_targets_mean": 144.6, "valid_targets_min": 63 }, { "epoch": 0.8506224066390041, "grad_norm": 3.346195548737324, "learning_rate": 3.994755520458173e-05, "loss": 1.2221, "loss_nan_ranks": 0, "loss_rank_avg": 1.131117343902588, "step": 205, "valid_targets_mean": 118.6, "valid_targets_min": 61 }, { "epoch": 0.8713692946058091, "grad_norm": 2.861208912009083, "learning_rate": 3.9931509844061034e-05, "loss": 1.1059, "loss_nan_ranks": 0, "loss_rank_avg": 0.914353609085083, "step": 210, "valid_targets_mean": 161.1, "valid_targets_min": 68 }, { "epoch": 0.8921161825726142, "grad_norm": 2.4635047614518433, "learning_rate": 3.991333029622587e-05, "loss": 1.0881, "loss_nan_ranks": 0, "loss_rank_avg": 0.7619673609733582, "step": 215, "valid_targets_mean": 187.9, "valid_targets_min": 49 }, { "epoch": 0.9128630705394191, "grad_norm": 3.439388327039743, "learning_rate": 3.9893018507670384e-05, "loss": 1.0801, "loss_nan_ranks": 0, "loss_rank_avg": 1.1252914667129517, "step": 220, "valid_targets_mean": 128.8, "valid_targets_min": 66 }, { "epoch": 0.9336099585062241, "grad_norm": 3.0479402830613016, "learning_rate": 3.987057665330063e-05, "loss": 1.1093, "loss_nan_ranks": 0, "loss_rank_avg": 1.052093505859375, "step": 225, "valid_targets_mean": 155.6, "valid_targets_min": 63 }, { "epoch": 0.9543568464730291, "grad_norm": 3.4173751722238928, "learning_rate": 3.984600713610169e-05, "loss": 1.043, "loss_nan_ranks": 0, "loss_rank_avg": 0.9762424230575562, "step": 230, "valid_targets_mean": 115.1, "valid_targets_min": 66 }, { "epoch": 0.975103734439834, "grad_norm": 3.203880033800484, "learning_rate": 3.981931258688038e-05, "loss": 1.1468, "loss_nan_ranks": 0, "loss_rank_avg": 1.121521234512329, "step": 235, "valid_targets_mean": 137.5, "valid_targets_min": 68 }, { "epoch": 0.995850622406639, "grad_norm": 3.4770939413356463, "learning_rate": 3.979049586398355e-05, "loss": 1.1073, "loss_nan_ranks": 0, "loss_rank_avg": 1.1996042728424072, "step": 240, "valid_targets_mean": 117.0, "valid_targets_min": 46 }, { "epoch": 1.016597510373444, "grad_norm": 3.930284483527447, "learning_rate": 3.975956005299202e-05, "loss": 1.1062, "loss_nan_ranks": 0, "loss_rank_avg": 1.1427793502807617, "step": 245, "valid_targets_mean": 93.7, "valid_targets_min": 50 }, { "epoch": 1.037344398340249, "grad_norm": 3.4787980480099634, "learning_rate": 3.972650846639019e-05, "loss": 0.9987, "loss_nan_ranks": 0, "loss_rank_avg": 0.8831626176834106, "step": 250, "valid_targets_mean": 117.0, "valid_targets_min": 51 }, { "epoch": 1.058091286307054, "grad_norm": 5.765404317940215, "learning_rate": 3.9691344643211346e-05, "loss": 0.9541, "loss_nan_ranks": 0, "loss_rank_avg": 1.1344108581542969, "step": 255, "valid_targets_mean": 104.3, "valid_targets_min": 63 }, { "epoch": 1.0788381742738589, "grad_norm": 3.77909701623929, "learning_rate": 3.965407234865871e-05, "loss": 1.0478, "loss_nan_ranks": 0, "loss_rank_avg": 1.0921099185943604, "step": 260, "valid_targets_mean": 114.2, "valid_targets_min": 59 }, { "epoch": 1.099585062240664, "grad_norm": 3.026006206344685, "learning_rate": 3.9614695573702325e-05, "loss": 0.9397, "loss_nan_ranks": 0, "loss_rank_avg": 0.7856882810592651, "step": 265, "valid_targets_mean": 144.1, "valid_targets_min": 64 }, { "epoch": 1.120331950207469, "grad_norm": 2.4834572794776446, "learning_rate": 3.957321853465163e-05, "loss": 0.9561, "loss_nan_ranks": 0, "loss_rank_avg": 0.7817927598953247, "step": 270, "valid_targets_mean": 190.4, "valid_targets_min": 55 }, { "epoch": 1.1410788381742738, "grad_norm": 3.056967616081349, "learning_rate": 3.952964567270409e-05, "loss": 0.9533, "loss_nan_ranks": 0, "loss_rank_avg": 0.8634785413742065, "step": 275, "valid_targets_mean": 134.5, "valid_targets_min": 49 }, { "epoch": 1.161825726141079, "grad_norm": 2.5906209217918117, "learning_rate": 3.9483981653469586e-05, "loss": 0.9866, "loss_nan_ranks": 0, "loss_rank_avg": 0.7464009523391724, "step": 280, "valid_targets_mean": 179.6, "valid_targets_min": 57 }, { "epoch": 1.1825726141078838, "grad_norm": 3.2705129782461686, "learning_rate": 3.9436231366470836e-05, "loss": 1.0861, "loss_nan_ranks": 0, "loss_rank_avg": 0.9500118494033813, "step": 285, "valid_targets_mean": 130.6, "valid_targets_min": 64 }, { "epoch": 1.2033195020746887, "grad_norm": 2.8686194044847473, "learning_rate": 3.93863999246199e-05, "loss": 0.8913, "loss_nan_ranks": 0, "loss_rank_avg": 0.7544863224029541, "step": 290, "valid_targets_mean": 159.8, "valid_targets_min": 63 }, { "epoch": 1.2240663900414939, "grad_norm": 2.4839411870748935, "learning_rate": 3.933449266367066e-05, "loss": 0.9027, "loss_nan_ranks": 0, "loss_rank_avg": 0.6904275417327881, "step": 295, "valid_targets_mean": 166.6, "valid_targets_min": 44 }, { "epoch": 1.2448132780082988, "grad_norm": 3.1275981909890938, "learning_rate": 3.92805151416475e-05, "loss": 0.984, "loss_nan_ranks": 0, "loss_rank_avg": 1.0172083377838135, "step": 300, "valid_targets_mean": 148.0, "valid_targets_min": 68 }, { "epoch": 1.2655601659751037, "grad_norm": 3.6194049895575517, "learning_rate": 3.9224473138250186e-05, "loss": 0.9891, "loss_nan_ranks": 0, "loss_rank_avg": 0.9883549213409424, "step": 305, "valid_targets_mean": 116.4, "valid_targets_min": 52 }, { "epoch": 1.2863070539419086, "grad_norm": 3.426981706024395, "learning_rate": 3.9166372654235e-05, "loss": 0.8473, "loss_nan_ranks": 0, "loss_rank_avg": 0.7937560677528381, "step": 310, "valid_targets_mean": 118.4, "valid_targets_min": 46 }, { "epoch": 1.3070539419087137, "grad_norm": 3.152855357782168, "learning_rate": 3.9106219910772184e-05, "loss": 0.9896, "loss_nan_ranks": 0, "loss_rank_avg": 0.8297359943389893, "step": 315, "valid_targets_mean": 124.4, "valid_targets_min": 68 }, { "epoch": 1.3278008298755186, "grad_norm": 3.069176743069222, "learning_rate": 3.90440213487798e-05, "loss": 0.9571, "loss_nan_ranks": 0, "loss_rank_avg": 0.8529037237167358, "step": 320, "valid_targets_mean": 140.6, "valid_targets_min": 58 }, { "epoch": 1.3485477178423237, "grad_norm": 3.3034611009209978, "learning_rate": 3.897978362823411e-05, "loss": 0.9725, "loss_nan_ranks": 0, "loss_rank_avg": 1.0133411884307861, "step": 325, "valid_targets_mean": 133.8, "valid_targets_min": 65 }, { "epoch": 1.3692946058091287, "grad_norm": 3.5392784144202873, "learning_rate": 3.8913513627456374e-05, "loss": 1.0897, "loss_nan_ranks": 0, "loss_rank_avg": 1.0982413291931152, "step": 330, "valid_targets_mean": 108.6, "valid_targets_min": 47 }, { "epoch": 1.3900414937759336, "grad_norm": 4.133104453790177, "learning_rate": 3.8845218442376416e-05, "loss": 1.0984, "loss_nan_ranks": 0, "loss_rank_avg": 1.1011841297149658, "step": 335, "valid_targets_mean": 92.4, "valid_targets_min": 58 }, { "epoch": 1.4107883817427385, "grad_norm": 2.911694581277071, "learning_rate": 3.877490538577278e-05, "loss": 0.9807, "loss_nan_ranks": 0, "loss_rank_avg": 0.8329280614852905, "step": 340, "valid_targets_mean": 146.0, "valid_targets_min": 69 }, { "epoch": 1.4315352697095436, "grad_norm": 3.7192204333227976, "learning_rate": 3.870258198648974e-05, "loss": 0.8938, "loss_nan_ranks": 0, "loss_rank_avg": 0.9809222221374512, "step": 345, "valid_targets_mean": 112.0, "valid_targets_min": 73 }, { "epoch": 1.4522821576763485, "grad_norm": 4.515200149712003, "learning_rate": 3.862825598863108e-05, "loss": 0.9289, "loss_nan_ranks": 0, "loss_rank_avg": 0.774828314781189, "step": 350, "valid_targets_mean": 201.0, "valid_targets_min": 64 }, { "epoch": 1.4730290456431536, "grad_norm": 3.36292777919316, "learning_rate": 3.855193535073097e-05, "loss": 0.9448, "loss_nan_ranks": 0, "loss_rank_avg": 1.003603458404541, "step": 355, "valid_targets_mean": 131.2, "valid_targets_min": 58 }, { "epoch": 1.4937759336099585, "grad_norm": 3.3587994781305976, "learning_rate": 3.847362824490173e-05, "loss": 0.9142, "loss_nan_ranks": 0, "loss_rank_avg": 0.8775392174720764, "step": 360, "valid_targets_mean": 119.1, "valid_targets_min": 60 }, { "epoch": 1.5145228215767634, "grad_norm": 4.141295017158325, "learning_rate": 3.839334305595881e-05, "loss": 0.9044, "loss_nan_ranks": 0, "loss_rank_avg": 0.9412416219711304, "step": 365, "valid_targets_mean": 143.9, "valid_targets_min": 57 }, { "epoch": 1.5352697095435683, "grad_norm": 2.9785125564157107, "learning_rate": 3.831108838052301e-05, "loss": 1.0486, "loss_nan_ranks": 0, "loss_rank_avg": 0.7641946077346802, "step": 370, "valid_targets_mean": 147.4, "valid_targets_min": 64 }, { "epoch": 1.5560165975103735, "grad_norm": 4.1021332056584, "learning_rate": 3.822687302609994e-05, "loss": 1.0625, "loss_nan_ranks": 0, "loss_rank_avg": 1.2731382846832275, "step": 375, "valid_targets_mean": 96.9, "valid_targets_min": 59 }, { "epoch": 1.5767634854771784, "grad_norm": 3.4014528210970743, "learning_rate": 3.814070601013697e-05, "loss": 0.99, "loss_nan_ranks": 0, "loss_rank_avg": 1.0162218809127808, "step": 380, "valid_targets_mean": 117.1, "valid_targets_min": 67 }, { "epoch": 1.5975103734439835, "grad_norm": 3.998857883960084, "learning_rate": 3.8052596559057674e-05, "loss": 1.0098, "loss_nan_ranks": 0, "loss_rank_avg": 1.1427373886108398, "step": 385, "valid_targets_mean": 94.8, "valid_targets_min": 63 }, { "epoch": 1.6182572614107884, "grad_norm": 3.573313288898261, "learning_rate": 3.7962554107273926e-05, "loss": 0.9996, "loss_nan_ranks": 0, "loss_rank_avg": 0.9803476333618164, "step": 390, "valid_targets_mean": 125.8, "valid_targets_min": 64 }, { "epoch": 1.6390041493775933, "grad_norm": 2.627525409761934, "learning_rate": 3.7870588296175644e-05, "loss": 0.9104, "loss_nan_ranks": 0, "loss_rank_avg": 0.8428837060928345, "step": 395, "valid_targets_mean": 185.5, "valid_targets_min": 54 }, { "epoch": 1.6597510373443982, "grad_norm": 3.8364107288625786, "learning_rate": 3.7776708973098476e-05, "loss": 0.9904, "loss_nan_ranks": 0, "loss_rank_avg": 1.069312334060669, "step": 400, "valid_targets_mean": 105.6, "valid_targets_min": 63 }, { "epoch": 1.6804979253112033, "grad_norm": 2.548205520795793, "learning_rate": 3.768092619026937e-05, "loss": 1.0073, "loss_nan_ranks": 0, "loss_rank_avg": 0.7841141223907471, "step": 405, "valid_targets_mean": 155.5, "valid_targets_min": 62 }, { "epoch": 1.7012448132780082, "grad_norm": 3.382526642855817, "learning_rate": 3.7583250203730234e-05, "loss": 0.9565, "loss_nan_ranks": 0, "loss_rank_avg": 0.8623814582824707, "step": 410, "valid_targets_mean": 124.7, "valid_targets_min": 69 }, { "epoch": 1.7219917012448134, "grad_norm": 4.314083908492832, "learning_rate": 3.7483691472239744e-05, "loss": 1.0132, "loss_nan_ranks": 0, "loss_rank_avg": 1.0424532890319824, "step": 415, "valid_targets_mean": 93.2, "valid_targets_min": 60 }, { "epoch": 1.7427385892116183, "grad_norm": 2.5482144454375693, "learning_rate": 3.7382260656153436e-05, "loss": 0.8144, "loss_nan_ranks": 0, "loss_rank_avg": 0.6370407342910767, "step": 420, "valid_targets_mean": 142.7, "valid_targets_min": 61 }, { "epoch": 1.7634854771784232, "grad_norm": 2.6674353520861014, "learning_rate": 3.727896861628231e-05, "loss": 0.9513, "loss_nan_ranks": 0, "loss_rank_avg": 0.7452382445335388, "step": 425, "valid_targets_mean": 178.5, "valid_targets_min": 51 }, { "epoch": 1.784232365145228, "grad_norm": 3.494532029292696, "learning_rate": 3.717382641272984e-05, "loss": 0.9952, "loss_nan_ranks": 0, "loss_rank_avg": 0.999313235282898, "step": 430, "valid_targets_mean": 100.6, "valid_targets_min": 41 }, { "epoch": 1.8049792531120332, "grad_norm": 2.11108790003265, "learning_rate": 3.7066845303707694e-05, "loss": 0.8269, "loss_nan_ranks": 0, "loss_rank_avg": 0.5311782360076904, "step": 435, "valid_targets_mean": 236.6, "valid_targets_min": 57 }, { "epoch": 1.8257261410788381, "grad_norm": 3.9114163853143737, "learning_rate": 3.6958036744330297e-05, "loss": 1.0451, "loss_nan_ranks": 0, "loss_rank_avg": 1.1796157360076904, "step": 440, "valid_targets_mean": 100.8, "valid_targets_min": 65 }, { "epoch": 1.8464730290456433, "grad_norm": 2.966917119742618, "learning_rate": 3.6847412385388236e-05, "loss": 0.8965, "loss_nan_ranks": 0, "loss_rank_avg": 0.8532978892326355, "step": 445, "valid_targets_mean": 140.1, "valid_targets_min": 61 }, { "epoch": 1.8672199170124482, "grad_norm": 3.599518479860152, "learning_rate": 3.673498407210073e-05, "loss": 0.9751, "loss_nan_ranks": 0, "loss_rank_avg": 0.9784261584281921, "step": 450, "valid_targets_mean": 105.6, "valid_targets_min": 53 }, { "epoch": 1.887966804979253, "grad_norm": 3.940460990497638, "learning_rate": 3.662076384284729e-05, "loss": 0.9165, "loss_nan_ranks": 0, "loss_rank_avg": 1.0794143676757812, "step": 455, "valid_targets_mean": 93.3, "valid_targets_min": 56 }, { "epoch": 1.908713692946058, "grad_norm": 2.4631143719909656, "learning_rate": 3.650476392787873e-05, "loss": 0.9068, "loss_nan_ranks": 0, "loss_rank_avg": 0.7837638258934021, "step": 460, "valid_targets_mean": 185.1, "valid_targets_min": 69 }, { "epoch": 1.929460580912863, "grad_norm": 3.725991171844277, "learning_rate": 3.638699674800758e-05, "loss": 0.9429, "loss_nan_ranks": 0, "loss_rank_avg": 1.1260584592819214, "step": 465, "valid_targets_mean": 108.4, "valid_targets_min": 66 }, { "epoch": 1.950207468879668, "grad_norm": 3.3343720514690145, "learning_rate": 3.6267474913278086e-05, "loss": 1.0307, "loss_nan_ranks": 0, "loss_rank_avg": 0.9506238698959351, "step": 470, "valid_targets_mean": 127.8, "valid_targets_min": 50 }, { "epoch": 1.9709543568464731, "grad_norm": 3.827770704987103, "learning_rate": 3.614621122161603e-05, "loss": 1.0214, "loss_nan_ranks": 0, "loss_rank_avg": 1.1068814992904663, "step": 475, "valid_targets_mean": 105.6, "valid_targets_min": 41 }, { "epoch": 1.991701244813278, "grad_norm": 3.861407678400857, "learning_rate": 3.6023218657458334e-05, "loss": 1.0285, "loss_nan_ranks": 0, "loss_rank_avg": 1.1757726669311523, "step": 480, "valid_targets_mean": 103.8, "valid_targets_min": 64 }, { "epoch": 2.012448132780083, "grad_norm": 3.1202708919801934, "learning_rate": 3.589851039036277e-05, "loss": 0.8016, "loss_nan_ranks": 0, "loss_rank_avg": 0.7584832310676575, "step": 485, "valid_targets_mean": 110.2, "valid_targets_min": 56 }, { "epoch": 2.033195020746888, "grad_norm": 3.8354655021211514, "learning_rate": 3.577209977359778e-05, "loss": 0.697, "loss_nan_ranks": 0, "loss_rank_avg": 0.8398362398147583, "step": 490, "valid_targets_mean": 103.0, "valid_targets_min": 57 }, { "epoch": 2.0539419087136928, "grad_norm": 4.329441765322654, "learning_rate": 3.5644000342712695e-05, "loss": 0.803, "loss_nan_ranks": 0, "loss_rank_avg": 0.9269231557846069, "step": 495, "valid_targets_mean": 100.8, "valid_targets_min": 67 }, { "epoch": 2.074688796680498, "grad_norm": 4.4746136552596845, "learning_rate": 3.55142258140884e-05, "loss": 0.6451, "loss_nan_ranks": 0, "loss_rank_avg": 0.8513693809509277, "step": 500, "valid_targets_mean": 110.4, "valid_targets_min": 59 }, { "epoch": 2.095435684647303, "grad_norm": 4.6090358834641565, "learning_rate": 3.538279008346861e-05, "loss": 0.7953, "loss_nan_ranks": 0, "loss_rank_avg": 0.8998103141784668, "step": 505, "valid_targets_mean": 84.5, "valid_targets_min": 63 }, { "epoch": 2.116182572614108, "grad_norm": 3.5628641412692224, "learning_rate": 3.524970722447197e-05, "loss": 0.6491, "loss_nan_ranks": 0, "loss_rank_avg": 0.6803164482116699, "step": 510, "valid_targets_mean": 125.2, "valid_targets_min": 55 }, { "epoch": 2.136929460580913, "grad_norm": 4.445807529078514, "learning_rate": 3.511499148708517e-05, "loss": 0.8269, "loss_nan_ranks": 0, "loss_rank_avg": 0.9091898202896118, "step": 515, "valid_targets_mean": 89.8, "valid_targets_min": 55 }, { "epoch": 2.1576763485477177, "grad_norm": 3.696118063258985, "learning_rate": 3.497865729613702e-05, "loss": 0.7598, "loss_nan_ranks": 0, "loss_rank_avg": 0.738641619682312, "step": 520, "valid_targets_mean": 128.2, "valid_targets_min": 60 }, { "epoch": 2.1784232365145226, "grad_norm": 3.9014408920828285, "learning_rate": 3.484071924975398e-05, "loss": 0.8091, "loss_nan_ranks": 0, "loss_rank_avg": 0.7708579301834106, "step": 525, "valid_targets_mean": 115.0, "valid_targets_min": 44 }, { "epoch": 2.199170124481328, "grad_norm": 4.305295137986893, "learning_rate": 3.4701192117796964e-05, "loss": 0.7631, "loss_nan_ranks": 0, "loss_rank_avg": 0.7537413835525513, "step": 530, "valid_targets_mean": 99.2, "valid_targets_min": 61 }, { "epoch": 2.219917012448133, "grad_norm": 3.4539393109108762, "learning_rate": 3.456009084027995e-05, "loss": 0.696, "loss_nan_ranks": 0, "loss_rank_avg": 0.6141202449798584, "step": 535, "valid_targets_mean": 142.5, "valid_targets_min": 67 }, { "epoch": 2.240663900414938, "grad_norm": 3.499033553308627, "learning_rate": 3.441743052577014e-05, "loss": 0.7411, "loss_nan_ranks": 0, "loss_rank_avg": 0.6117064952850342, "step": 540, "valid_targets_mean": 134.6, "valid_targets_min": 66 }, { "epoch": 2.2614107883817427, "grad_norm": 4.012715094473001, "learning_rate": 3.4273226449770314e-05, "loss": 0.7664, "loss_nan_ranks": 0, "loss_rank_avg": 0.848983645439148, "step": 545, "valid_targets_mean": 108.5, "valid_targets_min": 66 }, { "epoch": 2.2821576763485476, "grad_norm": 4.829214040777758, "learning_rate": 3.4127494053083086e-05, "loss": 0.6762, "loss_nan_ranks": 0, "loss_rank_avg": 0.8868389129638672, "step": 550, "valid_targets_mean": 86.9, "valid_targets_min": 62 }, { "epoch": 2.3029045643153525, "grad_norm": 3.9749139794203345, "learning_rate": 3.398024894015764e-05, "loss": 0.6597, "loss_nan_ranks": 0, "loss_rank_avg": 0.8123997449874878, "step": 555, "valid_targets_mean": 119.9, "valid_targets_min": 72 }, { "epoch": 2.323651452282158, "grad_norm": 4.34876414676235, "learning_rate": 3.383150687741883e-05, "loss": 0.8247, "loss_nan_ranks": 0, "loss_rank_avg": 0.9735461473464966, "step": 560, "valid_targets_mean": 99.9, "valid_targets_min": 52 }, { "epoch": 2.3443983402489628, "grad_norm": 3.912739845823611, "learning_rate": 3.368128379157897e-05, "loss": 0.7557, "loss_nan_ranks": 0, "loss_rank_avg": 0.7604776620864868, "step": 565, "valid_targets_mean": 119.3, "valid_targets_min": 64 }, { "epoch": 2.3651452282157677, "grad_norm": 4.185361795583942, "learning_rate": 3.3529595767932496e-05, "loss": 0.7067, "loss_nan_ranks": 0, "loss_rank_avg": 0.722848653793335, "step": 570, "valid_targets_mean": 133.8, "valid_targets_min": 71 }, { "epoch": 2.3858921161825726, "grad_norm": 3.7895338790081166, "learning_rate": 3.3376459048633565e-05, "loss": 0.8143, "loss_nan_ranks": 0, "loss_rank_avg": 0.7524533271789551, "step": 575, "valid_targets_mean": 126.1, "valid_targets_min": 53 }, { "epoch": 2.4066390041493775, "grad_norm": 4.1719327780746225, "learning_rate": 3.322189003095696e-05, "loss": 0.7022, "loss_nan_ranks": 0, "loss_rank_avg": 0.7856139540672302, "step": 580, "valid_targets_mean": 121.4, "valid_targets_min": 55 }, { "epoch": 2.4273858921161824, "grad_norm": 3.7130870658654795, "learning_rate": 3.306590526554233e-05, "loss": 0.8357, "loss_nan_ranks": 0, "loss_rank_avg": 0.8768580555915833, "step": 585, "valid_targets_mean": 129.4, "valid_targets_min": 47 }, { "epoch": 2.4481327800829877, "grad_norm": 4.362956565915498, "learning_rate": 3.290852145462196e-05, "loss": 0.8137, "loss_nan_ranks": 0, "loss_rank_avg": 0.8842595815658569, "step": 590, "valid_targets_mean": 99.6, "valid_targets_min": 66 }, { "epoch": 2.4688796680497926, "grad_norm": 4.836605175013781, "learning_rate": 3.274975545023242e-05, "loss": 0.8062, "loss_nan_ranks": 0, "loss_rank_avg": 0.9917868375778198, "step": 595, "valid_targets_mean": 92.1, "valid_targets_min": 61 }, { "epoch": 2.4896265560165975, "grad_norm": 4.355290465359613, "learning_rate": 3.258962425241011e-05, "loss": 0.8078, "loss_nan_ranks": 0, "loss_rank_avg": 0.9537732601165771, "step": 600, "valid_targets_mean": 113.8, "valid_targets_min": 60 }, { "epoch": 2.5103734439834025, "grad_norm": 3.605491208223676, "learning_rate": 3.242814500737092e-05, "loss": 0.7583, "loss_nan_ranks": 0, "loss_rank_avg": 0.6148207187652588, "step": 605, "valid_targets_mean": 158.1, "valid_targets_min": 37 }, { "epoch": 2.5311203319502074, "grad_norm": 4.3713729869832845, "learning_rate": 3.226533500567433e-05, "loss": 0.8238, "loss_nan_ranks": 0, "loss_rank_avg": 0.8725237846374512, "step": 610, "valid_targets_mean": 104.8, "valid_targets_min": 58 }, { "epoch": 2.5518672199170123, "grad_norm": 4.489442159400303, "learning_rate": 3.2101211680371965e-05, "loss": 0.8316, "loss_nan_ranks": 0, "loss_rank_avg": 0.9407273530960083, "step": 615, "valid_targets_mean": 98.4, "valid_targets_min": 57 }, { "epoch": 2.572614107883817, "grad_norm": 4.291817799537676, "learning_rate": 3.193579260514097e-05, "loss": 0.8382, "loss_nan_ranks": 0, "loss_rank_avg": 0.8661283254623413, "step": 620, "valid_targets_mean": 88.4, "valid_targets_min": 62 }, { "epoch": 2.5933609958506225, "grad_norm": 3.951278391034252, "learning_rate": 3.176909549240226e-05, "loss": 0.797, "loss_nan_ranks": 0, "loss_rank_avg": 0.6531973481178284, "step": 625, "valid_targets_mean": 127.5, "valid_targets_min": 56 }, { "epoch": 2.6141078838174274, "grad_norm": 3.6399446812749994, "learning_rate": 3.1601138191423966e-05, "loss": 0.7022, "loss_nan_ranks": 0, "loss_rank_avg": 0.821446418762207, "step": 630, "valid_targets_mean": 158.7, "valid_targets_min": 64 }, { "epoch": 2.6348547717842323, "grad_norm": 3.783299086234017, "learning_rate": 3.143193868641019e-05, "loss": 0.754, "loss_nan_ranks": 0, "loss_rank_avg": 0.7142607569694519, "step": 635, "valid_targets_mean": 128.2, "valid_targets_min": 61 }, { "epoch": 2.6556016597510372, "grad_norm": 4.253882499474148, "learning_rate": 3.1261515094575335e-05, "loss": 0.7797, "loss_nan_ranks": 0, "loss_rank_avg": 0.8119747638702393, "step": 640, "valid_targets_mean": 93.5, "valid_targets_min": 59 }, { "epoch": 2.6763485477178426, "grad_norm": 4.113442911406122, "learning_rate": 3.108988566420417e-05, "loss": 0.6938, "loss_nan_ranks": 0, "loss_rank_avg": 0.7523815631866455, "step": 645, "valid_targets_mean": 119.0, "valid_targets_min": 64 }, { "epoch": 2.6970954356846475, "grad_norm": 3.6131999603541405, "learning_rate": 3.0917068772697934e-05, "loss": 0.7476, "loss_nan_ranks": 0, "loss_rank_avg": 0.6980721950531006, "step": 650, "valid_targets_mean": 146.2, "valid_targets_min": 78 }, { "epoch": 2.7178423236514524, "grad_norm": 3.732929812664138, "learning_rate": 3.074308292460646e-05, "loss": 0.6439, "loss_nan_ranks": 0, "loss_rank_avg": 0.8016985654830933, "step": 655, "valid_targets_mean": 153.6, "valid_targets_min": 57 }, { "epoch": 2.7385892116182573, "grad_norm": 3.0389793975855532, "learning_rate": 3.056794674964685e-05, "loss": 0.6437, "loss_nan_ranks": 0, "loss_rank_avg": 0.5522057414054871, "step": 660, "valid_targets_mean": 211.7, "valid_targets_min": 72 }, { "epoch": 2.759336099585062, "grad_norm": 3.985890415354317, "learning_rate": 3.0391679000708673e-05, "loss": 0.7794, "loss_nan_ranks": 0, "loss_rank_avg": 0.7437437176704407, "step": 665, "valid_targets_mean": 115.2, "valid_targets_min": 55 }, { "epoch": 2.780082987551867, "grad_norm": 4.76701659295223, "learning_rate": 3.0214298551845967e-05, "loss": 0.7425, "loss_nan_ranks": 0, "loss_rank_avg": 0.880343496799469, "step": 670, "valid_targets_mean": 113.9, "valid_targets_min": 71 }, { "epoch": 2.800829875518672, "grad_norm": 3.7563533860394323, "learning_rate": 3.0035824396256267e-05, "loss": 0.799, "loss_nan_ranks": 0, "loss_rank_avg": 0.7248696684837341, "step": 675, "valid_targets_mean": 106.1, "valid_targets_min": 61 }, { "epoch": 2.821576763485477, "grad_norm": 3.761242240389271, "learning_rate": 2.9856275644246903e-05, "loss": 0.7134, "loss_nan_ranks": 0, "loss_rank_avg": 0.8497253656387329, "step": 680, "valid_targets_mean": 139.9, "valid_targets_min": 69 }, { "epoch": 2.8423236514522823, "grad_norm": 4.290575353844001, "learning_rate": 2.9675671521188766e-05, "loss": 0.7767, "loss_nan_ranks": 0, "loss_rank_avg": 0.8362669944763184, "step": 685, "valid_targets_mean": 109.0, "valid_targets_min": 57 }, { "epoch": 2.863070539419087, "grad_norm": 4.343959204903679, "learning_rate": 2.949403136545769e-05, "loss": 0.8742, "loss_nan_ranks": 0, "loss_rank_avg": 0.9363041520118713, "step": 690, "valid_targets_mean": 118.2, "valid_targets_min": 63 }, { "epoch": 2.883817427385892, "grad_norm": 3.8671929065161064, "learning_rate": 2.9311374626363793e-05, "loss": 0.6692, "loss_nan_ranks": 0, "loss_rank_avg": 0.7386797666549683, "step": 695, "valid_targets_mean": 117.1, "valid_targets_min": 62 }, { "epoch": 2.904564315352697, "grad_norm": 2.4421362640071878, "learning_rate": 2.9127720862068928e-05, "loss": 0.7173, "loss_nan_ranks": 0, "loss_rank_avg": 0.43712466955184937, "step": 700, "valid_targets_mean": 225.9, "valid_targets_min": 63 }, { "epoch": 2.9253112033195023, "grad_norm": 3.79384332274057, "learning_rate": 2.8943089737492465e-05, "loss": 0.7826, "loss_nan_ranks": 0, "loss_rank_avg": 0.6691803932189941, "step": 705, "valid_targets_mean": 134.9, "valid_targets_min": 58 }, { "epoch": 2.9460580912863072, "grad_norm": 5.106290015763064, "learning_rate": 2.8757501022205653e-05, "loss": 0.7881, "loss_nan_ranks": 0, "loss_rank_avg": 0.6905944347381592, "step": 710, "valid_targets_mean": 113.7, "valid_targets_min": 65 }, { "epoch": 2.966804979253112, "grad_norm": 2.3434102426062147, "learning_rate": 2.8570974588314767e-05, "loss": 0.638, "loss_nan_ranks": 0, "loss_rank_avg": 0.38373029232025146, "step": 715, "valid_targets_mean": 209.8, "valid_targets_min": 68 }, { "epoch": 2.987551867219917, "grad_norm": 4.177527208829087, "learning_rate": 2.8383530408333285e-05, "loss": 0.7669, "loss_nan_ranks": 0, "loss_rank_avg": 0.8700437545776367, "step": 720, "valid_targets_mean": 106.6, "valid_targets_min": 65 }, { "epoch": 3.008298755186722, "grad_norm": 3.3708836828475635, "learning_rate": 2.8195188553043317e-05, "loss": 0.6422, "loss_nan_ranks": 0, "loss_rank_avg": 0.5712267160415649, "step": 725, "valid_targets_mean": 125.0, "valid_targets_min": 48 }, { "epoch": 3.029045643153527, "grad_norm": 4.331342732584377, "learning_rate": 2.800596918934648e-05, "loss": 0.5641, "loss_nan_ranks": 0, "loss_rank_avg": 0.6667015552520752, "step": 730, "valid_targets_mean": 108.5, "valid_targets_min": 57 }, { "epoch": 3.0497925311203318, "grad_norm": 7.744354080794957, "learning_rate": 2.7815892578104554e-05, "loss": 0.6125, "loss_nan_ranks": 0, "loss_rank_avg": 0.6798431873321533, "step": 735, "valid_targets_mean": 95.9, "valid_targets_min": 46 }, { "epoch": 3.070539419087137, "grad_norm": 4.7346730931654974, "learning_rate": 2.762497907196996e-05, "loss": 0.4968, "loss_nan_ranks": 0, "loss_rank_avg": 0.4088674783706665, "step": 740, "valid_targets_mean": 149.1, "valid_targets_min": 61 }, { "epoch": 3.091286307053942, "grad_norm": 3.8372167911375676, "learning_rate": 2.743324911320655e-05, "loss": 0.591, "loss_nan_ranks": 0, "loss_rank_avg": 0.4837598204612732, "step": 745, "valid_targets_mean": 155.9, "valid_targets_min": 48 }, { "epoch": 3.112033195020747, "grad_norm": 4.928852485615693, "learning_rate": 2.724072323150069e-05, "loss": 0.5391, "loss_nan_ranks": 0, "loss_rank_avg": 0.6401872038841248, "step": 750, "valid_targets_mean": 109.9, "valid_targets_min": 62 }, { "epoch": 3.132780082987552, "grad_norm": 5.373658977233531, "learning_rate": 2.704742204176301e-05, "loss": 0.5404, "loss_nan_ranks": 0, "loss_rank_avg": 0.7401503324508667, "step": 755, "valid_targets_mean": 104.4, "valid_targets_min": 75 }, { "epoch": 3.1535269709543567, "grad_norm": 4.695430238717871, "learning_rate": 2.6853366241921083e-05, "loss": 0.546, "loss_nan_ranks": 0, "loss_rank_avg": 0.507964015007019, "step": 760, "valid_targets_mean": 131.6, "valid_targets_min": 52 }, { "epoch": 3.1742738589211617, "grad_norm": 4.940895414049785, "learning_rate": 2.6658576610703142e-05, "loss": 0.5017, "loss_nan_ranks": 0, "loss_rank_avg": 0.44809240102767944, "step": 765, "valid_targets_mean": 160.5, "valid_targets_min": 59 }, { "epoch": 3.195020746887967, "grad_norm": 4.475353236399673, "learning_rate": 2.6463074005413187e-05, "loss": 0.4833, "loss_nan_ranks": 0, "loss_rank_avg": 0.4449211061000824, "step": 770, "valid_targets_mean": 152.4, "valid_targets_min": 72 }, { "epoch": 3.215767634854772, "grad_norm": 4.684709304749235, "learning_rate": 2.6266879359697647e-05, "loss": 0.5946, "loss_nan_ranks": 0, "loss_rank_avg": 0.5302928686141968, "step": 775, "valid_targets_mean": 129.9, "valid_targets_min": 72 }, { "epoch": 3.236514522821577, "grad_norm": 4.642771055970237, "learning_rate": 2.6070013681303933e-05, "loss": 0.5182, "loss_nan_ranks": 0, "loss_rank_avg": 0.5693954229354858, "step": 780, "valid_targets_mean": 121.1, "valid_targets_min": 63 }, { "epoch": 3.2572614107883817, "grad_norm": 4.02364920565193, "learning_rate": 2.5872498049830973e-05, "loss": 0.5421, "loss_nan_ranks": 0, "loss_rank_avg": 0.4645788073539734, "step": 785, "valid_targets_mean": 152.3, "valid_targets_min": 61 }, { "epoch": 3.2780082987551866, "grad_norm": 4.552089103946345, "learning_rate": 2.5674353614472084e-05, "loss": 0.4724, "loss_nan_ranks": 0, "loss_rank_avg": 0.5232946276664734, "step": 790, "valid_targets_mean": 110.2, "valid_targets_min": 63 }, { "epoch": 3.2987551867219915, "grad_norm": 3.7131472795548244, "learning_rate": 2.5475601591750448e-05, "loss": 0.4999, "loss_nan_ranks": 0, "loss_rank_avg": 0.40184569358825684, "step": 795, "valid_targets_mean": 262.1, "valid_targets_min": 49 }, { "epoch": 3.3195020746887964, "grad_norm": 5.143076340286301, "learning_rate": 2.5276263263247282e-05, "loss": 0.5116, "loss_nan_ranks": 0, "loss_rank_avg": 0.527146577835083, "step": 800, "valid_targets_mean": 110.8, "valid_targets_min": 62 }, { "epoch": 3.340248962655602, "grad_norm": 5.319558834403425, "learning_rate": 2.5076359973323107e-05, "loss": 0.6154, "loss_nan_ranks": 0, "loss_rank_avg": 0.6301993131637573, "step": 805, "valid_targets_mean": 96.8, "valid_targets_min": 56 }, { "epoch": 3.3609958506224067, "grad_norm": 3.8762909428008725, "learning_rate": 2.4875913126832297e-05, "loss": 0.5952, "loss_nan_ranks": 0, "loss_rank_avg": 0.4537183940410614, "step": 810, "valid_targets_mean": 173.0, "valid_targets_min": 57 }, { "epoch": 3.3817427385892116, "grad_norm": 7.622394767794957, "learning_rate": 2.4674944186831108e-05, "loss": 0.4865, "loss_nan_ranks": 0, "loss_rank_avg": 0.46812310814857483, "step": 815, "valid_targets_mean": 114.8, "valid_targets_min": 51 }, { "epoch": 3.4024896265560165, "grad_norm": 4.432218117678233, "learning_rate": 2.4473474672279497e-05, "loss": 0.504, "loss_nan_ranks": 0, "loss_rank_avg": 0.4488857686519623, "step": 820, "valid_targets_mean": 124.0, "valid_targets_min": 61 }, { "epoch": 3.4232365145228214, "grad_norm": 4.55996161463432, "learning_rate": 2.427152615573697e-05, "loss": 0.5078, "loss_nan_ranks": 0, "loss_rank_avg": 0.34916043281555176, "step": 825, "valid_targets_mean": 176.0, "valid_targets_min": 59 }, { "epoch": 3.4439834024896268, "grad_norm": 5.114951004982656, "learning_rate": 2.4069120261052682e-05, "loss": 0.5275, "loss_nan_ranks": 0, "loss_rank_avg": 0.6819808483123779, "step": 830, "valid_targets_mean": 105.2, "valid_targets_min": 71 }, { "epoch": 3.4647302904564317, "grad_norm": 5.28455932841826, "learning_rate": 2.386627866105002e-05, "loss": 0.5831, "loss_nan_ranks": 0, "loss_rank_avg": 0.6226446628570557, "step": 835, "valid_targets_mean": 134.7, "valid_targets_min": 60 }, { "epoch": 3.4854771784232366, "grad_norm": 4.473738543991443, "learning_rate": 2.3663023075205992e-05, "loss": 0.5253, "loss_nan_ranks": 0, "loss_rank_avg": 0.5064611434936523, "step": 840, "valid_targets_mean": 167.0, "valid_targets_min": 76 }, { "epoch": 3.5062240663900415, "grad_norm": 6.42051749883382, "learning_rate": 2.3459375267325552e-05, "loss": 0.5637, "loss_nan_ranks": 0, "loss_rank_avg": 0.517977237701416, "step": 845, "valid_targets_mean": 111.2, "valid_targets_min": 54 }, { "epoch": 3.5269709543568464, "grad_norm": 4.595241350402068, "learning_rate": 2.325535704321126e-05, "loss": 0.612, "loss_nan_ranks": 0, "loss_rank_avg": 0.5615861415863037, "step": 850, "valid_targets_mean": 147.6, "valid_targets_min": 70 }, { "epoch": 3.5477178423236513, "grad_norm": 5.192972612859483, "learning_rate": 2.3050990248328365e-05, "loss": 0.5303, "loss_nan_ranks": 0, "loss_rank_avg": 0.5923424959182739, "step": 855, "valid_targets_mean": 103.9, "valid_targets_min": 20 }, { "epoch": 3.568464730290456, "grad_norm": 5.3495277210724295, "learning_rate": 2.2846296765465708e-05, "loss": 0.5785, "loss_nan_ranks": 0, "loss_rank_avg": 0.6900249719619751, "step": 860, "valid_targets_mean": 98.2, "valid_targets_min": 48 }, { "epoch": 3.5892116182572615, "grad_norm": 4.61930376588509, "learning_rate": 2.2641298512392585e-05, "loss": 0.5289, "loss_nan_ranks": 0, "loss_rank_avg": 0.5852305293083191, "step": 865, "valid_targets_mean": 107.1, "valid_targets_min": 68 }, { "epoch": 3.6099585062240664, "grad_norm": 5.361219727707543, "learning_rate": 2.2436017439511878e-05, "loss": 0.519, "loss_nan_ranks": 0, "loss_rank_avg": 0.630679190158844, "step": 870, "valid_targets_mean": 99.9, "valid_targets_min": 48 }, { "epoch": 3.6307053941908713, "grad_norm": 4.72558048492487, "learning_rate": 2.2230475527509712e-05, "loss": 0.5643, "loss_nan_ranks": 0, "loss_rank_avg": 0.5127944350242615, "step": 875, "valid_targets_mean": 135.3, "valid_targets_min": 69 }, { "epoch": 3.6514522821576763, "grad_norm": 5.72644206689818, "learning_rate": 2.2024694785001814e-05, "loss": 0.5333, "loss_nan_ranks": 0, "loss_rank_avg": 0.5506484508514404, "step": 880, "valid_targets_mean": 91.6, "valid_targets_min": 56 }, { "epoch": 3.6721991701244816, "grad_norm": 5.396922771454308, "learning_rate": 2.1818697246176943e-05, "loss": 0.5145, "loss_nan_ranks": 0, "loss_rank_avg": 0.6444652676582336, "step": 885, "valid_targets_mean": 89.5, "valid_targets_min": 69 }, { "epoch": 3.6929460580912865, "grad_norm": 4.709143641817256, "learning_rate": 2.161250496843756e-05, "loss": 0.6124, "loss_nan_ranks": 0, "loss_rank_avg": 0.5286765098571777, "step": 890, "valid_targets_mean": 118.1, "valid_targets_min": 61 }, { "epoch": 3.7136929460580914, "grad_norm": 4.279711593845015, "learning_rate": 2.1406140030037988e-05, "loss": 0.5131, "loss_nan_ranks": 0, "loss_rank_avg": 0.508799135684967, "step": 895, "valid_targets_mean": 155.9, "valid_targets_min": 47 }, { "epoch": 3.7344398340248963, "grad_norm": 5.507064174030223, "learning_rate": 2.119962452772039e-05, "loss": 0.6005, "loss_nan_ranks": 0, "loss_rank_avg": 0.6413220763206482, "step": 900, "valid_targets_mean": 102.9, "valid_targets_min": 51 }, { "epoch": 3.7551867219917012, "grad_norm": 4.977065659139609, "learning_rate": 2.0992980574348687e-05, "loss": 0.637, "loss_nan_ranks": 0, "loss_rank_avg": 0.660423994064331, "step": 905, "valid_targets_mean": 114.2, "valid_targets_min": 63 }, { "epoch": 3.775933609958506, "grad_norm": 5.507287449132665, "learning_rate": 2.0786230296540864e-05, "loss": 0.562, "loss_nan_ranks": 0, "loss_rank_avg": 0.6502384543418884, "step": 910, "valid_targets_mean": 95.9, "valid_targets_min": 57 }, { "epoch": 3.796680497925311, "grad_norm": 3.542244490137563, "learning_rate": 2.0579395832299688e-05, "loss": 0.4917, "loss_nan_ranks": 0, "loss_rank_avg": 0.38029953837394714, "step": 915, "valid_targets_mean": 189.2, "valid_targets_min": 52 }, { "epoch": 3.817427385892116, "grad_norm": 5.057195244837334, "learning_rate": 2.0372499328642277e-05, "loss": 0.6087, "loss_nan_ranks": 0, "loss_rank_avg": 0.6501518487930298, "step": 920, "valid_targets_mean": 115.8, "valid_targets_min": 55 }, { "epoch": 3.8381742738589213, "grad_norm": 3.87850879824177, "learning_rate": 2.016556293922869e-05, "loss": 0.54, "loss_nan_ranks": 0, "loss_rank_avg": 0.45556652545928955, "step": 925, "valid_targets_mean": 136.7, "valid_targets_min": 61 }, { "epoch": 3.858921161825726, "grad_norm": 4.15102620173551, "learning_rate": 1.9958608821989792e-05, "loss": 0.5028, "loss_nan_ranks": 0, "loss_rank_avg": 0.5197992324829102, "step": 930, "valid_targets_mean": 172.0, "valid_targets_min": 43 }, { "epoch": 3.879668049792531, "grad_norm": 3.9883443533699623, "learning_rate": 1.9751659136754686e-05, "loss": 0.4334, "loss_nan_ranks": 0, "loss_rank_avg": 0.4564608335494995, "step": 935, "valid_targets_mean": 171.6, "valid_targets_min": 68 }, { "epoch": 3.900414937759336, "grad_norm": 3.8229294981435196, "learning_rate": 1.9544736042877886e-05, "loss": 0.3988, "loss_nan_ranks": 0, "loss_rank_avg": 0.3890151381492615, "step": 940, "valid_targets_mean": 203.3, "valid_targets_min": 58 }, { "epoch": 3.921161825726141, "grad_norm": 4.770214843535073, "learning_rate": 1.9337861696866643e-05, "loss": 0.5395, "loss_nan_ranks": 0, "loss_rank_avg": 0.5733540058135986, "step": 945, "valid_targets_mean": 121.2, "valid_targets_min": 58 }, { "epoch": 3.9419087136929463, "grad_norm": 5.715733351887051, "learning_rate": 1.913105825000844e-05, "loss": 0.6749, "loss_nan_ranks": 0, "loss_rank_avg": 0.6428124904632568, "step": 950, "valid_targets_mean": 99.4, "valid_targets_min": 63 }, { "epoch": 3.962655601659751, "grad_norm": 5.247147480424706, "learning_rate": 1.8924347845999197e-05, "loss": 0.5692, "loss_nan_ranks": 0, "loss_rank_avg": 0.4043826162815094, "step": 955, "valid_targets_mean": 186.8, "valid_targets_min": 55 }, { "epoch": 3.983402489626556, "grad_norm": 4.693398960678804, "learning_rate": 1.871775261857215e-05, "loss": 0.5391, "loss_nan_ranks": 0, "loss_rank_avg": 0.5111123323440552, "step": 960, "valid_targets_mean": 134.2, "valid_targets_min": 49 }, { "epoch": 4.004149377593361, "grad_norm": 4.562791344546384, "learning_rate": 1.8511294689127887e-05, "loss": 0.5623, "loss_nan_ranks": 0, "loss_rank_avg": 0.4835764467716217, "step": 965, "valid_targets_mean": 91.6, "valid_targets_min": 46 }, { "epoch": 4.024896265560166, "grad_norm": 4.231590917124419, "learning_rate": 1.830499616436567e-05, "loss": 0.4125, "loss_nan_ranks": 0, "loss_rank_avg": 0.3629613518714905, "step": 970, "valid_targets_mean": 133.6, "valid_targets_min": 56 }, { "epoch": 4.045643153526971, "grad_norm": 6.451063449554833, "learning_rate": 1.8098879133916352e-05, "loss": 0.4243, "loss_nan_ranks": 0, "loss_rank_avg": 0.4441721737384796, "step": 975, "valid_targets_mean": 104.6, "valid_targets_min": 48 }, { "epoch": 4.066390041493776, "grad_norm": 7.27315245214559, "learning_rate": 1.789296566797706e-05, "loss": 0.4303, "loss_nan_ranks": 0, "loss_rank_avg": 0.44909948110580444, "step": 980, "valid_targets_mean": 116.8, "valid_targets_min": 59 }, { "epoch": 4.087136929460581, "grad_norm": 5.423253701096964, "learning_rate": 1.768727781494807e-05, "loss": 0.418, "loss_nan_ranks": 0, "loss_rank_avg": 0.4044135510921478, "step": 985, "valid_targets_mean": 116.2, "valid_targets_min": 59 }, { "epoch": 4.1078838174273855, "grad_norm": 6.060822859449416, "learning_rate": 1.7481837599071903e-05, "loss": 0.3992, "loss_nan_ranks": 0, "loss_rank_avg": 0.4017379879951477, "step": 990, "valid_targets_mean": 126.2, "valid_targets_min": 63 }, { "epoch": 4.12863070539419, "grad_norm": 6.047434247325548, "learning_rate": 1.7276667018075073e-05, "loss": 0.379, "loss_nan_ranks": 0, "loss_rank_avg": 0.4426099956035614, "step": 995, "valid_targets_mean": 105.3, "valid_targets_min": 68 }, { "epoch": 4.149377593360996, "grad_norm": 4.238813714812382, "learning_rate": 1.7071788040812655e-05, "loss": 0.3745, "loss_nan_ranks": 0, "loss_rank_avg": 0.30655837059020996, "step": 1000, "valid_targets_mean": 140.9, "valid_targets_min": 59 }, { "epoch": 4.170124481327801, "grad_norm": 6.8069774791469015, "learning_rate": 1.686722260491597e-05, "loss": 0.3669, "loss_nan_ranks": 0, "loss_rank_avg": 0.36141982674598694, "step": 1005, "valid_targets_mean": 105.2, "valid_targets_min": 55 }, { "epoch": 4.190871369294606, "grad_norm": 6.0106894978076975, "learning_rate": 1.6662992614443525e-05, "loss": 0.3601, "loss_nan_ranks": 0, "loss_rank_avg": 0.44953832030296326, "step": 1010, "valid_targets_mean": 99.1, "valid_targets_min": 50 }, { "epoch": 4.211618257261411, "grad_norm": 4.13388889521017, "learning_rate": 1.6459119937535702e-05, "loss": 0.3598, "loss_nan_ranks": 0, "loss_rank_avg": 0.2533540725708008, "step": 1015, "valid_targets_mean": 210.5, "valid_targets_min": 42 }, { "epoch": 4.232365145228216, "grad_norm": 4.270769059639924, "learning_rate": 1.6255626404073132e-05, "loss": 0.3857, "loss_nan_ranks": 0, "loss_rank_avg": 0.35333698987960815, "step": 1020, "valid_targets_mean": 151.1, "valid_targets_min": 71 }, { "epoch": 4.253112033195021, "grad_norm": 3.928272352429347, "learning_rate": 1.605253380333927e-05, "loss": 0.3545, "loss_nan_ranks": 0, "loss_rank_avg": 0.23659725487232208, "step": 1025, "valid_targets_mean": 209.1, "valid_targets_min": 58 }, { "epoch": 4.273858921161826, "grad_norm": 6.520862768289659, "learning_rate": 1.584986388168728e-05, "loss": 0.327, "loss_nan_ranks": 0, "loss_rank_avg": 0.3713211119174957, "step": 1030, "valid_targets_mean": 106.5, "valid_targets_min": 68 }, { "epoch": 4.2946058091286305, "grad_norm": 6.088618896351517, "learning_rate": 1.5647638340211525e-05, "loss": 0.4217, "loss_nan_ranks": 0, "loss_rank_avg": 0.4184168577194214, "step": 1035, "valid_targets_mean": 123.2, "valid_targets_min": 57 }, { "epoch": 4.3153526970954355, "grad_norm": 6.739392196333286, "learning_rate": 1.5445878832423876e-05, "loss": 0.3972, "loss_nan_ranks": 0, "loss_rank_avg": 0.4932287633419037, "step": 1040, "valid_targets_mean": 93.1, "valid_targets_min": 55 }, { "epoch": 4.33609958506224, "grad_norm": 4.816460152015017, "learning_rate": 1.5244606961935187e-05, "loss": 0.4227, "loss_nan_ranks": 0, "loss_rank_avg": 0.33327189087867737, "step": 1045, "valid_targets_mean": 126.0, "valid_targets_min": 63 }, { "epoch": 4.356846473029045, "grad_norm": 4.85922792161953, "learning_rate": 1.5043844280142005e-05, "loss": 0.3818, "loss_nan_ranks": 0, "loss_rank_avg": 0.3491777181625366, "step": 1050, "valid_targets_mean": 144.3, "valid_targets_min": 66 }, { "epoch": 4.377593360995851, "grad_norm": 5.503313733342627, "learning_rate": 1.4843612283918995e-05, "loss": 0.3707, "loss_nan_ranks": 0, "loss_rank_avg": 0.3861674666404724, "step": 1055, "valid_targets_mean": 145.2, "valid_targets_min": 68 }, { "epoch": 4.398340248962656, "grad_norm": 3.744381810663879, "learning_rate": 1.4643932413317079e-05, "loss": 0.3399, "loss_nan_ranks": 0, "loss_rank_avg": 0.2514806091785431, "step": 1060, "valid_targets_mean": 166.9, "valid_targets_min": 69 }, { "epoch": 4.419087136929461, "grad_norm": 4.739596531448335, "learning_rate": 1.4444826049267784e-05, "loss": 0.3923, "loss_nan_ranks": 0, "loss_rank_avg": 0.32454103231430054, "step": 1065, "valid_targets_mean": 124.9, "valid_targets_min": 56 }, { "epoch": 4.439834024896266, "grad_norm": 5.668748766294243, "learning_rate": 1.4246314511293777e-05, "loss": 0.3607, "loss_nan_ranks": 0, "loss_rank_avg": 0.3947162330150604, "step": 1070, "valid_targets_mean": 139.6, "valid_targets_min": 58 }, { "epoch": 4.460580912863071, "grad_norm": 5.417321720671694, "learning_rate": 1.4048419055226146e-05, "loss": 0.4178, "loss_nan_ranks": 0, "loss_rank_avg": 0.3876382112503052, "step": 1075, "valid_targets_mean": 137.8, "valid_targets_min": 64 }, { "epoch": 4.481327800829876, "grad_norm": 5.060908418832114, "learning_rate": 1.3851160870928317e-05, "loss": 0.4052, "loss_nan_ranks": 0, "loss_rank_avg": 0.38143742084503174, "step": 1080, "valid_targets_mean": 117.8, "valid_targets_min": 66 }, { "epoch": 4.5020746887966805, "grad_norm": 4.781433991361493, "learning_rate": 1.3654561080027213e-05, "loss": 0.343, "loss_nan_ranks": 0, "loss_rank_avg": 0.3337147533893585, "step": 1085, "valid_targets_mean": 156.2, "valid_targets_min": 66 }, { "epoch": 4.522821576763485, "grad_norm": 6.7756272116807175, "learning_rate": 1.345864073365157e-05, "loss": 0.393, "loss_nan_ranks": 0, "loss_rank_avg": 0.5130104422569275, "step": 1090, "valid_targets_mean": 103.8, "valid_targets_min": 55 }, { "epoch": 4.54356846473029, "grad_norm": 6.000927566190182, "learning_rate": 1.3263420810177902e-05, "loss": 0.4336, "loss_nan_ranks": 0, "loss_rank_avg": 0.4507676661014557, "step": 1095, "valid_targets_mean": 105.2, "valid_targets_min": 55 }, { "epoch": 4.564315352697095, "grad_norm": 6.058238312428124, "learning_rate": 1.3068922212984188e-05, "loss": 0.4127, "loss_nan_ranks": 0, "loss_rank_avg": 0.4390409588813782, "step": 1100, "valid_targets_mean": 97.3, "valid_targets_min": 64 }, { "epoch": 4.5850622406639, "grad_norm": 4.203708752511155, "learning_rate": 1.287516576821167e-05, "loss": 0.4029, "loss_nan_ranks": 0, "loss_rank_avg": 0.332113116979599, "step": 1105, "valid_targets_mean": 178.4, "valid_targets_min": 69 }, { "epoch": 4.605809128630705, "grad_norm": 5.207374310292064, "learning_rate": 1.2682172222534805e-05, "loss": 0.3313, "loss_nan_ranks": 0, "loss_rank_avg": 0.4056367874145508, "step": 1110, "valid_targets_mean": 100.6, "valid_targets_min": 60 }, { "epoch": 4.62655601659751, "grad_norm": 4.98336947480142, "learning_rate": 1.2489962240939857e-05, "loss": 0.3613, "loss_nan_ranks": 0, "loss_rank_avg": 0.2771909534931183, "step": 1115, "valid_targets_mean": 159.0, "valid_targets_min": 61 }, { "epoch": 4.647302904564316, "grad_norm": 3.6594235056108, "learning_rate": 1.229855640451213e-05, "loss": 0.3179, "loss_nan_ranks": 0, "loss_rank_avg": 0.16788652539253235, "step": 1120, "valid_targets_mean": 306.2, "valid_targets_min": 68 }, { "epoch": 4.668049792531121, "grad_norm": 8.687907658737734, "learning_rate": 1.2107975208232259e-05, "loss": 0.4198, "loss_nan_ranks": 0, "loss_rank_avg": 0.379871666431427, "step": 1125, "valid_targets_mean": 133.1, "valid_targets_min": 61 }, { "epoch": 4.6887966804979255, "grad_norm": 5.836548120395633, "learning_rate": 1.1918239058781636e-05, "loss": 0.3607, "loss_nan_ranks": 0, "loss_rank_avg": 0.3218669891357422, "step": 1130, "valid_targets_mean": 127.9, "valid_targets_min": 70 }, { "epoch": 4.70954356846473, "grad_norm": 4.201638283395644, "learning_rate": 1.1729368272357419e-05, "loss": 0.3285, "loss_nan_ranks": 0, "loss_rank_avg": 0.23533934354782104, "step": 1135, "valid_targets_mean": 199.4, "valid_targets_min": 64 }, { "epoch": 4.730290456431535, "grad_norm": 5.095855157677563, "learning_rate": 1.1541383072497077e-05, "loss": 0.3855, "loss_nan_ranks": 0, "loss_rank_avg": 0.38103365898132324, "step": 1140, "valid_targets_mean": 140.2, "valid_targets_min": 59 }, { "epoch": 4.75103734439834, "grad_norm": 6.043002128231546, "learning_rate": 1.1354303587913003e-05, "loss": 0.337, "loss_nan_ranks": 0, "loss_rank_avg": 0.39897701144218445, "step": 1145, "valid_targets_mean": 124.3, "valid_targets_min": 51 }, { "epoch": 4.771784232365145, "grad_norm": 5.958399057291772, "learning_rate": 1.1168149850337136e-05, "loss": 0.3401, "loss_nan_ranks": 0, "loss_rank_avg": 0.44645410776138306, "step": 1150, "valid_targets_mean": 117.4, "valid_targets_min": 55 }, { "epoch": 4.79253112033195, "grad_norm": 4.690423833511766, "learning_rate": 1.0982941792376125e-05, "loss": 0.3953, "loss_nan_ranks": 0, "loss_rank_avg": 0.35815998911857605, "step": 1155, "valid_targets_mean": 129.4, "valid_targets_min": 57 }, { "epoch": 4.813278008298755, "grad_norm": 5.286714186295266, "learning_rate": 1.0798699245376959e-05, "loss": 0.41, "loss_nan_ranks": 0, "loss_rank_avg": 0.34337374567985535, "step": 1160, "valid_targets_mean": 98.7, "valid_targets_min": 50 }, { "epoch": 4.83402489626556, "grad_norm": 4.8262261611258985, "learning_rate": 1.0615441937303534e-05, "loss": 0.3275, "loss_nan_ranks": 0, "loss_rank_avg": 0.32752180099487305, "step": 1165, "valid_targets_mean": 132.2, "valid_targets_min": 69 }, { "epoch": 4.854771784232365, "grad_norm": 5.7535756694573665, "learning_rate": 1.0433189490624253e-05, "loss": 0.3779, "loss_nan_ranks": 0, "loss_rank_avg": 0.4661560356616974, "step": 1170, "valid_targets_mean": 97.8, "valid_targets_min": 64 }, { "epoch": 4.875518672199171, "grad_norm": 4.2819905844779775, "learning_rate": 1.0251961420210937e-05, "loss": 0.3465, "loss_nan_ranks": 0, "loss_rank_avg": 0.33195045590400696, "step": 1175, "valid_targets_mean": 179.1, "valid_targets_min": 72 }, { "epoch": 4.8962655601659755, "grad_norm": 3.688190995957866, "learning_rate": 1.0071777131249237e-05, "loss": 0.3008, "loss_nan_ranks": 0, "loss_rank_avg": 0.2772628664970398, "step": 1180, "valid_targets_mean": 161.6, "valid_targets_min": 72 }, { "epoch": 4.91701244813278, "grad_norm": 5.965502494236917, "learning_rate": 9.892655917160814e-06, "loss": 0.3767, "loss_nan_ranks": 0, "loss_rank_avg": 0.4376785159111023, "step": 1185, "valid_targets_mean": 114.5, "valid_targets_min": 63 }, { "epoch": 4.937759336099585, "grad_norm": 5.200859052929826, "learning_rate": 9.714616957537466e-06, "loss": 0.3356, "loss_nan_ranks": 0, "loss_rank_avg": 0.35819631814956665, "step": 1190, "valid_targets_mean": 124.4, "valid_targets_min": 53 }, { "epoch": 4.95850622406639, "grad_norm": 4.905971401096997, "learning_rate": 9.537679316087491e-06, "loss": 0.3299, "loss_nan_ranks": 0, "loss_rank_avg": 0.33606231212615967, "step": 1195, "valid_targets_mean": 158.6, "valid_targets_min": 69 }, { "epoch": 4.979253112033195, "grad_norm": 5.932174753958537, "learning_rate": 9.361861938594332e-06, "loss": 0.3432, "loss_nan_ranks": 0, "loss_rank_avg": 0.44172829389572144, "step": 1200, "valid_targets_mean": 106.7, "valid_targets_min": 62 }, { "epoch": 5.0, "grad_norm": 5.482278806871232, "learning_rate": 9.187183650888056e-06, "loss": 0.3926, "loss_nan_ranks": 0, "loss_rank_avg": 0.35782986879348755, "step": 1205, "valid_targets_mean": 116.6, "valid_targets_min": 46 }, { "epoch": 5.020746887966805, "grad_norm": 4.030939056748361, "learning_rate": 9.013663156829438e-06, "loss": 0.2897, "loss_nan_ranks": 0, "loss_rank_avg": 0.24931949377059937, "step": 1210, "valid_targets_mean": 123.8, "valid_targets_min": 60 }, { "epoch": 5.04149377593361, "grad_norm": 3.738822362764231, "learning_rate": 8.841319036307334e-06, "loss": 0.253, "loss_nan_ranks": 0, "loss_rank_avg": 0.17903779447078705, "step": 1215, "valid_targets_mean": 230.9, "valid_targets_min": 47 }, { "epoch": 5.062240663900415, "grad_norm": 7.551485917813854, "learning_rate": 8.670169743249143e-06, "loss": 0.2583, "loss_nan_ranks": 0, "loss_rank_avg": 0.32046300172805786, "step": 1220, "valid_targets_mean": 85.4, "valid_targets_min": 54 }, { "epoch": 5.08298755186722, "grad_norm": 5.108187510802179, "learning_rate": 8.50023360364487e-06, "loss": 0.2949, "loss_nan_ranks": 0, "loss_rank_avg": 0.19512224197387695, "step": 1225, "valid_targets_mean": 146.3, "valid_targets_min": 53 }, { "epoch": 5.1037344398340245, "grad_norm": 6.161943015701863, "learning_rate": 8.331528813584832e-06, "loss": 0.2688, "loss_nan_ranks": 0, "loss_rank_avg": 0.3473849594593048, "step": 1230, "valid_targets_mean": 102.7, "valid_targets_min": 59 }, { "epoch": 5.124481327800829, "grad_norm": 5.4077143243461325, "learning_rate": 8.164073437311315e-06, "loss": 0.3074, "loss_nan_ranks": 0, "loss_rank_avg": 0.329645037651062, "step": 1235, "valid_targets_mean": 109.8, "valid_targets_min": 50 }, { "epoch": 5.145228215767635, "grad_norm": 3.9507322454113507, "learning_rate": 7.997885405284305e-06, "loss": 0.2572, "loss_nan_ranks": 0, "loss_rank_avg": 0.1828637272119522, "step": 1240, "valid_targets_mean": 137.2, "valid_targets_min": 64 }, { "epoch": 5.16597510373444, "grad_norm": 4.418001305578677, "learning_rate": 7.83298251226158e-06, "loss": 0.2603, "loss_nan_ranks": 0, "loss_rank_avg": 0.22215382754802704, "step": 1245, "valid_targets_mean": 156.5, "valid_targets_min": 64 }, { "epoch": 5.186721991701245, "grad_norm": 4.865514725466622, "learning_rate": 7.669382415393298e-06, "loss": 0.2724, "loss_nan_ranks": 0, "loss_rank_avg": 0.21926715970039368, "step": 1250, "valid_targets_mean": 142.2, "valid_targets_min": 62 }, { "epoch": 5.20746887966805, "grad_norm": 4.671773078750636, "learning_rate": 7.507102632331382e-06, "loss": 0.2, "loss_nan_ranks": 0, "loss_rank_avg": 0.21363668143749237, "step": 1255, "valid_targets_mean": 119.7, "valid_targets_min": 59 }, { "epoch": 5.228215767634855, "grad_norm": 7.747309257352988, "learning_rate": 7.3461605393537415e-06, "loss": 0.2709, "loss_nan_ranks": 0, "loss_rank_avg": 0.36064910888671875, "step": 1260, "valid_targets_mean": 103.8, "valid_targets_min": 54 }, { "epoch": 5.24896265560166, "grad_norm": 5.836162881737733, "learning_rate": 7.186573369503731e-06, "loss": 0.2643, "loss_nan_ranks": 0, "loss_rank_avg": 0.22864724695682526, "step": 1265, "valid_targets_mean": 155.4, "valid_targets_min": 52 }, { "epoch": 5.269709543568465, "grad_norm": 6.109299844374152, "learning_rate": 7.028358210744881e-06, "loss": 0.2853, "loss_nan_ranks": 0, "loss_rank_avg": 0.3166709840297699, "step": 1270, "valid_targets_mean": 109.1, "valid_targets_min": 51 }, { "epoch": 5.29045643153527, "grad_norm": 4.832159172426314, "learning_rate": 6.8715320041312095e-06, "loss": 0.2802, "loss_nan_ranks": 0, "loss_rank_avg": 0.3006090521812439, "step": 1275, "valid_targets_mean": 115.7, "valid_targets_min": 66 }, { "epoch": 5.3112033195020745, "grad_norm": 3.182299417076334, "learning_rate": 6.716111541993213e-06, "loss": 0.2687, "loss_nan_ranks": 0, "loss_rank_avg": 0.1729133427143097, "step": 1280, "valid_targets_mean": 199.3, "valid_targets_min": 68 }, { "epoch": 5.331950207468879, "grad_norm": 5.136124561619918, "learning_rate": 6.562113466139836e-06, "loss": 0.273, "loss_nan_ranks": 0, "loss_rank_avg": 0.2921794056892395, "step": 1285, "valid_targets_mean": 98.2, "valid_targets_min": 55 }, { "epoch": 5.352697095435684, "grad_norm": 6.387158477899295, "learning_rate": 6.4095542660765145e-06, "loss": 0.2853, "loss_nan_ranks": 0, "loss_rank_avg": 0.33419498801231384, "step": 1290, "valid_targets_mean": 112.9, "valid_targets_min": 49 }, { "epoch": 5.37344398340249, "grad_norm": 4.174496355970623, "learning_rate": 6.258450277239545e-06, "loss": 0.2691, "loss_nan_ranks": 0, "loss_rank_avg": 0.2561451494693756, "step": 1295, "valid_targets_mean": 127.8, "valid_targets_min": 56 }, { "epoch": 5.394190871369295, "grad_norm": 4.555697459816092, "learning_rate": 6.108817679246979e-06, "loss": 0.2528, "loss_nan_ranks": 0, "loss_rank_avg": 0.22688698768615723, "step": 1300, "valid_targets_mean": 113.3, "valid_targets_min": 72 }, { "epoch": 5.4149377593361, "grad_norm": 3.7328921616493482, "learning_rate": 5.960672494166113e-06, "loss": 0.2519, "loss_nan_ranks": 0, "loss_rank_avg": 0.18989375233650208, "step": 1305, "valid_targets_mean": 173.1, "valid_targets_min": 64 }, { "epoch": 5.435684647302905, "grad_norm": 4.693648138618525, "learning_rate": 5.8140305847979895e-06, "loss": 0.2879, "loss_nan_ranks": 0, "loss_rank_avg": 0.24130411446094513, "step": 1310, "valid_targets_mean": 112.8, "valid_targets_min": 58 }, { "epoch": 5.45643153526971, "grad_norm": 6.1245996173536295, "learning_rate": 5.668907652978783e-06, "loss": 0.287, "loss_nan_ranks": 0, "loss_rank_avg": 0.3436064124107361, "step": 1315, "valid_targets_mean": 101.9, "valid_targets_min": 65 }, { "epoch": 5.477178423236515, "grad_norm": 4.880336940097785, "learning_rate": 5.5253192378985966e-06, "loss": 0.2254, "loss_nan_ranks": 0, "loss_rank_avg": 0.2438402771949768, "step": 1320, "valid_targets_mean": 172.6, "valid_targets_min": 56 }, { "epoch": 5.4979253112033195, "grad_norm": 5.184523533528152, "learning_rate": 5.383280714437518e-06, "loss": 0.2503, "loss_nan_ranks": 0, "loss_rank_avg": 0.3002464175224304, "step": 1325, "valid_targets_mean": 124.2, "valid_targets_min": 41 }, { "epoch": 5.518672199170124, "grad_norm": 5.31824198148948, "learning_rate": 5.242807291519374e-06, "loss": 0.2999, "loss_nan_ranks": 0, "loss_rank_avg": 0.2796890139579773, "step": 1330, "valid_targets_mean": 105.6, "valid_targets_min": 52 }, { "epoch": 5.539419087136929, "grad_norm": 4.406452877804828, "learning_rate": 5.103914010483206e-06, "loss": 0.1811, "loss_nan_ranks": 0, "loss_rank_avg": 0.16050797700881958, "step": 1335, "valid_targets_mean": 169.6, "valid_targets_min": 51 }, { "epoch": 5.560165975103734, "grad_norm": 5.526397123023929, "learning_rate": 4.966615743472709e-06, "loss": 0.2922, "loss_nan_ranks": 0, "loss_rank_avg": 0.302845299243927, "step": 1340, "valid_targets_mean": 102.0, "valid_targets_min": 61 }, { "epoch": 5.580912863070539, "grad_norm": 4.475088302223254, "learning_rate": 4.830927191843779e-06, "loss": 0.2274, "loss_nan_ranks": 0, "loss_rank_avg": 0.22871732711791992, "step": 1345, "valid_targets_mean": 112.8, "valid_targets_min": 66 }, { "epoch": 5.601659751037344, "grad_norm": 3.827600528927148, "learning_rate": 4.696862884590349e-06, "loss": 0.2435, "loss_nan_ranks": 0, "loss_rank_avg": 0.17285335063934326, "step": 1350, "valid_targets_mean": 158.9, "valid_targets_min": 56 }, { "epoch": 5.622406639004149, "grad_norm": 5.486601988943072, "learning_rate": 4.564437176788681e-06, "loss": 0.2842, "loss_nan_ranks": 0, "loss_rank_avg": 0.2588198482990265, "step": 1355, "valid_targets_mean": 116.6, "valid_targets_min": 55 }, { "epoch": 5.643153526970955, "grad_norm": 4.279231303644842, "learning_rate": 4.433664248060295e-06, "loss": 0.229, "loss_nan_ranks": 0, "loss_rank_avg": 0.2061527669429779, "step": 1360, "valid_targets_mean": 153.4, "valid_targets_min": 81 }, { "epoch": 5.66390041493776, "grad_norm": 5.487514913708906, "learning_rate": 4.304558101053629e-06, "loss": 0.2755, "loss_nan_ranks": 0, "loss_rank_avg": 0.23738700151443481, "step": 1365, "valid_targets_mean": 129.2, "valid_targets_min": 53 }, { "epoch": 5.6846473029045645, "grad_norm": 4.05314290527769, "learning_rate": 4.177132559944761e-06, "loss": 0.2214, "loss_nan_ranks": 0, "loss_rank_avg": 0.196598082780838, "step": 1370, "valid_targets_mean": 164.5, "valid_targets_min": 64 }, { "epoch": 5.7053941908713695, "grad_norm": 6.2913597721448, "learning_rate": 4.051401268957087e-06, "loss": 0.27, "loss_nan_ranks": 0, "loss_rank_avg": 0.3582676351070404, "step": 1375, "valid_targets_mean": 107.6, "valid_targets_min": 65 }, { "epoch": 5.726141078838174, "grad_norm": 6.031743246784628, "learning_rate": 3.927377690900436e-06, "loss": 0.2684, "loss_nan_ranks": 0, "loss_rank_avg": 0.25066938996315, "step": 1380, "valid_targets_mean": 127.6, "valid_targets_min": 59 }, { "epoch": 5.746887966804979, "grad_norm": 5.214705770189645, "learning_rate": 3.805075105729459e-06, "loss": 0.2639, "loss_nan_ranks": 0, "loss_rank_avg": 0.25866299867630005, "step": 1385, "valid_targets_mean": 123.2, "valid_targets_min": 55 }, { "epoch": 5.767634854771784, "grad_norm": 4.524404310503156, "learning_rate": 3.6845066091216917e-06, "loss": 0.2862, "loss_nan_ranks": 0, "loss_rank_avg": 0.23066316545009613, "step": 1390, "valid_targets_mean": 128.7, "valid_targets_min": 63 }, { "epoch": 5.788381742738589, "grad_norm": 5.097443863683129, "learning_rate": 3.56568511107533e-06, "loss": 0.2365, "loss_nan_ranks": 0, "loss_rank_avg": 0.28775590658187866, "step": 1395, "valid_targets_mean": 93.3, "valid_targets_min": 55 }, { "epoch": 5.809128630705394, "grad_norm": 5.670717377040933, "learning_rate": 3.448623334526853e-06, "loss": 0.3138, "loss_nan_ranks": 0, "loss_rank_avg": 0.3401385545730591, "step": 1400, "valid_targets_mean": 94.4, "valid_targets_min": 59 }, { "epoch": 5.829875518672199, "grad_norm": 4.62592863868813, "learning_rate": 3.333333813988726e-06, "loss": 0.2836, "loss_nan_ranks": 0, "loss_rank_avg": 0.24296891689300537, "step": 1405, "valid_targets_mean": 131.2, "valid_targets_min": 55 }, { "epoch": 5.850622406639004, "grad_norm": 5.086749493331627, "learning_rate": 3.219828894207242e-06, "loss": 0.2612, "loss_nan_ranks": 0, "loss_rank_avg": 0.2650941014289856, "step": 1410, "valid_targets_mean": 124.6, "valid_targets_min": 78 }, { "epoch": 5.87136929460581, "grad_norm": 4.85811557310476, "learning_rate": 3.1081207288406846e-06, "loss": 0.2353, "loss_nan_ranks": 0, "loss_rank_avg": 0.2586798667907715, "step": 1415, "valid_targets_mean": 111.1, "valid_targets_min": 41 }, { "epoch": 5.8921161825726145, "grad_norm": 2.889073243641731, "learning_rate": 2.9982212791580044e-06, "loss": 0.2309, "loss_nan_ranks": 0, "loss_rank_avg": 0.15041622519493103, "step": 1420, "valid_targets_mean": 211.2, "valid_targets_min": 67 }, { "epoch": 5.912863070539419, "grad_norm": 4.7851155337037925, "learning_rate": 2.890142312757982e-06, "loss": 0.2902, "loss_nan_ranks": 0, "loss_rank_avg": 0.26748985052108765, "step": 1425, "valid_targets_mean": 115.7, "valid_targets_min": 47 }, { "epoch": 5.933609958506224, "grad_norm": 4.580807918668048, "learning_rate": 2.7838954023092845e-06, "loss": 0.2352, "loss_nan_ranks": 0, "loss_rank_avg": 0.2532392144203186, "step": 1430, "valid_targets_mean": 122.4, "valid_targets_min": 60 }, { "epoch": 5.954356846473029, "grad_norm": 5.472491432141598, "learning_rate": 2.679491924311226e-06, "loss": 0.254, "loss_nan_ranks": 0, "loss_rank_avg": 0.27012646198272705, "step": 1435, "valid_targets_mean": 116.7, "valid_targets_min": 57 }, { "epoch": 5.975103734439834, "grad_norm": 4.968702559787331, "learning_rate": 2.576943057875696e-06, "loss": 0.2627, "loss_nan_ranks": 0, "loss_rank_avg": 0.23716619610786438, "step": 1440, "valid_targets_mean": 146.7, "valid_targets_min": 61 }, { "epoch": 5.995850622406639, "grad_norm": 3.6763933325276374, "learning_rate": 2.4762597835300815e-06, "loss": 0.2422, "loss_nan_ranks": 0, "loss_rank_avg": 0.17048585414886475, "step": 1445, "valid_targets_mean": 182.2, "valid_targets_min": 62 }, { "epoch": 6.016597510373444, "grad_norm": 3.4439805728949118, "learning_rate": 2.377452882041551e-06, "loss": 0.2238, "loss_nan_ranks": 0, "loss_rank_avg": 0.20334039628505707, "step": 1450, "valid_targets_mean": 121.8, "valid_targets_min": 48 }, { "epoch": 6.037344398340249, "grad_norm": 4.169982176027096, "learning_rate": 2.280532933262678e-06, "loss": 0.1914, "loss_nan_ranks": 0, "loss_rank_avg": 0.22471293807029724, "step": 1455, "valid_targets_mean": 117.9, "valid_targets_min": 55 }, { "epoch": 6.058091286307054, "grad_norm": 4.451395467337526, "learning_rate": 2.1855103149985934e-06, "loss": 0.2352, "loss_nan_ranks": 0, "loss_rank_avg": 0.26145243644714355, "step": 1460, "valid_targets_mean": 95.1, "valid_targets_min": 59 }, { "epoch": 6.078838174273859, "grad_norm": 4.720931193547068, "learning_rate": 2.0923952018957826e-06, "loss": 0.2003, "loss_nan_ranks": 0, "loss_rank_avg": 0.2084977924823761, "step": 1465, "valid_targets_mean": 112.5, "valid_targets_min": 70 }, { "epoch": 6.0995850622406635, "grad_norm": 4.908093674427741, "learning_rate": 2.0011975643526106e-06, "loss": 0.185, "loss_nan_ranks": 0, "loss_rank_avg": 0.2433365285396576, "step": 1470, "valid_targets_mean": 120.3, "valid_targets_min": 67 }, { "epoch": 6.1203319502074685, "grad_norm": 4.960759939432554, "learning_rate": 1.9119271674517305e-06, "loss": 0.1719, "loss_nan_ranks": 0, "loss_rank_avg": 0.2257917821407318, "step": 1475, "valid_targets_mean": 112.6, "valid_targets_min": 61 }, { "epoch": 6.141078838174274, "grad_norm": 6.960952455067493, "learning_rate": 1.8245935699145035e-06, "loss": 0.2254, "loss_nan_ranks": 0, "loss_rank_avg": 0.2723103165626526, "step": 1480, "valid_targets_mean": 106.7, "valid_targets_min": 66 }, { "epoch": 6.161825726141079, "grad_norm": 4.632315701623262, "learning_rate": 1.7392061230774371e-06, "loss": 0.1786, "loss_nan_ranks": 0, "loss_rank_avg": 0.23765739798545837, "step": 1485, "valid_targets_mean": 107.6, "valid_targets_min": 65 }, { "epoch": 6.182572614107884, "grad_norm": 5.756675590953208, "learning_rate": 1.6557739698909436e-06, "loss": 0.2062, "loss_nan_ranks": 0, "loss_rank_avg": 0.26241517066955566, "step": 1490, "valid_targets_mean": 104.4, "valid_targets_min": 62 }, { "epoch": 6.203319502074689, "grad_norm": 3.871596830434273, "learning_rate": 1.574306043940288e-06, "loss": 0.1752, "loss_nan_ranks": 0, "loss_rank_avg": 0.15879438817501068, "step": 1495, "valid_targets_mean": 137.4, "valid_targets_min": 58 }, { "epoch": 6.224066390041494, "grad_norm": 4.069174001456709, "learning_rate": 1.4948110684890726e-06, "loss": 0.2002, "loss_nan_ranks": 0, "loss_rank_avg": 0.12537790834903717, "step": 1500, "valid_targets_mean": 171.4, "valid_targets_min": 64 }, { "epoch": 6.244813278008299, "grad_norm": 5.581161273740655, "learning_rate": 1.4172975555451363e-06, "loss": 0.2326, "loss_nan_ranks": 0, "loss_rank_avg": 0.2820102572441101, "step": 1505, "valid_targets_mean": 102.7, "valid_targets_min": 68 }, { "epoch": 6.265560165975104, "grad_norm": 5.524451235658226, "learning_rate": 1.3417738049491536e-06, "loss": 0.2402, "loss_nan_ranks": 0, "loss_rank_avg": 0.28135719895362854, "step": 1510, "valid_targets_mean": 96.0, "valid_targets_min": 61 }, { "epoch": 6.286307053941909, "grad_norm": 4.869973029745467, "learning_rate": 1.268247903485902e-06, "loss": 0.198, "loss_nan_ranks": 0, "loss_rank_avg": 0.25688737630844116, "step": 1515, "valid_targets_mean": 101.5, "valid_targets_min": 62 }, { "epoch": 6.3070539419087135, "grad_norm": 4.314906507746581, "learning_rate": 1.1967277240183716e-06, "loss": 0.21, "loss_nan_ranks": 0, "loss_rank_avg": 0.191103994846344, "step": 1520, "valid_targets_mean": 120.8, "valid_targets_min": 60 }, { "epoch": 6.327800829875518, "grad_norm": 4.3035104208842085, "learning_rate": 1.1272209246447696e-06, "loss": 0.2066, "loss_nan_ranks": 0, "loss_rank_avg": 0.17241765558719635, "step": 1525, "valid_targets_mean": 130.2, "valid_targets_min": 55 }, { "epoch": 6.348547717842323, "grad_norm": 3.91462234843459, "learning_rate": 1.0597349478785123e-06, "loss": 0.199, "loss_nan_ranks": 0, "loss_rank_avg": 0.22027219831943512, "step": 1530, "valid_targets_mean": 157.9, "valid_targets_min": 59 }, { "epoch": 6.369294605809129, "grad_norm": 4.161379790290467, "learning_rate": 9.942770198513218e-07, "loss": 0.2284, "loss_nan_ranks": 0, "loss_rank_avg": 0.19286894798278809, "step": 1535, "valid_targets_mean": 130.5, "valid_targets_min": 55 }, { "epoch": 6.390041493775934, "grad_norm": 3.979992570097402, "learning_rate": 9.308541495394751e-07, "loss": 0.2092, "loss_nan_ranks": 0, "loss_rank_avg": 0.1797044277191162, "step": 1540, "valid_targets_mean": 127.6, "valid_targets_min": 63 }, { "epoch": 6.410788381742739, "grad_norm": 4.578593444010601, "learning_rate": 8.694731280133051e-07, "loss": 0.2104, "loss_nan_ranks": 0, "loss_rank_avg": 0.25974029302597046, "step": 1545, "valid_targets_mean": 120.4, "valid_targets_min": 66 }, { "epoch": 6.431535269709544, "grad_norm": 14.736273677745213, "learning_rate": 8.101405277100549e-07, "loss": 0.2306, "loss_nan_ranks": 0, "loss_rank_avg": 0.2802034020423889, "step": 1550, "valid_targets_mean": 92.9, "valid_targets_min": 61 }, { "epoch": 6.452282157676349, "grad_norm": 6.111463784813684, "learning_rate": 7.528627017301016e-07, "loss": 0.1801, "loss_nan_ranks": 0, "loss_rank_avg": 0.29020482301712036, "step": 1555, "valid_targets_mean": 98.5, "valid_targets_min": 69 }, { "epoch": 6.473029045643154, "grad_norm": 4.419863069023317, "learning_rate": 6.976457831567262e-07, "loss": 0.19, "loss_nan_ranks": 0, "loss_rank_avg": 0.22686433792114258, "step": 1560, "valid_targets_mean": 121.2, "valid_targets_min": 57 }, { "epoch": 6.4937759336099585, "grad_norm": 5.3614306604918, "learning_rate": 6.444956843993754e-07, "loss": 0.2115, "loss_nan_ranks": 0, "loss_rank_avg": 0.24041424691677094, "step": 1565, "valid_targets_mean": 99.8, "valid_targets_min": 52 }, { "epoch": 6.514522821576763, "grad_norm": 10.164349086292562, "learning_rate": 5.934180965606007e-07, "loss": 0.2254, "loss_nan_ranks": 0, "loss_rank_avg": 0.2823329567909241, "step": 1570, "valid_targets_mean": 99.8, "valid_targets_min": 69 }, { "epoch": 6.535269709543568, "grad_norm": 5.2042900023987055, "learning_rate": 5.444184888266768e-07, "loss": 0.2009, "loss_nan_ranks": 0, "loss_rank_avg": 0.244424968957901, "step": 1575, "valid_targets_mean": 88.3, "valid_targets_min": 51 }, { "epoch": 6.556016597510373, "grad_norm": 4.462057874376993, "learning_rate": 4.975021078819731e-07, "loss": 0.1839, "loss_nan_ranks": 0, "loss_rank_avg": 0.22408881783485413, "step": 1580, "valid_targets_mean": 126.3, "valid_targets_min": 50 }, { "epoch": 6.576763485477178, "grad_norm": 5.410684806378169, "learning_rate": 4.5267397734717113e-07, "loss": 0.2637, "loss_nan_ranks": 0, "loss_rank_avg": 0.27906614542007446, "step": 1585, "valid_targets_mean": 92.4, "valid_targets_min": 57 }, { "epoch": 6.597510373443983, "grad_norm": 4.119111648737928, "learning_rate": 4.0993889724135314e-07, "loss": 0.2036, "loss_nan_ranks": 0, "loss_rank_avg": 0.1697758138179779, "step": 1590, "valid_targets_mean": 143.6, "valid_targets_min": 61 }, { "epoch": 6.618257261410788, "grad_norm": 5.179582255288888, "learning_rate": 3.693014434680242e-07, "loss": 0.2089, "loss_nan_ranks": 0, "loss_rank_avg": 0.24674925208091736, "step": 1595, "valid_targets_mean": 138.8, "valid_targets_min": 54 }, { "epoch": 6.639004149377593, "grad_norm": 5.561101995379885, "learning_rate": 3.307659673251595e-07, "loss": 0.2244, "loss_nan_ranks": 0, "loss_rank_avg": 0.177412211894989, "step": 1600, "valid_targets_mean": 117.6, "valid_targets_min": 60 }, { "epoch": 6.659751037344399, "grad_norm": 5.195861980789606, "learning_rate": 2.9433659503926623e-07, "loss": 0.2327, "loss_nan_ranks": 0, "loss_rank_avg": 0.25790226459503174, "step": 1605, "valid_targets_mean": 110.1, "valid_targets_min": 53 }, { "epoch": 6.680497925311204, "grad_norm": 3.911906512279716, "learning_rate": 2.6001722732358127e-07, "loss": 0.2035, "loss_nan_ranks": 0, "loss_rank_avg": 0.14186334609985352, "step": 1610, "valid_targets_mean": 141.2, "valid_targets_min": 67 }, { "epoch": 6.7012448132780085, "grad_norm": 5.042285108929284, "learning_rate": 2.27811538960383e-07, "loss": 0.1913, "loss_nan_ranks": 0, "loss_rank_avg": 0.2191961407661438, "step": 1615, "valid_targets_mean": 123.7, "valid_targets_min": 44 }, { "epoch": 6.721991701244813, "grad_norm": 5.239166381330431, "learning_rate": 1.9772297840752407e-07, "loss": 0.1967, "loss_nan_ranks": 0, "loss_rank_avg": 0.19531618058681488, "step": 1620, "valid_targets_mean": 111.1, "valid_targets_min": 61 }, { "epoch": 6.742738589211618, "grad_norm": 4.934480610164203, "learning_rate": 1.6975476742916886e-07, "loss": 0.2021, "loss_nan_ranks": 0, "loss_rank_avg": 0.21878521144390106, "step": 1625, "valid_targets_mean": 101.4, "valid_targets_min": 50 }, { "epoch": 6.763485477178423, "grad_norm": 4.895769325490044, "learning_rate": 1.43909900750836e-07, "loss": 0.2313, "loss_nan_ranks": 0, "loss_rank_avg": 0.23424255847930908, "step": 1630, "valid_targets_mean": 120.0, "valid_targets_min": 60 }, { "epoch": 6.784232365145228, "grad_norm": 3.8671281371679322, "learning_rate": 1.2019114573871947e-07, "loss": 0.1937, "loss_nan_ranks": 0, "loss_rank_avg": 0.17272061109542847, "step": 1635, "valid_targets_mean": 148.1, "valid_targets_min": 62 }, { "epoch": 6.804979253112033, "grad_norm": 4.4379640165652345, "learning_rate": 9.860104210338562e-08, "loss": 0.1573, "loss_nan_ranks": 0, "loss_rank_avg": 0.18832272291183472, "step": 1640, "valid_targets_mean": 119.2, "valid_targets_min": 70 }, { "epoch": 6.825726141078838, "grad_norm": 3.06556856776517, "learning_rate": 7.914190162781277e-08, "loss": 0.1822, "loss_nan_ranks": 0, "loss_rank_avg": 0.12091746181249619, "step": 1645, "valid_targets_mean": 182.6, "valid_targets_min": 54 }, { "epoch": 6.846473029045643, "grad_norm": 4.099005450803784, "learning_rate": 6.181580791987385e-08, "loss": 0.2129, "loss_nan_ranks": 0, "loss_rank_avg": 0.1655721664428711, "step": 1650, "valid_targets_mean": 148.9, "valid_targets_min": 56 }, { "epoch": 6.867219917012449, "grad_norm": 4.310902974974436, "learning_rate": 4.6624616189214765e-08, "loss": 0.1678, "loss_nan_ranks": 0, "loss_rank_avg": 0.15827462077140808, "step": 1655, "valid_targets_mean": 143.8, "valid_targets_min": 61 }, { "epoch": 6.8879668049792535, "grad_norm": 5.3122495662963685, "learning_rate": 3.3569953048624426e-08, "loss": 0.1979, "loss_nan_ranks": 0, "loss_rank_avg": 0.25778818130493164, "step": 1660, "valid_targets_mean": 134.8, "valid_targets_min": 45 }, { "epoch": 6.908713692946058, "grad_norm": 7.693391989415569, "learning_rate": 2.2653216339840746e-08, "loss": 0.2183, "loss_nan_ranks": 0, "loss_rank_avg": 0.21810811758041382, "step": 1665, "valid_targets_mean": 94.8, "valid_targets_min": 20 }, { "epoch": 6.929460580912863, "grad_norm": 5.236163106459915, "learning_rate": 1.3875574983894802e-08, "loss": 0.1777, "loss_nan_ranks": 0, "loss_rank_avg": 0.22439393401145935, "step": 1670, "valid_targets_mean": 117.6, "valid_targets_min": 62 }, { "epoch": 6.950207468879668, "grad_norm": 3.8797498698175685, "learning_rate": 7.237968855937638e-09, "loss": 0.1835, "loss_nan_ranks": 0, "loss_rank_avg": 0.14645695686340332, "step": 1675, "valid_targets_mean": 146.5, "valid_targets_min": 64 }, { "epoch": 6.970954356846473, "grad_norm": 4.609262542128345, "learning_rate": 2.7411086846051984e-09, "loss": 0.2133, "loss_nan_ranks": 0, "loss_rank_avg": 0.1896577924489975, "step": 1680, "valid_targets_mean": 98.5, "valid_targets_min": 46 }, { "epoch": 6.991701244813278, "grad_norm": 2.6565371089348058, "learning_rate": 3.8547597591254147e-10, "loss": 0.2164, "loss_nan_ranks": 0, "loss_rank_avg": 0.11129122227430344, "step": 1685, "valid_targets_mean": 239.2, "valid_targets_min": 57 }, { "epoch": 7.0, "loss_nan_ranks": 0, "loss_rank_avg": 0.3174639642238617, "step": 1687, "total_flos": 22338300149760.0, "train_loss": 0.652452801803053, "train_runtime": 3967.747, "train_samples_per_second": 6.796, "train_steps_per_second": 0.425, "valid_targets_mean": 94.8, "valid_targets_min": 62 } ], "logging_steps": 5, "max_steps": 1687, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 22338300149760.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }