{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "eval_steps": 500, "global_step": 833, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04219409282700422, "grad_norm": 13.737735517308288, "learning_rate": 1.904761904761905e-06, "loss": 0.8138, "loss_nan_ranks": 0, "loss_rank_avg": 0.42325860261917114, "step": 5, "valid_targets_mean": 9300.0, "valid_targets_min": 7296 }, { "epoch": 0.08438818565400844, "grad_norm": 8.598223125161901, "learning_rate": 4.2857142857142855e-06, "loss": 0.7793, "loss_nan_ranks": 0, "loss_rank_avg": 0.38333451747894287, "step": 10, "valid_targets_mean": 9275.5, "valid_targets_min": 7158 }, { "epoch": 0.12658227848101267, "grad_norm": 3.908004842191187, "learning_rate": 6.666666666666667e-06, "loss": 0.6819, "loss_nan_ranks": 0, "loss_rank_avg": 0.34921586513519287, "step": 15, "valid_targets_mean": 8389.4, "valid_targets_min": 6233 }, { "epoch": 0.16877637130801687, "grad_norm": 2.3954669922452503, "learning_rate": 9.047619047619049e-06, "loss": 0.6067, "loss_nan_ranks": 0, "loss_rank_avg": 0.2741207480430603, "step": 20, "valid_targets_mean": 8933.9, "valid_targets_min": 6921 }, { "epoch": 0.2109704641350211, "grad_norm": 1.262022302568234, "learning_rate": 1.1428571428571429e-05, "loss": 0.5663, "loss_nan_ranks": 0, "loss_rank_avg": 0.27063384652137756, "step": 25, "valid_targets_mean": 8693.9, "valid_targets_min": 5525 }, { "epoch": 0.25316455696202533, "grad_norm": 0.9175876804234998, "learning_rate": 1.3809523809523811e-05, "loss": 0.5036, "loss_nan_ranks": 0, "loss_rank_avg": 0.24980702996253967, "step": 30, "valid_targets_mean": 9026.4, "valid_targets_min": 7000 }, { "epoch": 0.29535864978902954, "grad_norm": 0.7282592599899416, "learning_rate": 1.6190476190476193e-05, "loss": 0.4921, "loss_nan_ranks": 0, "loss_rank_avg": 0.22725015878677368, "step": 35, "valid_targets_mean": 8819.1, "valid_targets_min": 7233 }, { "epoch": 0.33755274261603374, "grad_norm": 0.5629122844919103, "learning_rate": 1.8571428571428575e-05, "loss": 0.4657, "loss_nan_ranks": 0, "loss_rank_avg": 0.21749253571033478, "step": 40, "valid_targets_mean": 9115.0, "valid_targets_min": 6864 }, { "epoch": 0.379746835443038, "grad_norm": 0.5170770216436973, "learning_rate": 2.0952380952380954e-05, "loss": 0.4481, "loss_nan_ranks": 0, "loss_rank_avg": 0.2065398395061493, "step": 45, "valid_targets_mean": 8547.1, "valid_targets_min": 6900 }, { "epoch": 0.4219409282700422, "grad_norm": 0.5034342142568142, "learning_rate": 2.3333333333333336e-05, "loss": 0.4218, "loss_nan_ranks": 0, "loss_rank_avg": 0.1998848021030426, "step": 50, "valid_targets_mean": 7835.2, "valid_targets_min": 4698 }, { "epoch": 0.4641350210970464, "grad_norm": 0.4545350747053312, "learning_rate": 2.5714285714285718e-05, "loss": 0.4183, "loss_nan_ranks": 0, "loss_rank_avg": 0.21893973648548126, "step": 55, "valid_targets_mean": 9180.1, "valid_targets_min": 7051 }, { "epoch": 0.5063291139240507, "grad_norm": 0.44398357660292653, "learning_rate": 2.8095238095238096e-05, "loss": 0.407, "loss_nan_ranks": 0, "loss_rank_avg": 0.19019050896167755, "step": 60, "valid_targets_mean": 8196.6, "valid_targets_min": 1389 }, { "epoch": 0.5485232067510548, "grad_norm": 0.43196150110984444, "learning_rate": 3.047619047619048e-05, "loss": 0.3993, "loss_nan_ranks": 0, "loss_rank_avg": 0.18999773263931274, "step": 65, "valid_targets_mean": 8886.4, "valid_targets_min": 5947 }, { "epoch": 0.5907172995780591, "grad_norm": 0.4356694664798871, "learning_rate": 3.285714285714286e-05, "loss": 0.3694, "loss_nan_ranks": 0, "loss_rank_avg": 0.18235838413238525, "step": 70, "valid_targets_mean": 8577.5, "valid_targets_min": 5854 }, { "epoch": 0.6329113924050633, "grad_norm": 0.4879795309609486, "learning_rate": 3.523809523809524e-05, "loss": 0.3914, "loss_nan_ranks": 0, "loss_rank_avg": 0.2244984209537506, "step": 75, "valid_targets_mean": 9445.2, "valid_targets_min": 7589 }, { "epoch": 0.6751054852320675, "grad_norm": 0.4591287171448197, "learning_rate": 3.761904761904762e-05, "loss": 0.3652, "loss_nan_ranks": 0, "loss_rank_avg": 0.19888997077941895, "step": 80, "valid_targets_mean": 8648.0, "valid_targets_min": 6995 }, { "epoch": 0.7172995780590717, "grad_norm": 0.43370135355956757, "learning_rate": 4e-05, "loss": 0.3363, "loss_nan_ranks": 0, "loss_rank_avg": 0.15653540194034576, "step": 85, "valid_targets_mean": 7976.5, "valid_targets_min": 5676 }, { "epoch": 0.759493670886076, "grad_norm": 0.43614169159655086, "learning_rate": 3.9995601949609725e-05, "loss": 0.3515, "loss_nan_ranks": 0, "loss_rank_avg": 0.16819864511489868, "step": 90, "valid_targets_mean": 8489.6, "valid_targets_min": 5651 }, { "epoch": 0.8016877637130801, "grad_norm": 0.47270502658473995, "learning_rate": 3.998240973272361e-05, "loss": 0.3544, "loss_nan_ranks": 0, "loss_rank_avg": 0.20371896028518677, "step": 95, "valid_targets_mean": 8980.9, "valid_targets_min": 5911 }, { "epoch": 0.8438818565400844, "grad_norm": 0.44366932292888417, "learning_rate": 3.996042915134512e-05, "loss": 0.355, "loss_nan_ranks": 0, "loss_rank_avg": 0.20148850977420807, "step": 100, "valid_targets_mean": 9147.2, "valid_targets_min": 6069 }, { "epoch": 0.8860759493670886, "grad_norm": 0.450738888687614, "learning_rate": 3.9929669872644716e-05, "loss": 0.3417, "loss_nan_ranks": 0, "loss_rank_avg": 0.13800635933876038, "step": 105, "valid_targets_mean": 6815.9, "valid_targets_min": 1124 }, { "epoch": 0.9282700421940928, "grad_norm": 0.41039396298425773, "learning_rate": 3.989014542470815e-05, "loss": 0.3378, "loss_nan_ranks": 0, "loss_rank_avg": 0.16703501343727112, "step": 110, "valid_targets_mean": 10058.6, "valid_targets_min": 8736 }, { "epoch": 0.9704641350210971, "grad_norm": 0.40658466169041685, "learning_rate": 3.98418731905868e-05, "loss": 0.3441, "loss_nan_ranks": 0, "loss_rank_avg": 0.1470247507095337, "step": 115, "valid_targets_mean": 8467.8, "valid_targets_min": 4203 }, { "epoch": 1.0084388185654007, "grad_norm": 0.42706146525288, "learning_rate": 3.978487440065248e-05, "loss": 0.3478, "loss_nan_ranks": 0, "loss_rank_avg": 0.1719146966934204, "step": 120, "valid_targets_mean": 9109.1, "valid_targets_min": 6322 }, { "epoch": 1.0506329113924051, "grad_norm": 0.3862263984474135, "learning_rate": 3.9719174123260214e-05, "loss": 0.3238, "loss_nan_ranks": 0, "loss_rank_avg": 0.15366646647453308, "step": 125, "valid_targets_mean": 9161.2, "valid_targets_min": 6619 }, { "epoch": 1.0928270042194093, "grad_norm": 0.46771718995267086, "learning_rate": 3.964480125372307e-05, "loss": 0.3114, "loss_nan_ranks": 0, "loss_rank_avg": 0.1375672072172165, "step": 130, "valid_targets_mean": 7859.0, "valid_targets_min": 1079 }, { "epoch": 1.1350210970464134, "grad_norm": 0.43449504183040244, "learning_rate": 3.9561788501603836e-05, "loss": 0.3161, "loss_nan_ranks": 0, "loss_rank_avg": 0.12888583540916443, "step": 135, "valid_targets_mean": 7176.8, "valid_targets_min": 1462 }, { "epoch": 1.1772151898734178, "grad_norm": 0.43422331056103003, "learning_rate": 3.94701723763292e-05, "loss": 0.3225, "loss_nan_ranks": 0, "loss_rank_avg": 0.15962457656860352, "step": 140, "valid_targets_mean": 8854.5, "valid_targets_min": 6056 }, { "epoch": 1.219409282700422, "grad_norm": 0.47515202494067976, "learning_rate": 3.936999317113271e-05, "loss": 0.3243, "loss_nan_ranks": 0, "loss_rank_avg": 0.1411084532737732, "step": 145, "valid_targets_mean": 8753.8, "valid_targets_min": 5301 }, { "epoch": 1.261603375527426, "grad_norm": 0.40784537683056815, "learning_rate": 3.926129494533362e-05, "loss": 0.3125, "loss_nan_ranks": 0, "loss_rank_avg": 0.14677849411964417, "step": 150, "valid_targets_mean": 8915.6, "valid_targets_min": 5868 }, { "epoch": 1.3037974683544304, "grad_norm": 0.42669566237324436, "learning_rate": 3.914412550495937e-05, "loss": 0.3189, "loss_nan_ranks": 0, "loss_rank_avg": 0.13394805788993835, "step": 155, "valid_targets_mean": 7279.6, "valid_targets_min": 1145 }, { "epoch": 1.3459915611814346, "grad_norm": 0.41003883522018736, "learning_rate": 3.901853638172025e-05, "loss": 0.3179, "loss_nan_ranks": 0, "loss_rank_avg": 0.16570201516151428, "step": 160, "valid_targets_mean": 9676.4, "valid_targets_min": 8776 }, { "epoch": 1.3881856540084387, "grad_norm": 0.4090145854881866, "learning_rate": 3.888458281034551e-05, "loss": 0.2767, "loss_nan_ranks": 0, "loss_rank_avg": 0.13220128417015076, "step": 165, "valid_targets_mean": 8463.6, "valid_targets_min": 352 }, { "epoch": 1.4303797468354431, "grad_norm": 0.4158827283938909, "learning_rate": 3.874232370429085e-05, "loss": 0.3099, "loss_nan_ranks": 0, "loss_rank_avg": 0.1609145700931549, "step": 170, "valid_targets_mean": 9337.5, "valid_targets_min": 8545 }, { "epoch": 1.4725738396624473, "grad_norm": 0.4624418763799199, "learning_rate": 3.8591821629827946e-05, "loss": 0.3101, "loss_nan_ranks": 0, "loss_rank_avg": 0.15375180542469025, "step": 175, "valid_targets_mean": 8644.0, "valid_targets_min": 7177 }, { "epoch": 1.5147679324894514, "grad_norm": 0.44905907331860323, "learning_rate": 3.8433142778527536e-05, "loss": 0.3149, "loss_nan_ranks": 0, "loss_rank_avg": 0.13599535822868347, "step": 180, "valid_targets_mean": 7853.4, "valid_targets_min": 4716 }, { "epoch": 1.5569620253164556, "grad_norm": 0.43202384347831446, "learning_rate": 3.826635693814801e-05, "loss": 0.3003, "loss_nan_ranks": 0, "loss_rank_avg": 0.14607158303260803, "step": 185, "valid_targets_mean": 7725.0, "valid_targets_min": 5193 }, { "epoch": 1.59915611814346, "grad_norm": 0.4250126556019548, "learning_rate": 3.8091537461942395e-05, "loss": 0.3262, "loss_nan_ranks": 0, "loss_rank_avg": 0.1532595157623291, "step": 190, "valid_targets_mean": 8313.8, "valid_targets_min": 1141 }, { "epoch": 1.6413502109704643, "grad_norm": 0.4401245618708091, "learning_rate": 3.7908761236397265e-05, "loss": 0.2855, "loss_nan_ranks": 0, "loss_rank_avg": 0.15331213176250458, "step": 195, "valid_targets_mean": 8088.1, "valid_targets_min": 4238 }, { "epoch": 1.6835443037974684, "grad_norm": 0.466081852025086, "learning_rate": 3.771810864741761e-05, "loss": 0.2953, "loss_nan_ranks": 0, "loss_rank_avg": 0.156716451048851, "step": 200, "valid_targets_mean": 9015.5, "valid_targets_min": 4203 }, { "epoch": 1.7257383966244726, "grad_norm": 0.44420961731157504, "learning_rate": 3.751966354497278e-05, "loss": 0.3126, "loss_nan_ranks": 0, "loss_rank_avg": 0.12232231348752975, "step": 205, "valid_targets_mean": 7068.8, "valid_targets_min": 1389 }, { "epoch": 1.7679324894514767, "grad_norm": 0.44094509994269193, "learning_rate": 3.7313513206218794e-05, "loss": 0.3115, "loss_nan_ranks": 0, "loss_rank_avg": 0.15219950675964355, "step": 210, "valid_targets_mean": 6983.6, "valid_targets_min": 1497 }, { "epoch": 1.810126582278481, "grad_norm": 0.4363001268331526, "learning_rate": 3.709974829711344e-05, "loss": 0.3157, "loss_nan_ranks": 0, "loss_rank_avg": 0.13169416785240173, "step": 215, "valid_targets_mean": 7780.9, "valid_targets_min": 1318 }, { "epoch": 1.8523206751054853, "grad_norm": 0.46903212787997367, "learning_rate": 3.6878462832540903e-05, "loss": 0.3129, "loss_nan_ranks": 0, "loss_rank_avg": 0.1425689458847046, "step": 220, "valid_targets_mean": 8625.2, "valid_targets_min": 6122 }, { "epoch": 1.8945147679324894, "grad_norm": 0.41310139602425183, "learning_rate": 3.6649754134963564e-05, "loss": 0.3011, "loss_nan_ranks": 0, "loss_rank_avg": 0.13354924321174622, "step": 225, "valid_targets_mean": 8469.6, "valid_targets_min": 4623 }, { "epoch": 1.9367088607594938, "grad_norm": 0.4087947045064101, "learning_rate": 3.64137227916191e-05, "loss": 0.3079, "loss_nan_ranks": 0, "loss_rank_avg": 0.15488475561141968, "step": 230, "valid_targets_mean": 8825.4, "valid_targets_min": 6185 }, { "epoch": 1.978902953586498, "grad_norm": 0.4223091429231445, "learning_rate": 3.6170472610281674e-05, "loss": 0.2883, "loss_nan_ranks": 0, "loss_rank_avg": 0.11964912712574005, "step": 235, "valid_targets_mean": 7039.6, "valid_targets_min": 775 }, { "epoch": 2.0168776371308015, "grad_norm": 0.42112252103307546, "learning_rate": 3.592011057360678e-05, "loss": 0.2784, "loss_nan_ranks": 0, "loss_rank_avg": 0.1303289830684662, "step": 240, "valid_targets_mean": 7897.1, "valid_targets_min": 782 }, { "epoch": 2.059071729957806, "grad_norm": 0.4276205778137949, "learning_rate": 3.5662746792079726e-05, "loss": 0.2875, "loss_nan_ranks": 0, "loss_rank_avg": 0.14901086688041687, "step": 245, "valid_targets_mean": 8519.6, "valid_targets_min": 1031 }, { "epoch": 2.1012658227848102, "grad_norm": 0.4089545502307839, "learning_rate": 3.53984944555885e-05, "loss": 0.2765, "loss_nan_ranks": 0, "loss_rank_avg": 0.14145910739898682, "step": 250, "valid_targets_mean": 9206.6, "valid_targets_min": 7423 }, { "epoch": 2.1434599156118144, "grad_norm": 0.495227570105931, "learning_rate": 3.512746978364227e-05, "loss": 0.2902, "loss_nan_ranks": 0, "loss_rank_avg": 0.15283076465129852, "step": 255, "valid_targets_mean": 8669.4, "valid_targets_min": 5525 }, { "epoch": 2.1856540084388185, "grad_norm": 0.42413672320413554, "learning_rate": 3.484979197425745e-05, "loss": 0.287, "loss_nan_ranks": 0, "loss_rank_avg": 0.11562255024909973, "step": 260, "valid_targets_mean": 8506.1, "valid_targets_min": 5868 }, { "epoch": 2.2278481012658227, "grad_norm": 0.4429223995215785, "learning_rate": 3.456558315153382e-05, "loss": 0.278, "loss_nan_ranks": 0, "loss_rank_avg": 0.11651501059532166, "step": 265, "valid_targets_mean": 7150.5, "valid_targets_min": 850 }, { "epoch": 2.270042194092827, "grad_norm": 0.3916309614586102, "learning_rate": 3.427496831194378e-05, "loss": 0.2854, "loss_nan_ranks": 0, "loss_rank_avg": 0.11857372522354126, "step": 270, "valid_targets_mean": 9030.6, "valid_targets_min": 7393 }, { "epoch": 2.3122362869198314, "grad_norm": 0.4408587328354608, "learning_rate": 3.3978075269358175e-05, "loss": 0.2797, "loss_nan_ranks": 0, "loss_rank_avg": 0.14032971858978271, "step": 275, "valid_targets_mean": 8928.5, "valid_targets_min": 6624 }, { "epoch": 2.3544303797468356, "grad_norm": 0.5255984072468615, "learning_rate": 3.3675034598833196e-05, "loss": 0.286, "loss_nan_ranks": 0, "loss_rank_avg": 0.1699741780757904, "step": 280, "valid_targets_mean": 9138.8, "valid_targets_min": 7798 }, { "epoch": 2.3966244725738397, "grad_norm": 0.46154900583042857, "learning_rate": 3.3365979579182774e-05, "loss": 0.2841, "loss_nan_ranks": 0, "loss_rank_avg": 0.1370779126882553, "step": 285, "valid_targets_mean": 7857.2, "valid_targets_min": 5341 }, { "epoch": 2.438818565400844, "grad_norm": 0.410815728442414, "learning_rate": 3.3051046134361874e-05, "loss": 0.2717, "loss_nan_ranks": 0, "loss_rank_avg": 0.13771235942840576, "step": 290, "valid_targets_mean": 8922.4, "valid_targets_min": 5929 }, { "epoch": 2.481012658227848, "grad_norm": 0.42579984902443774, "learning_rate": 3.27303727736865e-05, "loss": 0.2703, "loss_nan_ranks": 0, "loss_rank_avg": 0.150842547416687, "step": 295, "valid_targets_mean": 9481.6, "valid_targets_min": 8016 }, { "epoch": 2.523206751054852, "grad_norm": 0.45157363816682605, "learning_rate": 3.2404100530916555e-05, "loss": 0.2754, "loss_nan_ranks": 0, "loss_rank_avg": 0.12719795107841492, "step": 300, "valid_targets_mean": 8190.1, "valid_targets_min": 6069 }, { "epoch": 2.5654008438818563, "grad_norm": 0.429470249992375, "learning_rate": 3.20723729022285e-05, "loss": 0.2699, "loss_nan_ranks": 0, "loss_rank_avg": 0.10490024089813232, "step": 305, "valid_targets_mean": 7513.5, "valid_targets_min": 1012 }, { "epoch": 2.607594936708861, "grad_norm": 0.44240780660157275, "learning_rate": 3.173533578310503e-05, "loss": 0.2846, "loss_nan_ranks": 0, "loss_rank_avg": 0.15472307801246643, "step": 310, "valid_targets_mean": 9026.9, "valid_targets_min": 6796 }, { "epoch": 2.649789029535865, "grad_norm": 0.46171787256178776, "learning_rate": 3.139313740416945e-05, "loss": 0.2734, "loss_nan_ranks": 0, "loss_rank_avg": 0.14282463490962982, "step": 315, "valid_targets_mean": 8840.5, "valid_targets_min": 5948 }, { "epoch": 2.691983122362869, "grad_norm": 0.5596242010714433, "learning_rate": 3.104592826599319e-05, "loss": 0.2682, "loss_nan_ranks": 0, "loss_rank_avg": 0.13433107733726501, "step": 320, "valid_targets_mean": 8862.6, "valid_targets_min": 7327 }, { "epoch": 2.7341772151898733, "grad_norm": 0.41372708901928346, "learning_rate": 3.06938610729048e-05, "loss": 0.2744, "loss_nan_ranks": 0, "loss_rank_avg": 0.13239914178848267, "step": 325, "valid_targets_mean": 8498.8, "valid_targets_min": 5562 }, { "epoch": 2.7763713080168775, "grad_norm": 1.6545728711184768, "learning_rate": 3.0337090665829883e-05, "loss": 0.2861, "loss_nan_ranks": 0, "loss_rank_avg": 0.1271859109401703, "step": 330, "valid_targets_mean": 8136.5, "valid_targets_min": 5643 }, { "epoch": 2.818565400843882, "grad_norm": 0.4317369059905672, "learning_rate": 2.9975773954191246e-05, "loss": 0.2847, "loss_nan_ranks": 0, "loss_rank_avg": 0.16068033874034882, "step": 335, "valid_targets_mean": 9313.6, "valid_targets_min": 8083 }, { "epoch": 2.8607594936708862, "grad_norm": 0.4361200075559799, "learning_rate": 2.9610069846899355e-05, "loss": 0.2798, "loss_nan_ranks": 0, "loss_rank_avg": 0.12818226218223572, "step": 340, "valid_targets_mean": 8236.2, "valid_targets_min": 1372 }, { "epoch": 2.9029535864978904, "grad_norm": 0.41932505340642173, "learning_rate": 2.9240139182463384e-05, "loss": 0.2798, "loss_nan_ranks": 0, "loss_rank_avg": 0.13603907823562622, "step": 345, "valid_targets_mean": 8153.4, "valid_targets_min": 1722 }, { "epoch": 2.9451476793248945, "grad_norm": 0.4285742105739042, "learning_rate": 2.8866144658253642e-05, "loss": 0.2826, "loss_nan_ranks": 0, "loss_rank_avg": 0.13186733424663544, "step": 350, "valid_targets_mean": 7989.5, "valid_targets_min": 4996 }, { "epoch": 2.9873417721518987, "grad_norm": 0.46406691637390524, "learning_rate": 2.8488250758946453e-05, "loss": 0.2709, "loss_nan_ranks": 0, "loss_rank_avg": 0.14630965888500214, "step": 355, "valid_targets_mean": 8137.5, "valid_targets_min": 3982 }, { "epoch": 3.0253164556962027, "grad_norm": 0.43995204460384685, "learning_rate": 2.8106623684182944e-05, "loss": 0.2549, "loss_nan_ranks": 0, "loss_rank_avg": 0.13689684867858887, "step": 360, "valid_targets_mean": 8913.4, "valid_targets_min": 7312 }, { "epoch": 3.067510548523207, "grad_norm": 0.45122017845034534, "learning_rate": 2.7721431275473634e-05, "loss": 0.2587, "loss_nan_ranks": 0, "loss_rank_avg": 0.13735206425189972, "step": 365, "valid_targets_mean": 8872.2, "valid_targets_min": 4723 }, { "epoch": 3.109704641350211, "grad_norm": 0.41848489063381833, "learning_rate": 2.733284294238086e-05, "loss": 0.2606, "loss_nan_ranks": 0, "loss_rank_avg": 0.10563844442367554, "step": 370, "valid_targets_mean": 8790.6, "valid_targets_min": 6799 }, { "epoch": 3.151898734177215, "grad_norm": 0.48612738303501324, "learning_rate": 2.694102958801163e-05, "loss": 0.261, "loss_nan_ranks": 0, "loss_rank_avg": 0.13847270607948303, "step": 375, "valid_targets_mean": 7784.8, "valid_targets_min": 1899 }, { "epoch": 3.1940928270042193, "grad_norm": 0.43439045015611505, "learning_rate": 2.654616353385354e-05, "loss": 0.2545, "loss_nan_ranks": 0, "loss_rank_avg": 0.11931806802749634, "step": 380, "valid_targets_mean": 8220.5, "valid_targets_min": 6466 }, { "epoch": 3.2362869198312234, "grad_norm": 0.4672629305905832, "learning_rate": 2.6148418443986967e-05, "loss": 0.2573, "loss_nan_ranks": 0, "loss_rank_avg": 0.12390561401844025, "step": 385, "valid_targets_mean": 8790.5, "valid_targets_min": 7425 }, { "epoch": 3.278481012658228, "grad_norm": 0.4814651192611666, "learning_rate": 2.5747969248706675e-05, "loss": 0.2647, "loss_nan_ranks": 0, "loss_rank_avg": 0.15146447718143463, "step": 390, "valid_targets_mean": 9032.1, "valid_targets_min": 5188 }, { "epoch": 3.320675105485232, "grad_norm": 0.4374830944824983, "learning_rate": 2.5344992067586623e-05, "loss": 0.2647, "loss_nan_ranks": 0, "loss_rank_avg": 0.13405266404151917, "step": 395, "valid_targets_mean": 8223.2, "valid_targets_min": 3452 }, { "epoch": 3.3628691983122363, "grad_norm": 0.4732142324779574, "learning_rate": 2.4939664132021685e-05, "loss": 0.2558, "loss_nan_ranks": 0, "loss_rank_avg": 0.10236911475658417, "step": 400, "valid_targets_mean": 7064.0, "valid_targets_min": 1079 }, { "epoch": 3.4050632911392404, "grad_norm": 0.43954374313846106, "learning_rate": 2.4532163707280372e-05, "loss": 0.2628, "loss_nan_ranks": 0, "loss_rank_avg": 0.12227586656808853, "step": 405, "valid_targets_mean": 8465.5, "valid_targets_min": 6739 }, { "epoch": 3.4472573839662446, "grad_norm": 0.42292011324295486, "learning_rate": 2.4122670014102905e-05, "loss": 0.262, "loss_nan_ranks": 0, "loss_rank_avg": 0.11511904001235962, "step": 410, "valid_targets_mean": 9274.5, "valid_targets_min": 6734 }, { "epoch": 3.489451476793249, "grad_norm": 0.4344835895735963, "learning_rate": 2.371136314987898e-05, "loss": 0.2822, "loss_nan_ranks": 0, "loss_rank_avg": 0.14791841804981232, "step": 415, "valid_targets_mean": 8766.1, "valid_targets_min": 6028 }, { "epoch": 3.5316455696202533, "grad_norm": 0.4261397076907197, "learning_rate": 2.329842400944008e-05, "loss": 0.2528, "loss_nan_ranks": 0, "loss_rank_avg": 0.10815391689538956, "step": 420, "valid_targets_mean": 8315.0, "valid_targets_min": 7097 }, { "epoch": 3.5738396624472575, "grad_norm": 0.5622442435750628, "learning_rate": 2.2884034205500977e-05, "loss": 0.2575, "loss_nan_ranks": 0, "loss_rank_avg": 0.13450974225997925, "step": 425, "valid_targets_mean": 8429.6, "valid_targets_min": 6223 }, { "epoch": 3.6160337552742616, "grad_norm": 0.45056620552671345, "learning_rate": 2.246837598878557e-05, "loss": 0.2728, "loss_nan_ranks": 0, "loss_rank_avg": 0.12603911757469177, "step": 430, "valid_targets_mean": 8927.6, "valid_targets_min": 5603 }, { "epoch": 3.6582278481012658, "grad_norm": 0.44591198215906896, "learning_rate": 2.2051632167872072e-05, "loss": 0.2645, "loss_nan_ranks": 0, "loss_rank_avg": 0.16440054774284363, "step": 435, "valid_targets_mean": 9716.9, "valid_targets_min": 8934 }, { "epoch": 3.70042194092827, "grad_norm": 0.4244520408699371, "learning_rate": 2.1633986028792915e-05, "loss": 0.2468, "loss_nan_ranks": 0, "loss_rank_avg": 0.12980708479881287, "step": 440, "valid_targets_mean": 9191.9, "valid_targets_min": 7691 }, { "epoch": 3.742616033755274, "grad_norm": 0.4400154252728221, "learning_rate": 2.1215621254424592e-05, "loss": 0.2621, "loss_nan_ranks": 0, "loss_rank_avg": 0.14869439601898193, "step": 445, "valid_targets_mean": 9304.6, "valid_targets_min": 8242 }, { "epoch": 3.7848101265822782, "grad_norm": 0.4312069617379547, "learning_rate": 2.0796721843703028e-05, "loss": 0.2667, "loss_nan_ranks": 0, "loss_rank_avg": 0.14449357986450195, "step": 450, "valid_targets_mean": 8888.6, "valid_targets_min": 4545 }, { "epoch": 3.827004219409283, "grad_norm": 0.6226769603382436, "learning_rate": 2.0377472030699895e-05, "loss": 0.2561, "loss_nan_ranks": 0, "loss_rank_avg": 0.13251638412475586, "step": 455, "valid_targets_mean": 9287.4, "valid_targets_min": 7232 }, { "epoch": 3.869198312236287, "grad_norm": 0.41779119755506317, "learning_rate": 1.995805620359557e-05, "loss": 0.2535, "loss_nan_ranks": 0, "loss_rank_avg": 0.11737249791622162, "step": 460, "valid_targets_mean": 8372.5, "valid_targets_min": 6429 }, { "epoch": 3.911392405063291, "grad_norm": 0.42919575535179477, "learning_rate": 1.9538658823584258e-05, "loss": 0.2492, "loss_nan_ranks": 0, "loss_rank_avg": 0.12399604916572571, "step": 465, "valid_targets_mean": 8499.9, "valid_targets_min": 7146 }, { "epoch": 3.9535864978902953, "grad_norm": 0.4282733272450913, "learning_rate": 1.9119464343747048e-05, "loss": 0.2576, "loss_nan_ranks": 0, "loss_rank_avg": 0.12035520374774933, "step": 470, "valid_targets_mean": 9111.9, "valid_targets_min": 7021 }, { "epoch": 3.9957805907173, "grad_norm": 0.4688557446235436, "learning_rate": 1.8700657127928495e-05, "loss": 0.2579, "loss_nan_ranks": 0, "loss_rank_avg": 0.14246785640716553, "step": 475, "valid_targets_mean": 8446.9, "valid_targets_min": 3548 }, { "epoch": 4.033755274261603, "grad_norm": 0.4179516240442328, "learning_rate": 1.8282421369652514e-05, "loss": 0.2473, "loss_nan_ranks": 0, "loss_rank_avg": 0.12743636965751648, "step": 480, "valid_targets_mean": 8593.9, "valid_targets_min": 6807 }, { "epoch": 4.075949367088608, "grad_norm": 0.4159181042353588, "learning_rate": 1.786494101111308e-05, "loss": 0.2435, "loss_nan_ranks": 0, "loss_rank_avg": 0.12010753154754639, "step": 485, "valid_targets_mean": 9153.5, "valid_targets_min": 7211 }, { "epoch": 4.118143459915612, "grad_norm": 0.43857477399241507, "learning_rate": 1.7448399662275577e-05, "loss": 0.2358, "loss_nan_ranks": 0, "loss_rank_avg": 0.11870156228542328, "step": 490, "valid_targets_mean": 8510.8, "valid_targets_min": 5260 }, { "epoch": 4.160337552742616, "grad_norm": 0.7799080736665794, "learning_rate": 1.70329805201242e-05, "loss": 0.2587, "loss_nan_ranks": 0, "loss_rank_avg": 0.13719968497753143, "step": 495, "valid_targets_mean": 8487.0, "valid_targets_min": 5677 }, { "epoch": 4.2025316455696204, "grad_norm": 0.4292260280318674, "learning_rate": 1.661886628809096e-05, "loss": 0.2417, "loss_nan_ranks": 0, "loss_rank_avg": 0.13319876790046692, "step": 500, "valid_targets_mean": 9258.0, "valid_targets_min": 7850 }, { "epoch": 4.244725738396625, "grad_norm": 0.47075531459044107, "learning_rate": 1.620623909570185e-05, "loss": 0.2521, "loss_nan_ranks": 0, "loss_rank_avg": 0.12588399648666382, "step": 505, "valid_targets_mean": 7839.9, "valid_targets_min": 709 }, { "epoch": 4.286919831223629, "grad_norm": 0.4443285639795416, "learning_rate": 1.5795280418475313e-05, "loss": 0.2501, "loss_nan_ranks": 0, "loss_rank_avg": 0.12543919682502747, "step": 510, "valid_targets_mean": 9025.4, "valid_targets_min": 6790 }, { "epoch": 4.329113924050633, "grad_norm": 0.45446326155202266, "learning_rate": 1.5386170998108432e-05, "loss": 0.2508, "loss_nan_ranks": 0, "loss_rank_avg": 0.12875355780124664, "step": 515, "valid_targets_mean": 8992.2, "valid_targets_min": 7538 }, { "epoch": 4.371308016877637, "grad_norm": 0.4188354388664049, "learning_rate": 1.4979090762985793e-05, "loss": 0.2393, "loss_nan_ranks": 0, "loss_rank_avg": 0.1216769590973854, "step": 520, "valid_targets_mean": 9567.9, "valid_targets_min": 8414 }, { "epoch": 4.413502109704641, "grad_norm": 0.5144343057216146, "learning_rate": 1.4574218749046097e-05, "loss": 0.2479, "loss_nan_ranks": 0, "loss_rank_avg": 0.10991360992193222, "step": 525, "valid_targets_mean": 8216.4, "valid_targets_min": 5783 }, { "epoch": 4.455696202531645, "grad_norm": 0.45077488627425316, "learning_rate": 1.4171733021041236e-05, "loss": 0.2522, "loss_nan_ranks": 0, "loss_rank_avg": 0.12308443337678909, "step": 530, "valid_targets_mean": 8366.5, "valid_targets_min": 5486 }, { "epoch": 4.4978902953586495, "grad_norm": 0.4438440275448954, "learning_rate": 1.3771810594222522e-05, "loss": 0.2438, "loss_nan_ranks": 0, "loss_rank_avg": 0.1209210604429245, "step": 535, "valid_targets_mean": 8559.5, "valid_targets_min": 6739 }, { "epoch": 4.540084388185654, "grad_norm": 0.4138439523530891, "learning_rate": 1.3374627356488486e-05, "loss": 0.2346, "loss_nan_ranks": 0, "loss_rank_avg": 0.13088755309581757, "step": 540, "valid_targets_mean": 9257.6, "valid_targets_min": 5925 }, { "epoch": 4.582278481012658, "grad_norm": 0.4348705477932507, "learning_rate": 1.2980357991028504e-05, "loss": 0.2628, "loss_nan_ranks": 0, "loss_rank_avg": 0.12444625794887543, "step": 545, "valid_targets_mean": 9086.5, "valid_targets_min": 7240 }, { "epoch": 4.624472573839663, "grad_norm": 0.4514383858436243, "learning_rate": 1.2589175899496241e-05, "loss": 0.252, "loss_nan_ranks": 0, "loss_rank_avg": 0.14704394340515137, "step": 550, "valid_targets_mean": 9210.2, "valid_targets_min": 7000 }, { "epoch": 4.666666666666667, "grad_norm": 0.43305229013411795, "learning_rate": 1.2201253125746733e-05, "loss": 0.2471, "loss_nan_ranks": 0, "loss_rank_avg": 0.1392921805381775, "step": 555, "valid_targets_mean": 9039.9, "valid_targets_min": 7246 }, { "epoch": 4.708860759493671, "grad_norm": 0.4135427416829583, "learning_rate": 1.1816760280170615e-05, "loss": 0.2474, "loss_nan_ranks": 0, "loss_rank_avg": 0.10349678993225098, "step": 560, "valid_targets_mean": 8758.5, "valid_targets_min": 6734 }, { "epoch": 4.751054852320675, "grad_norm": 0.4134170552023414, "learning_rate": 1.1435866464658856e-05, "loss": 0.236, "loss_nan_ranks": 0, "loss_rank_avg": 0.0989164412021637, "step": 565, "valid_targets_mean": 7774.6, "valid_targets_min": 1033 }, { "epoch": 4.793248945147679, "grad_norm": 0.4202770280254775, "learning_rate": 1.1058739198230856e-05, "loss": 0.2307, "loss_nan_ranks": 0, "loss_rank_avg": 0.10661663115024567, "step": 570, "valid_targets_mean": 8591.6, "valid_targets_min": 5296 }, { "epoch": 4.8354430379746836, "grad_norm": 0.42451909244520697, "learning_rate": 1.0685544343358729e-05, "loss": 0.2498, "loss_nan_ranks": 0, "loss_rank_avg": 0.12151402235031128, "step": 575, "valid_targets_mean": 9392.6, "valid_targets_min": 6279 }, { "epoch": 4.877637130801688, "grad_norm": 0.439789899437483, "learning_rate": 1.0316446033020198e-05, "loss": 0.2379, "loss_nan_ranks": 0, "loss_rank_avg": 0.09513577073812485, "step": 580, "valid_targets_mean": 8077.6, "valid_targets_min": 4893 }, { "epoch": 4.919831223628692, "grad_norm": 0.4316282443408433, "learning_rate": 9.951606598512042e-06, "loss": 0.258, "loss_nan_ranks": 0, "loss_rank_avg": 0.13961660861968994, "step": 585, "valid_targets_mean": 9551.0, "valid_targets_min": 8353 }, { "epoch": 4.962025316455696, "grad_norm": 0.44460163153797444, "learning_rate": 9.591186498056004e-06, "loss": 0.2328, "loss_nan_ranks": 0, "loss_rank_avg": 0.11063335835933685, "step": 590, "valid_targets_mean": 8310.2, "valid_targets_min": 5642 }, { "epoch": 5.0, "grad_norm": 0.6232611185111987, "learning_rate": 9.235344246228415e-06, "loss": 0.2463, "loss_nan_ranks": 0, "loss_rank_avg": 0.21027398109436035, "step": 595, "valid_targets_mean": 9007.9, "valid_targets_min": 7843 }, { "epoch": 5.042194092827004, "grad_norm": 0.45373400536723996, "learning_rate": 8.884236344244734e-06, "loss": 0.239, "loss_nan_ranks": 0, "loss_rank_avg": 0.11295656859874725, "step": 600, "valid_targets_mean": 8223.9, "valid_targets_min": 6369 }, { "epoch": 5.084388185654008, "grad_norm": 0.44188846780421875, "learning_rate": 8.538017211129501e-06, "loss": 0.2267, "loss_nan_ranks": 0, "loss_rank_avg": 0.09402458369731903, "step": 605, "valid_targets_mean": 8217.8, "valid_targets_min": 5192 }, { "epoch": 5.1265822784810124, "grad_norm": 0.4739239566419668, "learning_rate": 8.196839115802071e-06, "loss": 0.2416, "loss_nan_ranks": 0, "loss_rank_avg": 0.1246982291340828, "step": 610, "valid_targets_mean": 9335.2, "valid_targets_min": 7233 }, { "epoch": 5.168776371308017, "grad_norm": 0.41388443568870104, "learning_rate": 7.860852110107952e-06, "loss": 0.236, "loss_nan_ranks": 0, "loss_rank_avg": 0.1162976399064064, "step": 615, "valid_targets_mean": 8525.2, "valid_targets_min": 5947 }, { "epoch": 5.210970464135021, "grad_norm": 0.4584431908697567, "learning_rate": 7.530203962825331e-06, "loss": 0.2438, "loss_nan_ranks": 0, "loss_rank_avg": 0.11082412302494049, "step": 620, "valid_targets_mean": 8312.5, "valid_targets_min": 3548 }, { "epoch": 5.253164556962025, "grad_norm": 0.5075457556667611, "learning_rate": 7.205040094675502e-06, "loss": 0.2403, "loss_nan_ranks": 0, "loss_rank_avg": 0.1264800876379013, "step": 625, "valid_targets_mean": 9783.8, "valid_targets_min": 8094 }, { "epoch": 5.29535864978903, "grad_norm": 0.43671359308707003, "learning_rate": 6.885503514366203e-06, "loss": 0.239, "loss_nan_ranks": 0, "loss_rank_avg": 0.1132010817527771, "step": 630, "valid_targets_mean": 8917.6, "valid_targets_min": 6726 }, { "epoch": 5.337552742616034, "grad_norm": 0.4375519850374428, "learning_rate": 6.571734755695584e-06, "loss": 0.23, "loss_nan_ranks": 0, "loss_rank_avg": 0.12824538350105286, "step": 635, "valid_targets_mean": 8502.2, "valid_targets_min": 1497 }, { "epoch": 5.379746835443038, "grad_norm": 0.4243347246282382, "learning_rate": 6.2638718157448285e-06, "loss": 0.2342, "loss_nan_ranks": 0, "loss_rank_avg": 0.11869437992572784, "step": 640, "valid_targets_mean": 9188.5, "valid_targets_min": 7034 }, { "epoch": 5.421940928270042, "grad_norm": 0.46745881796696226, "learning_rate": 5.962050094186236e-06, "loss": 0.2402, "loss_nan_ranks": 0, "loss_rank_avg": 0.11892999708652496, "step": 645, "valid_targets_mean": 8592.6, "valid_targets_min": 6624 }, { "epoch": 5.4641350210970465, "grad_norm": 0.469810046983012, "learning_rate": 5.666402333733843e-06, "loss": 0.2372, "loss_nan_ranks": 0, "loss_rank_avg": 0.12667159736156464, "step": 650, "valid_targets_mean": 7721.2, "valid_targets_min": 5301 }, { "epoch": 5.506329113924051, "grad_norm": 0.440394400681658, "learning_rate": 5.377058561762474e-06, "loss": 0.2344, "loss_nan_ranks": 0, "loss_rank_avg": 0.09980838745832443, "step": 655, "valid_targets_mean": 7530.0, "valid_targets_min": 4441 }, { "epoch": 5.548523206751055, "grad_norm": 0.46129642001131443, "learning_rate": 5.094146033121057e-06, "loss": 0.231, "loss_nan_ranks": 0, "loss_rank_avg": 0.10793110728263855, "step": 660, "valid_targets_mean": 8216.8, "valid_targets_min": 4893 }, { "epoch": 5.590717299578059, "grad_norm": 0.48121058406225553, "learning_rate": 4.817789174165281e-06, "loss": 0.2572, "loss_nan_ranks": 0, "loss_rank_avg": 0.11774282157421112, "step": 665, "valid_targets_mean": 9377.8, "valid_targets_min": 7466 }, { "epoch": 5.632911392405063, "grad_norm": 0.47185562628992206, "learning_rate": 4.5481095280342925e-06, "loss": 0.2359, "loss_nan_ranks": 0, "loss_rank_avg": 0.10965608060359955, "step": 670, "valid_targets_mean": 7637.9, "valid_targets_min": 1141 }, { "epoch": 5.675105485232067, "grad_norm": 0.4605629407236577, "learning_rate": 4.285225701195383e-06, "loss": 0.2402, "loss_nan_ranks": 0, "loss_rank_avg": 0.14336133003234863, "step": 675, "valid_targets_mean": 8773.4, "valid_targets_min": 5065 }, { "epoch": 5.717299578059071, "grad_norm": 0.39880864828224805, "learning_rate": 4.029253311280281e-06, "loss": 0.2439, "loss_nan_ranks": 0, "loss_rank_avg": 0.11196043342351913, "step": 680, "valid_targets_mean": 9331.1, "valid_targets_min": 7073 }, { "epoch": 5.759493670886076, "grad_norm": 0.435020554643124, "learning_rate": 3.7803049362359103e-06, "loss": 0.2333, "loss_nan_ranks": 0, "loss_rank_avg": 0.11478637158870697, "step": 685, "valid_targets_mean": 8947.9, "valid_targets_min": 8357 }, { "epoch": 5.80168776371308, "grad_norm": 0.4269181943885422, "learning_rate": 3.5384900648120814e-06, "loss": 0.2424, "loss_nan_ranks": 0, "loss_rank_avg": 0.1145181730389595, "step": 690, "valid_targets_mean": 9141.5, "valid_targets_min": 6753 }, { "epoch": 5.843881856540085, "grad_norm": 0.40001789612061567, "learning_rate": 3.3039150484077555e-06, "loss": 0.2305, "loss_nan_ranks": 0, "loss_rank_avg": 0.10709261149168015, "step": 695, "valid_targets_mean": 8723.4, "valid_targets_min": 5732 }, { "epoch": 5.886075949367089, "grad_norm": 0.4272824200506153, "learning_rate": 3.0766830542971826e-06, "loss": 0.2316, "loss_nan_ranks": 0, "loss_rank_avg": 0.08846732974052429, "step": 700, "valid_targets_mean": 7904.5, "valid_targets_min": 4238 }, { "epoch": 5.928270042194093, "grad_norm": 0.4240311265649667, "learning_rate": 2.856894020256389e-06, "loss": 0.2315, "loss_nan_ranks": 0, "loss_rank_avg": 0.10147963464260101, "step": 705, "valid_targets_mean": 7797.0, "valid_targets_min": 773 }, { "epoch": 5.970464135021097, "grad_norm": 0.44341039833186224, "learning_rate": 2.6446446106100855e-06, "loss": 0.2358, "loss_nan_ranks": 0, "loss_rank_avg": 0.11635808646678925, "step": 710, "valid_targets_mean": 8042.9, "valid_targets_min": 4819 }, { "epoch": 6.008438818565401, "grad_norm": 0.43454237089026004, "learning_rate": 2.4400281737181518e-06, "loss": 0.2485, "loss_nan_ranks": 0, "loss_rank_avg": 0.12369026243686676, "step": 715, "valid_targets_mean": 7799.2, "valid_targets_min": 1552 }, { "epoch": 6.050632911392405, "grad_norm": 0.4086719126374126, "learning_rate": 2.2431347009206107e-06, "loss": 0.2399, "loss_nan_ranks": 0, "loss_rank_avg": 0.11836208403110504, "step": 720, "valid_targets_mean": 8866.1, "valid_targets_min": 5937 }, { "epoch": 6.0928270042194095, "grad_norm": 0.47554127803953666, "learning_rate": 2.0540507869589365e-06, "loss": 0.231, "loss_nan_ranks": 0, "loss_rank_avg": 0.12429390847682953, "step": 725, "valid_targets_mean": 7500.9, "valid_targets_min": 1372 }, { "epoch": 6.135021097046414, "grad_norm": 0.4138302974266342, "learning_rate": 1.872859591891305e-06, "loss": 0.235, "loss_nan_ranks": 0, "loss_rank_avg": 0.10978598892688751, "step": 730, "valid_targets_mean": 9062.0, "valid_targets_min": 5651 }, { "epoch": 6.177215189873418, "grad_norm": 0.477745903678315, "learning_rate": 1.6996408045183255e-06, "loss": 0.231, "loss_nan_ranks": 0, "loss_rank_avg": 0.0997207909822464, "step": 735, "valid_targets_mean": 8074.8, "valid_targets_min": 4794 }, { "epoch": 6.219409282700422, "grad_norm": 0.39025085721052233, "learning_rate": 1.5344706073355386e-06, "loss": 0.2225, "loss_nan_ranks": 0, "loss_rank_avg": 0.1053914874792099, "step": 740, "valid_targets_mean": 9646.5, "valid_targets_min": 8467 }, { "epoch": 6.261603375527426, "grad_norm": 0.46907873784680004, "learning_rate": 1.377421643027963e-06, "loss": 0.2403, "loss_nan_ranks": 0, "loss_rank_avg": 0.1020062193274498, "step": 745, "valid_targets_mean": 8838.5, "valid_targets_min": 5929 }, { "epoch": 6.30379746835443, "grad_norm": 0.4255302670132069, "learning_rate": 1.2285629825214817e-06, "loss": 0.242, "loss_nan_ranks": 0, "loss_rank_avg": 0.11597343534231186, "step": 750, "valid_targets_mean": 8416.2, "valid_targets_min": 6747 }, { "epoch": 6.345991561181434, "grad_norm": 0.4209806375747758, "learning_rate": 1.0879600946050783e-06, "loss": 0.2198, "loss_nan_ranks": 0, "loss_rank_avg": 0.10497762262821198, "step": 755, "valid_targets_mean": 8873.2, "valid_targets_min": 6692 }, { "epoch": 6.3881856540084385, "grad_norm": 0.3994620526352628, "learning_rate": 9.556748171373663e-07, "loss": 0.2363, "loss_nan_ranks": 0, "loss_rank_avg": 0.11500047892332077, "step": 760, "valid_targets_mean": 9397.5, "valid_targets_min": 6864 }, { "epoch": 6.430379746835443, "grad_norm": 0.48476265690454917, "learning_rate": 8.317653298499651e-07, "loss": 0.2378, "loss_nan_ranks": 0, "loss_rank_avg": 0.11502040922641754, "step": 765, "valid_targets_mean": 8142.6, "valid_targets_min": 1255 }, { "epoch": 6.472573839662447, "grad_norm": 0.4559778814783951, "learning_rate": 7.162861287597666e-07, "loss": 0.2408, "loss_nan_ranks": 0, "loss_rank_avg": 0.15468385815620422, "step": 770, "valid_targets_mean": 9261.8, "valid_targets_min": 6256 }, { "epoch": 6.514767932489452, "grad_norm": 0.40532634311815335, "learning_rate": 6.092880022013115e-07, "loss": 0.2402, "loss_nan_ranks": 0, "loss_rank_avg": 0.12497968971729279, "step": 775, "valid_targets_mean": 8960.5, "valid_targets_min": 5005 }, { "epoch": 6.556962025316456, "grad_norm": 0.43147417534943683, "learning_rate": 5.108180084898284e-07, "loss": 0.2321, "loss_nan_ranks": 0, "loss_rank_avg": 0.10713645815849304, "step": 780, "valid_targets_mean": 8562.0, "valid_targets_min": 5754 }, { "epoch": 6.59915611814346, "grad_norm": 0.4178467879199177, "learning_rate": 4.209194552247442e-07, "loss": 0.2387, "loss_nan_ranks": 0, "loss_rank_avg": 0.11786434799432755, "step": 785, "valid_targets_mean": 8937.8, "valid_targets_min": 6908 }, { "epoch": 6.641350210970464, "grad_norm": 0.4296586265719399, "learning_rate": 3.3963188024278783e-07, "loss": 0.2253, "loss_nan_ranks": 0, "loss_rank_avg": 0.12450215220451355, "step": 790, "valid_targets_mean": 8674.1, "valid_targets_min": 5603 }, { "epoch": 6.6835443037974684, "grad_norm": 0.4648325639162333, "learning_rate": 2.6699103422904494e-07, "loss": 0.2329, "loss_nan_ranks": 0, "loss_rank_avg": 0.13395778834819794, "step": 795, "valid_targets_mean": 7350.9, "valid_targets_min": 1899 }, { "epoch": 6.725738396624473, "grad_norm": 0.4397893931280253, "learning_rate": 2.030288649936285e-07, "loss": 0.2385, "loss_nan_ranks": 0, "loss_rank_avg": 0.10574311763048172, "step": 800, "valid_targets_mean": 7157.0, "valid_targets_min": 850 }, { "epoch": 6.767932489451477, "grad_norm": 0.43270603582901673, "learning_rate": 1.477735034208805e-07, "loss": 0.2305, "loss_nan_ranks": 0, "loss_rank_avg": 0.12294603884220123, "step": 805, "valid_targets_mean": 8442.5, "valid_targets_min": 3355 }, { "epoch": 6.810126582278481, "grad_norm": 0.4125142256044064, "learning_rate": 1.0124925109725514e-07, "loss": 0.2394, "loss_nan_ranks": 0, "loss_rank_avg": 0.14932481944561005, "step": 810, "valid_targets_mean": 9925.2, "valid_targets_min": 8745 }, { "epoch": 6.852320675105485, "grad_norm": 0.41761981991702074, "learning_rate": 6.347656962335524e-08, "loss": 0.226, "loss_nan_ranks": 0, "loss_rank_avg": 0.09456837177276611, "step": 815, "valid_targets_mean": 7658.6, "valid_targets_min": 4375 }, { "epoch": 6.894514767932489, "grad_norm": 0.5241537287811314, "learning_rate": 3.447207161483279e-08, "loss": 0.2253, "loss_nan_ranks": 0, "loss_rank_avg": 0.10600066184997559, "step": 820, "valid_targets_mean": 7051.8, "valid_targets_min": 878 }, { "epoch": 6.936708860759493, "grad_norm": 0.42822827158756854, "learning_rate": 1.424851339606903e-08, "loss": 0.2264, "loss_nan_ranks": 0, "loss_rank_avg": 0.11232862621545792, "step": 825, "valid_targets_mean": 9165.9, "valid_targets_min": 7143 }, { "epoch": 6.978902953586498, "grad_norm": 0.4390460209806349, "learning_rate": 2.8147893898755605e-09, "loss": 0.2299, "loss_nan_ranks": 0, "loss_rank_avg": 0.10721021145582199, "step": 830, "valid_targets_mean": 9302.9, "valid_targets_min": 8063 }, { "epoch": 7.0, "loss_nan_ranks": 0, "loss_rank_avg": 0.22618243098258972, "step": 833, "total_flos": 6.212778952973353e+17, "train_loss": 0.28838339830790105, "train_runtime": 12171.8398, "train_samples_per_second": 1.088, "train_steps_per_second": 0.068, "valid_targets_mean": 9237.2, "valid_targets_min": 6486 } ], "logging_steps": 5, "max_steps": 833, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.212778952973353e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }