{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "eval_steps": 500, "global_step": 1435, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.024390243902439025, "grad_norm": 11.487528759401481, "learning_rate": 1.111111111111111e-06, "loss": 0.8361, "loss_nan_ranks": 0, "loss_rank_avg": 0.7365100383758545, "step": 5, "valid_targets_mean": 3762.9, "valid_targets_min": 1347 }, { "epoch": 0.04878048780487805, "grad_norm": 9.475522028864606, "learning_rate": 2.5e-06, "loss": 0.8084, "loss_nan_ranks": 0, "loss_rank_avg": 0.780247688293457, "step": 10, "valid_targets_mean": 4102.0, "valid_targets_min": 1132 }, { "epoch": 0.07317073170731707, "grad_norm": 7.04279574223532, "learning_rate": 3.88888888888889e-06, "loss": 0.8018, "loss_nan_ranks": 0, "loss_rank_avg": 0.8029437065124512, "step": 15, "valid_targets_mean": 2743.9, "valid_targets_min": 1037 }, { "epoch": 0.0975609756097561, "grad_norm": 3.837408273985971, "learning_rate": 5.2777777777777785e-06, "loss": 0.7365, "loss_nan_ranks": 0, "loss_rank_avg": 0.7130635380744934, "step": 20, "valid_targets_mean": 3803.7, "valid_targets_min": 1728 }, { "epoch": 0.12195121951219512, "grad_norm": 2.623752522634304, "learning_rate": 6.666666666666667e-06, "loss": 0.6356, "loss_nan_ranks": 0, "loss_rank_avg": 0.5704323053359985, "step": 25, "valid_targets_mean": 3090.2, "valid_targets_min": 15 }, { "epoch": 0.14634146341463414, "grad_norm": 1.5000325065486164, "learning_rate": 8.055555555555557e-06, "loss": 0.5786, "loss_nan_ranks": 0, "loss_rank_avg": 0.5754331946372986, "step": 30, "valid_targets_mean": 2901.2, "valid_targets_min": 444 }, { "epoch": 0.17073170731707318, "grad_norm": 1.0195672514305119, "learning_rate": 9.444444444444445e-06, "loss": 0.5669, "loss_nan_ranks": 0, "loss_rank_avg": 0.5453179478645325, "step": 35, "valid_targets_mean": 3570.7, "valid_targets_min": 1386 }, { "epoch": 0.1951219512195122, "grad_norm": 0.7667691926950433, "learning_rate": 1.0833333333333334e-05, "loss": 0.5178, "loss_nan_ranks": 0, "loss_rank_avg": 0.4652627110481262, "step": 40, "valid_targets_mean": 4426.4, "valid_targets_min": 1435 }, { "epoch": 0.21951219512195122, "grad_norm": 0.8476292747424702, "learning_rate": 1.2222222222222224e-05, "loss": 0.5122, "loss_nan_ranks": 0, "loss_rank_avg": 0.4817873239517212, "step": 45, "valid_targets_mean": 3324.3, "valid_targets_min": 973 }, { "epoch": 0.24390243902439024, "grad_norm": 0.769487975286158, "learning_rate": 1.3611111111111113e-05, "loss": 0.5046, "loss_nan_ranks": 0, "loss_rank_avg": 0.4777624309062958, "step": 50, "valid_targets_mean": 3475.6, "valid_targets_min": 946 }, { "epoch": 0.2682926829268293, "grad_norm": 0.8003810670887064, "learning_rate": 1.5000000000000002e-05, "loss": 0.5042, "loss_nan_ranks": 0, "loss_rank_avg": 0.4954460859298706, "step": 55, "valid_targets_mean": 3635.2, "valid_targets_min": 1205 }, { "epoch": 0.2926829268292683, "grad_norm": 0.771135490268948, "learning_rate": 1.638888888888889e-05, "loss": 0.4964, "loss_nan_ranks": 0, "loss_rank_avg": 0.4748877286911011, "step": 60, "valid_targets_mean": 3454.8, "valid_targets_min": 841 }, { "epoch": 0.3170731707317073, "grad_norm": 0.8492751703287535, "learning_rate": 1.7777777777777777e-05, "loss": 0.4878, "loss_nan_ranks": 0, "loss_rank_avg": 0.5011531114578247, "step": 65, "valid_targets_mean": 2957.6, "valid_targets_min": 582 }, { "epoch": 0.34146341463414637, "grad_norm": 0.7716257577706961, "learning_rate": 1.916666666666667e-05, "loss": 0.4473, "loss_nan_ranks": 0, "loss_rank_avg": 0.47079670429229736, "step": 70, "valid_targets_mean": 3449.3, "valid_targets_min": 853 }, { "epoch": 0.36585365853658536, "grad_norm": 0.8434747864822957, "learning_rate": 2.0555555555555555e-05, "loss": 0.4557, "loss_nan_ranks": 0, "loss_rank_avg": 0.43814384937286377, "step": 75, "valid_targets_mean": 3228.6, "valid_targets_min": 414 }, { "epoch": 0.3902439024390244, "grad_norm": 0.7458155064497034, "learning_rate": 2.194444444444445e-05, "loss": 0.46, "loss_nan_ranks": 0, "loss_rank_avg": 0.44705730676651, "step": 80, "valid_targets_mean": 3524.7, "valid_targets_min": 875 }, { "epoch": 0.4146341463414634, "grad_norm": 0.8049876326023591, "learning_rate": 2.3333333333333336e-05, "loss": 0.4682, "loss_nan_ranks": 0, "loss_rank_avg": 0.5294123888015747, "step": 85, "valid_targets_mean": 3489.1, "valid_targets_min": 899 }, { "epoch": 0.43902439024390244, "grad_norm": 0.6716168920810122, "learning_rate": 2.4722222222222226e-05, "loss": 0.4455, "loss_nan_ranks": 0, "loss_rank_avg": 0.4092828929424286, "step": 90, "valid_targets_mean": 3573.4, "valid_targets_min": 979 }, { "epoch": 0.4634146341463415, "grad_norm": 0.7612668390820156, "learning_rate": 2.6111111111111114e-05, "loss": 0.4531, "loss_nan_ranks": 0, "loss_rank_avg": 0.478001207113266, "step": 95, "valid_targets_mean": 3585.0, "valid_targets_min": 878 }, { "epoch": 0.4878048780487805, "grad_norm": 0.8720193134806395, "learning_rate": 2.75e-05, "loss": 0.4494, "loss_nan_ranks": 0, "loss_rank_avg": 0.4325084090232849, "step": 100, "valid_targets_mean": 2660.9, "valid_targets_min": 15 }, { "epoch": 0.5121951219512195, "grad_norm": 0.9243802248475099, "learning_rate": 2.888888888888889e-05, "loss": 0.418, "loss_nan_ranks": 0, "loss_rank_avg": 0.4396073818206787, "step": 105, "valid_targets_mean": 2434.6, "valid_targets_min": 772 }, { "epoch": 0.5365853658536586, "grad_norm": 0.7410305143235792, "learning_rate": 3.027777777777778e-05, "loss": 0.4421, "loss_nan_ranks": 0, "loss_rank_avg": 0.39146214723587036, "step": 110, "valid_targets_mean": 3399.3, "valid_targets_min": 265 }, { "epoch": 0.5609756097560976, "grad_norm": 0.9932166784633583, "learning_rate": 3.1666666666666666e-05, "loss": 0.4504, "loss_nan_ranks": 0, "loss_rank_avg": 0.44398099184036255, "step": 115, "valid_targets_mean": 3370.4, "valid_targets_min": 993 }, { "epoch": 0.5853658536585366, "grad_norm": 0.7226380775954716, "learning_rate": 3.3055555555555553e-05, "loss": 0.4249, "loss_nan_ranks": 0, "loss_rank_avg": 0.3628934323787689, "step": 120, "valid_targets_mean": 3499.3, "valid_targets_min": 477 }, { "epoch": 0.6097560975609756, "grad_norm": 0.8823777151936405, "learning_rate": 3.444444444444445e-05, "loss": 0.4417, "loss_nan_ranks": 0, "loss_rank_avg": 0.41109269857406616, "step": 125, "valid_targets_mean": 2774.8, "valid_targets_min": 15 }, { "epoch": 0.6341463414634146, "grad_norm": 0.7500022204056533, "learning_rate": 3.5833333333333335e-05, "loss": 0.4383, "loss_nan_ranks": 0, "loss_rank_avg": 0.4249718189239502, "step": 130, "valid_targets_mean": 3250.6, "valid_targets_min": 1490 }, { "epoch": 0.6585365853658537, "grad_norm": 0.7608537033688073, "learning_rate": 3.722222222222223e-05, "loss": 0.4266, "loss_nan_ranks": 0, "loss_rank_avg": 0.41326218843460083, "step": 135, "valid_targets_mean": 3088.5, "valid_targets_min": 539 }, { "epoch": 0.6829268292682927, "grad_norm": 0.8309125080829076, "learning_rate": 3.8611111111111116e-05, "loss": 0.4089, "loss_nan_ranks": 0, "loss_rank_avg": 0.4202869236469269, "step": 140, "valid_targets_mean": 3053.9, "valid_targets_min": 15 }, { "epoch": 0.7073170731707317, "grad_norm": 0.7091166521276581, "learning_rate": 4e-05, "loss": 0.4009, "loss_nan_ranks": 0, "loss_rank_avg": 0.3712766766548157, "step": 145, "valid_targets_mean": 4122.2, "valid_targets_min": 1176 }, { "epoch": 0.7317073170731707, "grad_norm": 0.7272991052058403, "learning_rate": 3.999851959033532e-05, "loss": 0.4064, "loss_nan_ranks": 0, "loss_rank_avg": 0.4357445240020752, "step": 150, "valid_targets_mean": 3601.1, "valid_targets_min": 886 }, { "epoch": 0.7560975609756098, "grad_norm": 0.7530784025869104, "learning_rate": 3.999407858050255e-05, "loss": 0.4104, "loss_nan_ranks": 0, "loss_rank_avg": 0.4047582745552063, "step": 155, "valid_targets_mean": 2777.0, "valid_targets_min": 976 }, { "epoch": 0.7804878048780488, "grad_norm": 0.5809719279424417, "learning_rate": 3.998667762795307e-05, "loss": 0.4061, "loss_nan_ranks": 0, "loss_rank_avg": 0.3885638117790222, "step": 160, "valid_targets_mean": 5063.3, "valid_targets_min": 1079 }, { "epoch": 0.8048780487804879, "grad_norm": 0.7354864856165856, "learning_rate": 3.9976317828331075e-05, "loss": 0.4108, "loss_nan_ranks": 0, "loss_rank_avg": 0.4471714496612549, "step": 165, "valid_targets_mean": 3957.6, "valid_targets_min": 470 }, { "epoch": 0.8292682926829268, "grad_norm": 0.8186760526901421, "learning_rate": 3.9963000715311284e-05, "loss": 0.4148, "loss_nan_ranks": 0, "loss_rank_avg": 0.47168421745300293, "step": 170, "valid_targets_mean": 2868.2, "valid_targets_min": 916 }, { "epoch": 0.8536585365853658, "grad_norm": 0.8249337383629396, "learning_rate": 3.9946728260372e-05, "loss": 0.4097, "loss_nan_ranks": 0, "loss_rank_avg": 0.4015315771102905, "step": 175, "valid_targets_mean": 3402.2, "valid_targets_min": 475 }, { "epoch": 0.8780487804878049, "grad_norm": 0.802745972860082, "learning_rate": 3.992750287250316e-05, "loss": 0.4262, "loss_nan_ranks": 0, "loss_rank_avg": 0.46255892515182495, "step": 180, "valid_targets_mean": 3201.5, "valid_targets_min": 467 }, { "epoch": 0.9024390243902439, "grad_norm": 0.8369027610414674, "learning_rate": 3.9905327397849776e-05, "loss": 0.4143, "loss_nan_ranks": 0, "loss_rank_avg": 0.438213050365448, "step": 185, "valid_targets_mean": 2837.9, "valid_targets_min": 1004 }, { "epoch": 0.926829268292683, "grad_norm": 0.7620682825729357, "learning_rate": 3.988020511929055e-05, "loss": 0.4068, "loss_nan_ranks": 0, "loss_rank_avg": 0.3820480406284332, "step": 190, "valid_targets_mean": 3607.6, "valid_targets_min": 15 }, { "epoch": 0.9512195121951219, "grad_norm": 0.8231019760594912, "learning_rate": 3.985213975595188e-05, "loss": 0.4169, "loss_nan_ranks": 0, "loss_rank_avg": 0.4414719045162201, "step": 195, "valid_targets_mean": 2511.9, "valid_targets_min": 363 }, { "epoch": 0.975609756097561, "grad_norm": 0.7380667621345177, "learning_rate": 3.982113546265727e-05, "loss": 0.4196, "loss_nan_ranks": 0, "loss_rank_avg": 0.4193473160266876, "step": 200, "valid_targets_mean": 3748.1, "valid_targets_min": 692 }, { "epoch": 1.0, "grad_norm": 0.7458650521840304, "learning_rate": 3.978719682931227e-05, "loss": 0.3994, "loss_nan_ranks": 0, "loss_rank_avg": 0.3914061188697815, "step": 205, "valid_targets_mean": 3714.5, "valid_targets_min": 1568 }, { "epoch": 1.024390243902439, "grad_norm": 0.8139396617003434, "learning_rate": 3.9750328880224964e-05, "loss": 0.3843, "loss_nan_ranks": 0, "loss_rank_avg": 0.3921425938606262, "step": 210, "valid_targets_mean": 2894.5, "valid_targets_min": 1142 }, { "epoch": 1.048780487804878, "grad_norm": 0.920561844317153, "learning_rate": 3.9710537073362154e-05, "loss": 0.3676, "loss_nan_ranks": 0, "loss_rank_avg": 0.37253957986831665, "step": 215, "valid_targets_mean": 2493.4, "valid_targets_min": 1141 }, { "epoch": 1.0731707317073171, "grad_norm": 0.8730069289304332, "learning_rate": 3.9667827299541405e-05, "loss": 0.376, "loss_nan_ranks": 0, "loss_rank_avg": 0.3857506215572357, "step": 220, "valid_targets_mean": 2558.9, "valid_targets_min": 848 }, { "epoch": 1.0975609756097562, "grad_norm": 0.6229292309392103, "learning_rate": 3.962220588155889e-05, "loss": 0.3914, "loss_nan_ranks": 0, "loss_rank_avg": 0.3528486490249634, "step": 225, "valid_targets_mean": 4876.5, "valid_targets_min": 15 }, { "epoch": 1.1219512195121952, "grad_norm": 1.0416514376225245, "learning_rate": 3.957367957325344e-05, "loss": 0.3891, "loss_nan_ranks": 0, "loss_rank_avg": 0.40384507179260254, "step": 230, "valid_targets_mean": 2797.6, "valid_targets_min": 654 }, { "epoch": 1.146341463414634, "grad_norm": 0.8974767692382145, "learning_rate": 3.952225555850662e-05, "loss": 0.3805, "loss_nan_ranks": 0, "loss_rank_avg": 0.41643357276916504, "step": 235, "valid_targets_mean": 3033.7, "valid_targets_min": 1141 }, { "epoch": 1.170731707317073, "grad_norm": 0.6882577926862582, "learning_rate": 3.9467941450179276e-05, "loss": 0.4021, "loss_nan_ranks": 0, "loss_rank_avg": 0.34199321269989014, "step": 240, "valid_targets_mean": 3899.6, "valid_targets_min": 947 }, { "epoch": 1.1951219512195121, "grad_norm": 0.7598060326395756, "learning_rate": 3.94107452889845e-05, "loss": 0.3768, "loss_nan_ranks": 0, "loss_rank_avg": 0.3925563097000122, "step": 245, "valid_targets_mean": 3416.5, "valid_targets_min": 976 }, { "epoch": 1.2195121951219512, "grad_norm": 0.7953917356483156, "learning_rate": 3.9350675542297273e-05, "loss": 0.3985, "loss_nan_ranks": 0, "loss_rank_avg": 0.3889926075935364, "step": 250, "valid_targets_mean": 3197.0, "valid_targets_min": 960 }, { "epoch": 1.2439024390243902, "grad_norm": 0.8560153490249861, "learning_rate": 3.9287741102900956e-05, "loss": 0.3822, "loss_nan_ranks": 0, "loss_rank_avg": 0.3994605541229248, "step": 255, "valid_targets_mean": 2937.1, "valid_targets_min": 576 }, { "epoch": 1.2682926829268293, "grad_norm": 0.7105463458999842, "learning_rate": 3.922195128767077e-05, "loss": 0.3531, "loss_nan_ranks": 0, "loss_rank_avg": 0.32579752802848816, "step": 260, "valid_targets_mean": 3560.3, "valid_targets_min": 363 }, { "epoch": 1.2926829268292683, "grad_norm": 0.7542653042504447, "learning_rate": 3.915331583619455e-05, "loss": 0.3768, "loss_nan_ranks": 0, "loss_rank_avg": 0.3614310324192047, "step": 265, "valid_targets_mean": 3289.0, "valid_targets_min": 849 }, { "epoch": 1.3170731707317074, "grad_norm": 0.8497951456582137, "learning_rate": 3.908184490933087e-05, "loss": 0.3667, "loss_nan_ranks": 0, "loss_rank_avg": 0.3782227337360382, "step": 270, "valid_targets_mean": 2581.6, "valid_targets_min": 696 }, { "epoch": 1.3414634146341464, "grad_norm": 0.7719791862885925, "learning_rate": 3.90075490877048e-05, "loss": 0.3852, "loss_nan_ranks": 0, "loss_rank_avg": 0.37844318151474, "step": 275, "valid_targets_mean": 3066.4, "valid_targets_min": 15 }, { "epoch": 1.3658536585365852, "grad_norm": 0.7223338146517535, "learning_rate": 3.893043937014161e-05, "loss": 0.3607, "loss_nan_ranks": 0, "loss_rank_avg": 0.36418038606643677, "step": 280, "valid_targets_mean": 3117.9, "valid_targets_min": 940 }, { "epoch": 1.3902439024390243, "grad_norm": 0.7019756644424542, "learning_rate": 3.885052717203839e-05, "loss": 0.3789, "loss_nan_ranks": 0, "loss_rank_avg": 0.39389437437057495, "step": 285, "valid_targets_mean": 3810.8, "valid_targets_min": 989 }, { "epoch": 1.4146341463414633, "grad_norm": 0.7126900107048149, "learning_rate": 3.876782432367419e-05, "loss": 0.3593, "loss_nan_ranks": 0, "loss_rank_avg": 0.3678337335586548, "step": 290, "valid_targets_mean": 3401.9, "valid_targets_min": 417 }, { "epoch": 1.4390243902439024, "grad_norm": 0.6043008862137972, "learning_rate": 3.86823430684586e-05, "loss": 0.3787, "loss_nan_ranks": 0, "loss_rank_avg": 0.3658294081687927, "step": 295, "valid_targets_mean": 4910.9, "valid_targets_min": 1152 }, { "epoch": 1.4634146341463414, "grad_norm": 0.9549115770574411, "learning_rate": 3.859409606111927e-05, "loss": 0.3935, "loss_nan_ranks": 0, "loss_rank_avg": 0.41088318824768066, "step": 300, "valid_targets_mean": 3028.8, "valid_targets_min": 288 }, { "epoch": 1.4878048780487805, "grad_norm": 0.8424403005867168, "learning_rate": 3.850309636582844e-05, "loss": 0.3699, "loss_nan_ranks": 0, "loss_rank_avg": 0.34833067655563354, "step": 305, "valid_targets_mean": 4002.8, "valid_targets_min": 1645 }, { "epoch": 1.5121951219512195, "grad_norm": 0.6306861230305417, "learning_rate": 3.8409357454268954e-05, "loss": 0.3777, "loss_nan_ranks": 0, "loss_rank_avg": 0.37073996663093567, "step": 310, "valid_targets_mean": 5012.4, "valid_targets_min": 1119 }, { "epoch": 1.5365853658536586, "grad_norm": 0.7155539048246429, "learning_rate": 3.831289320363988e-05, "loss": 0.3883, "loss_nan_ranks": 0, "loss_rank_avg": 0.37950462102890015, "step": 315, "valid_targets_mean": 4102.6, "valid_targets_min": 265 }, { "epoch": 1.5609756097560976, "grad_norm": 0.7974667376832275, "learning_rate": 3.821371789460211e-05, "loss": 0.3614, "loss_nan_ranks": 0, "loss_rank_avg": 0.36880871653556824, "step": 320, "valid_targets_mean": 3079.6, "valid_targets_min": 959 }, { "epoch": 1.5853658536585367, "grad_norm": 0.8043178482245835, "learning_rate": 3.8111846209164245e-05, "loss": 0.3748, "loss_nan_ranks": 0, "loss_rank_avg": 0.40152832865715027, "step": 325, "valid_targets_mean": 2942.4, "valid_targets_min": 1052 }, { "epoch": 1.6097560975609757, "grad_norm": 0.7724860293036107, "learning_rate": 3.800729322850905e-05, "loss": 0.3763, "loss_nan_ranks": 0, "loss_rank_avg": 0.39380210638046265, "step": 330, "valid_targets_mean": 3036.5, "valid_targets_min": 886 }, { "epoch": 1.6341463414634148, "grad_norm": 0.8482463253190202, "learning_rate": 3.790007443076083e-05, "loss": 0.3838, "loss_nan_ranks": 0, "loss_rank_avg": 0.3798723816871643, "step": 335, "valid_targets_mean": 3018.6, "valid_targets_min": 15 }, { "epoch": 1.6585365853658538, "grad_norm": 0.8833799570209917, "learning_rate": 3.779020568869402e-05, "loss": 0.3559, "loss_nan_ranks": 0, "loss_rank_avg": 0.36824825406074524, "step": 340, "valid_targets_mean": 2802.5, "valid_targets_min": 711 }, { "epoch": 1.6829268292682928, "grad_norm": 0.6829517524240788, "learning_rate": 3.767770326738339e-05, "loss": 0.3816, "loss_nan_ranks": 0, "loss_rank_avg": 0.3737407326698303, "step": 345, "valid_targets_mean": 4252.8, "valid_targets_min": 876 }, { "epoch": 1.7073170731707317, "grad_norm": 0.6903358415372074, "learning_rate": 3.7562583821796114e-05, "loss": 0.3735, "loss_nan_ranks": 0, "loss_rank_avg": 0.36414116621017456, "step": 350, "valid_targets_mean": 3665.8, "valid_targets_min": 660 }, { "epoch": 1.7317073170731707, "grad_norm": 0.6233603443152537, "learning_rate": 3.7444864394326186e-05, "loss": 0.3607, "loss_nan_ranks": 0, "loss_rank_avg": 0.3237985670566559, "step": 355, "valid_targets_mean": 4911.9, "valid_targets_min": 711 }, { "epoch": 1.7560975609756098, "grad_norm": 0.9214069599336896, "learning_rate": 3.732456241227141e-05, "loss": 0.3591, "loss_nan_ranks": 0, "loss_rank_avg": 0.3726973533630371, "step": 360, "valid_targets_mean": 3513.8, "valid_targets_min": 1456 }, { "epoch": 1.7804878048780488, "grad_norm": 0.7500860882422733, "learning_rate": 3.7201695685253476e-05, "loss": 0.3896, "loss_nan_ranks": 0, "loss_rank_avg": 0.3697166442871094, "step": 365, "valid_targets_mean": 3385.0, "valid_targets_min": 825 }, { "epoch": 1.8048780487804879, "grad_norm": 0.8780041171925682, "learning_rate": 3.7076282402581404e-05, "loss": 0.3927, "loss_nan_ranks": 0, "loss_rank_avg": 0.41772669553756714, "step": 370, "valid_targets_mean": 2651.1, "valid_targets_min": 1157 }, { "epoch": 1.8292682926829267, "grad_norm": 0.7547594930022417, "learning_rate": 3.694834113055877e-05, "loss": 0.3652, "loss_nan_ranks": 0, "loss_rank_avg": 0.3808954358100891, "step": 375, "valid_targets_mean": 3378.3, "valid_targets_min": 865 }, { "epoch": 1.8536585365853657, "grad_norm": 1.062524119548778, "learning_rate": 3.681789080973514e-05, "loss": 0.3603, "loss_nan_ranks": 0, "loss_rank_avg": 0.37782055139541626, "step": 380, "valid_targets_mean": 2781.2, "valid_targets_min": 448 }, { "epoch": 1.8780487804878048, "grad_norm": 0.7747602704814769, "learning_rate": 3.668495075210207e-05, "loss": 0.3593, "loss_nan_ranks": 0, "loss_rank_avg": 0.3768714666366577, "step": 385, "valid_targets_mean": 3837.8, "valid_targets_min": 1132 }, { "epoch": 1.9024390243902438, "grad_norm": 0.6379661220392324, "learning_rate": 3.654954063823419e-05, "loss": 0.3631, "loss_nan_ranks": 0, "loss_rank_avg": 0.3241764307022095, "step": 390, "valid_targets_mean": 3892.8, "valid_targets_min": 1626 }, { "epoch": 1.9268292682926829, "grad_norm": 0.8306626736395537, "learning_rate": 3.641168051437562e-05, "loss": 0.3838, "loss_nan_ranks": 0, "loss_rank_avg": 0.3675532937049866, "step": 395, "valid_targets_mean": 3192.4, "valid_targets_min": 916 }, { "epoch": 1.951219512195122, "grad_norm": 0.6795328481586967, "learning_rate": 3.627139078947233e-05, "loss": 0.3785, "loss_nan_ranks": 0, "loss_rank_avg": 0.3840370774269104, "step": 400, "valid_targets_mean": 4188.1, "valid_targets_min": 1483 }, { "epoch": 1.975609756097561, "grad_norm": 0.6384377969297063, "learning_rate": 3.6128692232150775e-05, "loss": 0.3547, "loss_nan_ranks": 0, "loss_rank_avg": 0.3258434534072876, "step": 405, "valid_targets_mean": 3781.2, "valid_targets_min": 492 }, { "epoch": 2.0, "grad_norm": 0.6449462111499721, "learning_rate": 3.598360596764331e-05, "loss": 0.3574, "loss_nan_ranks": 0, "loss_rank_avg": 0.3393850326538086, "step": 410, "valid_targets_mean": 4006.3, "valid_targets_min": 719 }, { "epoch": 2.024390243902439, "grad_norm": 0.8321218841560414, "learning_rate": 3.583615347466075e-05, "loss": 0.346, "loss_nan_ranks": 0, "loss_rank_avg": 0.3338969945907593, "step": 415, "valid_targets_mean": 2771.9, "valid_targets_min": 928 }, { "epoch": 2.048780487804878, "grad_norm": 0.7320573520455086, "learning_rate": 3.568635658221266e-05, "loss": 0.3131, "loss_nan_ranks": 0, "loss_rank_avg": 0.31023988127708435, "step": 420, "valid_targets_mean": 3438.1, "valid_targets_min": 1040 }, { "epoch": 2.073170731707317, "grad_norm": 0.8445720664474741, "learning_rate": 3.553423746637577e-05, "loss": 0.3289, "loss_nan_ranks": 0, "loss_rank_avg": 0.3350527584552765, "step": 425, "valid_targets_mean": 2859.0, "valid_targets_min": 15 }, { "epoch": 2.097560975609756, "grad_norm": 0.9479920623957632, "learning_rate": 3.537981864701101e-05, "loss": 0.3556, "loss_nan_ranks": 0, "loss_rank_avg": 0.39427098631858826, "step": 430, "valid_targets_mean": 2811.9, "valid_targets_min": 760 }, { "epoch": 2.1219512195121952, "grad_norm": 0.740296255448551, "learning_rate": 3.5223122984429645e-05, "loss": 0.3533, "loss_nan_ranks": 0, "loss_rank_avg": 0.3620370626449585, "step": 435, "valid_targets_mean": 3927.2, "valid_targets_min": 1332 }, { "epoch": 2.1463414634146343, "grad_norm": 0.8475846605532067, "learning_rate": 3.5064173676008994e-05, "loss": 0.3302, "loss_nan_ranks": 0, "loss_rank_avg": 0.3536580502986908, "step": 440, "valid_targets_mean": 3192.6, "valid_targets_min": 15 }, { "epoch": 2.1707317073170733, "grad_norm": 0.7927213460960987, "learning_rate": 3.4902994252758295e-05, "loss": 0.3471, "loss_nan_ranks": 0, "loss_rank_avg": 0.35433822870254517, "step": 445, "valid_targets_mean": 3070.1, "valid_targets_min": 1186 }, { "epoch": 2.1951219512195124, "grad_norm": 0.7610761241152856, "learning_rate": 3.473960857583515e-05, "loss": 0.3171, "loss_nan_ranks": 0, "loss_rank_avg": 0.32693955302238464, "step": 450, "valid_targets_mean": 3352.6, "valid_targets_min": 470 }, { "epoch": 2.2195121951219514, "grad_norm": 0.7661120709390085, "learning_rate": 3.457404083301308e-05, "loss": 0.347, "loss_nan_ranks": 0, "loss_rank_avg": 0.39784368872642517, "step": 455, "valid_targets_mean": 2986.9, "valid_targets_min": 659 }, { "epoch": 2.2439024390243905, "grad_norm": 0.7246543900328846, "learning_rate": 3.440631553510074e-05, "loss": 0.3289, "loss_nan_ranks": 0, "loss_rank_avg": 0.34927499294281006, "step": 460, "valid_targets_mean": 4064.9, "valid_targets_min": 895 }, { "epoch": 2.2682926829268295, "grad_norm": 0.8008639860765635, "learning_rate": 3.423645751231334e-05, "loss": 0.3214, "loss_nan_ranks": 0, "loss_rank_avg": 0.31427597999572754, "step": 465, "valid_targets_mean": 3223.2, "valid_targets_min": 948 }, { "epoch": 2.292682926829268, "grad_norm": 0.8738414902443441, "learning_rate": 3.4064491910596726e-05, "loss": 0.3219, "loss_nan_ranks": 0, "loss_rank_avg": 0.34688445925712585, "step": 470, "valid_targets_mean": 2696.8, "valid_targets_min": 414 }, { "epoch": 2.317073170731707, "grad_norm": 0.8678786719122115, "learning_rate": 3.389044418790478e-05, "loss": 0.3375, "loss_nan_ranks": 0, "loss_rank_avg": 0.36265698075294495, "step": 475, "valid_targets_mean": 3027.2, "valid_targets_min": 827 }, { "epoch": 2.341463414634146, "grad_norm": 0.8786827707032558, "learning_rate": 3.371434011043059e-05, "loss": 0.337, "loss_nan_ranks": 0, "loss_rank_avg": 0.3238053321838379, "step": 480, "valid_targets_mean": 3018.0, "valid_targets_min": 879 }, { "epoch": 2.3658536585365852, "grad_norm": 0.7675111501407975, "learning_rate": 3.353620574879197e-05, "loss": 0.3149, "loss_nan_ranks": 0, "loss_rank_avg": 0.33381664752960205, "step": 485, "valid_targets_mean": 3482.8, "valid_targets_min": 822 }, { "epoch": 2.3902439024390243, "grad_norm": 0.6390042122755262, "learning_rate": 3.335606747417199e-05, "loss": 0.3216, "loss_nan_ranks": 0, "loss_rank_avg": 0.2951709032058716, "step": 490, "valid_targets_mean": 4398.2, "valid_targets_min": 778 }, { "epoch": 2.4146341463414633, "grad_norm": 0.8212805538188749, "learning_rate": 3.317395195441492e-05, "loss": 0.3528, "loss_nan_ranks": 0, "loss_rank_avg": 0.3706200122833252, "step": 495, "valid_targets_mean": 2767.0, "valid_targets_min": 15 }, { "epoch": 2.4390243902439024, "grad_norm": 0.8090900974135942, "learning_rate": 3.29898861500783e-05, "loss": 0.3328, "loss_nan_ranks": 0, "loss_rank_avg": 0.3654935956001282, "step": 500, "valid_targets_mean": 3085.2, "valid_targets_min": 242 }, { "epoch": 2.4634146341463414, "grad_norm": 0.7178716047967076, "learning_rate": 3.280389731044172e-05, "loss": 0.3202, "loss_nan_ranks": 0, "loss_rank_avg": 0.32301709055900574, "step": 505, "valid_targets_mean": 3715.7, "valid_targets_min": 716 }, { "epoch": 2.4878048780487805, "grad_norm": 0.7793422281432595, "learning_rate": 3.261601296947274e-05, "loss": 0.3201, "loss_nan_ranks": 0, "loss_rank_avg": 0.335943341255188, "step": 510, "valid_targets_mean": 3158.2, "valid_targets_min": 734 }, { "epoch": 2.5121951219512195, "grad_norm": 0.7681138811218, "learning_rate": 3.242626094175078e-05, "loss": 0.3346, "loss_nan_ranks": 0, "loss_rank_avg": 0.35204848647117615, "step": 515, "valid_targets_mean": 3170.2, "valid_targets_min": 1079 }, { "epoch": 2.5365853658536586, "grad_norm": 0.6712571944834722, "learning_rate": 3.223466931834942e-05, "loss": 0.318, "loss_nan_ranks": 0, "loss_rank_avg": 0.29909318685531616, "step": 520, "valid_targets_mean": 4161.2, "valid_targets_min": 766 }, { "epoch": 2.5609756097560976, "grad_norm": 0.8147312751968008, "learning_rate": 3.204126646267775e-05, "loss": 0.336, "loss_nan_ranks": 0, "loss_rank_avg": 0.3322216272354126, "step": 525, "valid_targets_mean": 3378.3, "valid_targets_min": 443 }, { "epoch": 2.5853658536585367, "grad_norm": 0.9657053285042281, "learning_rate": 3.184608100628144e-05, "loss": 0.3356, "loss_nan_ranks": 0, "loss_rank_avg": 0.3430883288383484, "step": 530, "valid_targets_mean": 3151.1, "valid_targets_min": 1443 }, { "epoch": 2.6097560975609757, "grad_norm": 0.8322031702520009, "learning_rate": 3.1649141844604104e-05, "loss": 0.3355, "loss_nan_ranks": 0, "loss_rank_avg": 0.338076114654541, "step": 535, "valid_targets_mean": 3503.4, "valid_targets_min": 1613 }, { "epoch": 2.6341463414634148, "grad_norm": 0.6205044905999079, "learning_rate": 3.1450478132709564e-05, "loss": 0.3285, "loss_nan_ranks": 0, "loss_rank_avg": 0.27576902508735657, "step": 540, "valid_targets_mean": 3844.5, "valid_targets_min": 1855 }, { "epoch": 2.658536585365854, "grad_norm": 0.6286837078803078, "learning_rate": 3.125011928096573e-05, "loss": 0.3151, "loss_nan_ranks": 0, "loss_rank_avg": 0.3034632205963135, "step": 545, "valid_targets_mean": 4288.0, "valid_targets_min": 719 }, { "epoch": 2.682926829268293, "grad_norm": 0.6004872520430579, "learning_rate": 3.1048094950690666e-05, "loss": 0.3253, "loss_nan_ranks": 0, "loss_rank_avg": 0.31010162830352783, "step": 550, "valid_targets_mean": 4788.6, "valid_targets_min": 1098 }, { "epoch": 2.7073170731707314, "grad_norm": 0.7437431500245776, "learning_rate": 3.084443504976146e-05, "loss": 0.3211, "loss_nan_ranks": 0, "loss_rank_avg": 0.3372463881969452, "step": 555, "valid_targets_mean": 3434.3, "valid_targets_min": 706 }, { "epoch": 2.7317073170731705, "grad_norm": 0.7853234418992919, "learning_rate": 3.063916972818668e-05, "loss": 0.3079, "loss_nan_ranks": 0, "loss_rank_avg": 0.32306212186813354, "step": 560, "valid_targets_mean": 3142.7, "valid_targets_min": 509 }, { "epoch": 2.7560975609756095, "grad_norm": 0.7334837795099509, "learning_rate": 3.0432329373642932e-05, "loss": 0.3296, "loss_nan_ranks": 0, "loss_rank_avg": 0.35558754205703735, "step": 565, "valid_targets_mean": 3550.8, "valid_targets_min": 951 }, { "epoch": 2.7804878048780486, "grad_norm": 0.7080533587312593, "learning_rate": 3.022394460697619e-05, "loss": 0.3424, "loss_nan_ranks": 0, "loss_rank_avg": 0.33535170555114746, "step": 570, "valid_targets_mean": 3713.6, "valid_targets_min": 947 }, { "epoch": 2.8048780487804876, "grad_norm": 0.73150297718836, "learning_rate": 3.0014046277668717e-05, "loss": 0.34, "loss_nan_ranks": 0, "loss_rank_avg": 0.3074615001678467, "step": 575, "valid_targets_mean": 3663.5, "valid_targets_min": 215 }, { "epoch": 2.8292682926829267, "grad_norm": 0.6590534931503084, "learning_rate": 2.980266545927203e-05, "loss": 0.3348, "loss_nan_ranks": 0, "loss_rank_avg": 0.32705825567245483, "step": 580, "valid_targets_mean": 4970.6, "valid_targets_min": 1788 }, { "epoch": 2.8536585365853657, "grad_norm": 0.6817244060162384, "learning_rate": 2.9589833444806792e-05, "loss": 0.3144, "loss_nan_ranks": 0, "loss_rank_avg": 0.30664777755737305, "step": 585, "valid_targets_mean": 3579.2, "valid_targets_min": 917 }, { "epoch": 2.8780487804878048, "grad_norm": 0.7336497853782242, "learning_rate": 2.9375581742130114e-05, "loss": 0.3305, "loss_nan_ranks": 0, "loss_rank_avg": 0.3490193486213684, "step": 590, "valid_targets_mean": 3624.7, "valid_targets_min": 876 }, { "epoch": 2.902439024390244, "grad_norm": 0.7790909299976445, "learning_rate": 2.9159942069271134e-05, "loss": 0.3246, "loss_nan_ranks": 0, "loss_rank_avg": 0.3473791778087616, "step": 595, "valid_targets_mean": 3575.1, "valid_targets_min": 532 }, { "epoch": 2.926829268292683, "grad_norm": 0.8701084213390046, "learning_rate": 2.8942946349735418e-05, "loss": 0.3389, "loss_nan_ranks": 0, "loss_rank_avg": 0.3636641800403595, "step": 600, "valid_targets_mean": 3125.3, "valid_targets_min": 1655 }, { "epoch": 2.951219512195122, "grad_norm": 0.6582306859910523, "learning_rate": 2.872462670777902e-05, "loss": 0.3444, "loss_nan_ranks": 0, "loss_rank_avg": 0.34115156531333923, "step": 605, "valid_targets_mean": 4006.7, "valid_targets_min": 856 }, { "epoch": 2.975609756097561, "grad_norm": 0.6682235966360865, "learning_rate": 2.850501546365272e-05, "loss": 0.3298, "loss_nan_ranks": 0, "loss_rank_avg": 0.2904140055179596, "step": 610, "valid_targets_mean": 4037.1, "valid_targets_min": 1157 }, { "epoch": 3.0, "grad_norm": 0.7895951757040935, "learning_rate": 2.8284145128817356e-05, "loss": 0.3203, "loss_nan_ranks": 0, "loss_rank_avg": 0.354103684425354, "step": 615, "valid_targets_mean": 3262.8, "valid_targets_min": 841 }, { "epoch": 3.024390243902439, "grad_norm": 0.6974883146545042, "learning_rate": 2.8062048401130756e-05, "loss": 0.2882, "loss_nan_ranks": 0, "loss_rank_avg": 0.24095089733600616, "step": 620, "valid_targets_mean": 3347.9, "valid_targets_min": 883 }, { "epoch": 3.048780487804878, "grad_norm": 0.7109250659036205, "learning_rate": 2.7838758160007143e-05, "loss": 0.2966, "loss_nan_ranks": 0, "loss_rank_avg": 0.2862807512283325, "step": 625, "valid_targets_mean": 4033.4, "valid_targets_min": 1140 }, { "epoch": 3.073170731707317, "grad_norm": 0.7946160604885082, "learning_rate": 2.7614307461549608e-05, "loss": 0.2901, "loss_nan_ranks": 0, "loss_rank_avg": 0.29598280787467957, "step": 630, "valid_targets_mean": 3057.0, "valid_targets_min": 1115 }, { "epoch": 3.097560975609756, "grad_norm": 0.7097254573358867, "learning_rate": 2.7388729533656486e-05, "loss": 0.3045, "loss_nan_ranks": 0, "loss_rank_avg": 0.3431246876716614, "step": 635, "valid_targets_mean": 4266.8, "valid_targets_min": 1238 }, { "epoch": 3.1219512195121952, "grad_norm": 0.6657050549479481, "learning_rate": 2.716205777110222e-05, "loss": 0.2904, "loss_nan_ranks": 0, "loss_rank_avg": 0.2793746590614319, "step": 640, "valid_targets_mean": 4308.1, "valid_targets_min": 1604 }, { "epoch": 3.1463414634146343, "grad_norm": 0.8163459205438388, "learning_rate": 2.693432573059362e-05, "loss": 0.3019, "loss_nan_ranks": 0, "loss_rank_avg": 0.3036867678165436, "step": 645, "valid_targets_mean": 3326.4, "valid_targets_min": 1119 }, { "epoch": 3.1707317073170733, "grad_norm": 0.7669109525500525, "learning_rate": 2.6705567125802062e-05, "loss": 0.3081, "loss_nan_ranks": 0, "loss_rank_avg": 0.3797120451927185, "step": 650, "valid_targets_mean": 3517.2, "valid_targets_min": 1445 }, { "epoch": 3.1951219512195124, "grad_norm": 0.7809371072538767, "learning_rate": 2.6475815822372477e-05, "loss": 0.2962, "loss_nan_ranks": 0, "loss_rank_avg": 0.2904462516307831, "step": 655, "valid_targets_mean": 3374.4, "valid_targets_min": 752 }, { "epoch": 3.2195121951219514, "grad_norm": 0.8873754094152563, "learning_rate": 2.624510583290988e-05, "loss": 0.3042, "loss_nan_ranks": 0, "loss_rank_avg": 0.2988511919975281, "step": 660, "valid_targets_mean": 2854.0, "valid_targets_min": 283 }, { "epoch": 3.2439024390243905, "grad_norm": 0.6937976107448536, "learning_rate": 2.601347131194408e-05, "loss": 0.2824, "loss_nan_ranks": 0, "loss_rank_avg": 0.2786598205566406, "step": 665, "valid_targets_mean": 3895.2, "valid_targets_min": 848 }, { "epoch": 3.2682926829268295, "grad_norm": 0.6938504187586086, "learning_rate": 2.578094655087343e-05, "loss": 0.2887, "loss_nan_ranks": 0, "loss_rank_avg": 0.284922331571579, "step": 670, "valid_targets_mean": 3660.7, "valid_targets_min": 376 }, { "epoch": 3.292682926829268, "grad_norm": 0.6657085168651424, "learning_rate": 2.5547565972888287e-05, "loss": 0.2758, "loss_nan_ranks": 0, "loss_rank_avg": 0.2657183110713959, "step": 675, "valid_targets_mean": 4128.4, "valid_targets_min": 1404 }, { "epoch": 3.317073170731707, "grad_norm": 0.7187359550353202, "learning_rate": 2.5313364127874974e-05, "loss": 0.2936, "loss_nan_ranks": 0, "loss_rank_avg": 0.27557191252708435, "step": 680, "valid_targets_mean": 3624.4, "valid_targets_min": 796 }, { "epoch": 3.341463414634146, "grad_norm": 0.7875009925701425, "learning_rate": 2.507837568730097e-05, "loss": 0.289, "loss_nan_ranks": 0, "loss_rank_avg": 0.3072514533996582, "step": 685, "valid_targets_mean": 3990.4, "valid_targets_min": 15 }, { "epoch": 3.3658536585365852, "grad_norm": 0.842619447531655, "learning_rate": 2.484263543908213e-05, "loss": 0.3008, "loss_nan_ranks": 0, "loss_rank_avg": 0.3235273063182831, "step": 690, "valid_targets_mean": 2521.5, "valid_targets_min": 1158 }, { "epoch": 3.3902439024390243, "grad_norm": 0.7753973868751888, "learning_rate": 2.460617828243263e-05, "loss": 0.2919, "loss_nan_ranks": 0, "loss_rank_avg": 0.27399516105651855, "step": 695, "valid_targets_mean": 3223.4, "valid_targets_min": 899 }, { "epoch": 3.4146341463414633, "grad_norm": 0.7336548815168598, "learning_rate": 2.4369039222698484e-05, "loss": 0.2898, "loss_nan_ranks": 0, "loss_rank_avg": 0.2887844443321228, "step": 700, "valid_targets_mean": 3239.1, "valid_targets_min": 979 }, { "epoch": 3.4390243902439024, "grad_norm": 0.7641087596057364, "learning_rate": 2.413125336617526e-05, "loss": 0.3052, "loss_nan_ranks": 0, "loss_rank_avg": 0.30372944474220276, "step": 705, "valid_targets_mean": 3645.4, "valid_targets_min": 1803 }, { "epoch": 3.4634146341463414, "grad_norm": 0.7436323943905782, "learning_rate": 2.3892855914910996e-05, "loss": 0.2925, "loss_nan_ranks": 0, "loss_rank_avg": 0.2785486578941345, "step": 710, "valid_targets_mean": 3915.2, "valid_targets_min": 1063 }, { "epoch": 3.4878048780487805, "grad_norm": 0.7111257893891988, "learning_rate": 2.3653882161494758e-05, "loss": 0.2896, "loss_nan_ranks": 0, "loss_rank_avg": 0.2819863557815552, "step": 715, "valid_targets_mean": 3914.8, "valid_targets_min": 1277 }, { "epoch": 3.5121951219512195, "grad_norm": 0.7237969701861009, "learning_rate": 2.3414367483831972e-05, "loss": 0.2972, "loss_nan_ranks": 0, "loss_rank_avg": 0.2876496911048889, "step": 720, "valid_targets_mean": 3642.9, "valid_targets_min": 575 }, { "epoch": 3.5365853658536586, "grad_norm": 0.750501060308758, "learning_rate": 2.3174347339906994e-05, "loss": 0.2909, "loss_nan_ranks": 0, "loss_rank_avg": 0.31642240285873413, "step": 725, "valid_targets_mean": 3697.5, "valid_targets_min": 917 }, { "epoch": 3.5609756097560976, "grad_norm": 0.7241041312746126, "learning_rate": 2.293385726253392e-05, "loss": 0.2953, "loss_nan_ranks": 0, "loss_rank_avg": 0.2714157700538635, "step": 730, "valid_targets_mean": 3771.2, "valid_targets_min": 723 }, { "epoch": 3.5853658536585367, "grad_norm": 0.6954133761615647, "learning_rate": 2.2692932854096218e-05, "loss": 0.2895, "loss_nan_ranks": 0, "loss_rank_avg": 0.25798699259757996, "step": 735, "valid_targets_mean": 3402.4, "valid_targets_min": 1080 }, { "epoch": 3.6097560975609757, "grad_norm": 0.7417839254349834, "learning_rate": 2.245160978127616e-05, "loss": 0.2971, "loss_nan_ranks": 0, "loss_rank_avg": 0.28403568267822266, "step": 740, "valid_targets_mean": 3251.5, "valid_targets_min": 841 }, { "epoch": 3.6341463414634148, "grad_norm": 0.7403025968890654, "learning_rate": 2.220992376977468e-05, "loss": 0.2993, "loss_nan_ranks": 0, "loss_rank_avg": 0.32009467482566833, "step": 745, "valid_targets_mean": 3620.8, "valid_targets_min": 1178 }, { "epoch": 3.658536585365854, "grad_norm": 0.9161578181958758, "learning_rate": 2.1967910599022508e-05, "loss": 0.2898, "loss_nan_ranks": 0, "loss_rank_avg": 0.331581711769104, "step": 750, "valid_targets_mean": 2408.2, "valid_targets_min": 507 }, { "epoch": 3.682926829268293, "grad_norm": 0.881155980649594, "learning_rate": 2.1725606096883324e-05, "loss": 0.2915, "loss_nan_ranks": 0, "loss_rank_avg": 0.29733753204345703, "step": 755, "valid_targets_mean": 2705.3, "valid_targets_min": 257 }, { "epoch": 3.7073170731707314, "grad_norm": 0.7297940343575094, "learning_rate": 2.1483046134349813e-05, "loss": 0.2876, "loss_nan_ranks": 0, "loss_rank_avg": 0.28595206141471863, "step": 760, "valid_targets_mean": 4102.2, "valid_targets_min": 318 }, { "epoch": 3.7317073170731705, "grad_norm": 0.8649348588673672, "learning_rate": 2.1240266620233263e-05, "loss": 0.3112, "loss_nan_ranks": 0, "loss_rank_avg": 0.31264781951904297, "step": 765, "valid_targets_mean": 2636.3, "valid_targets_min": 772 }, { "epoch": 3.7560975609756095, "grad_norm": 0.7040450574601868, "learning_rate": 2.099730349584757e-05, "loss": 0.2832, "loss_nan_ranks": 0, "loss_rank_avg": 0.3125355541706085, "step": 770, "valid_targets_mean": 3862.3, "valid_targets_min": 867 }, { "epoch": 3.7804878048780486, "grad_norm": 0.6737989713489716, "learning_rate": 2.0754192729688496e-05, "loss": 0.2996, "loss_nan_ranks": 0, "loss_rank_avg": 0.2767524719238281, "step": 775, "valid_targets_mean": 3927.2, "valid_targets_min": 1667 }, { "epoch": 3.8048780487804876, "grad_norm": 0.745892500316203, "learning_rate": 2.0510970312108804e-05, "loss": 0.2893, "loss_nan_ranks": 0, "loss_rank_avg": 0.30204904079437256, "step": 780, "valid_targets_mean": 3651.2, "valid_targets_min": 1125 }, { "epoch": 3.8292682926829267, "grad_norm": 0.9046132327931361, "learning_rate": 2.026767224999028e-05, "loss": 0.3032, "loss_nan_ranks": 0, "loss_rank_avg": 0.3154948353767395, "step": 785, "valid_targets_mean": 2786.5, "valid_targets_min": 15 }, { "epoch": 3.8536585365853657, "grad_norm": 0.7325036224094065, "learning_rate": 2.0024334561413167e-05, "loss": 0.2907, "loss_nan_ranks": 0, "loss_rank_avg": 0.29411929845809937, "step": 790, "valid_targets_mean": 3472.8, "valid_targets_min": 827 }, { "epoch": 3.8780487804878048, "grad_norm": 0.6498743836905022, "learning_rate": 1.9780993270324063e-05, "loss": 0.2978, "loss_nan_ranks": 0, "loss_rank_avg": 0.29405295848846436, "step": 795, "valid_targets_mean": 3882.9, "valid_targets_min": 803 }, { "epoch": 3.902439024390244, "grad_norm": 0.7094727814399049, "learning_rate": 1.953768440120289e-05, "loss": 0.3073, "loss_nan_ranks": 0, "loss_rank_avg": 0.3063426613807678, "step": 800, "valid_targets_mean": 3734.4, "valid_targets_min": 1149 }, { "epoch": 3.926829268292683, "grad_norm": 0.6952694500011949, "learning_rate": 1.9294443973729768e-05, "loss": 0.296, "loss_nan_ranks": 0, "loss_rank_avg": 0.27972686290740967, "step": 805, "valid_targets_mean": 4442.4, "valid_targets_min": 916 }, { "epoch": 3.951219512195122, "grad_norm": 0.7839669200280891, "learning_rate": 1.905130799745268e-05, "loss": 0.2969, "loss_nan_ranks": 0, "loss_rank_avg": 0.31134629249572754, "step": 810, "valid_targets_mean": 3801.6, "valid_targets_min": 1288 }, { "epoch": 3.975609756097561, "grad_norm": 0.9512260820024343, "learning_rate": 1.8808312466456525e-05, "loss": 0.2856, "loss_nan_ranks": 0, "loss_rank_avg": 0.2893509864807129, "step": 815, "valid_targets_mean": 2878.5, "valid_targets_min": 719 }, { "epoch": 4.0, "grad_norm": 0.9409702055658681, "learning_rate": 1.856549335403457e-05, "loss": 0.2977, "loss_nan_ranks": 0, "loss_rank_avg": 0.252194344997406, "step": 820, "valid_targets_mean": 2933.2, "valid_targets_min": 15 }, { "epoch": 4.024390243902439, "grad_norm": 0.776260250287857, "learning_rate": 1.832288660736288e-05, "loss": 0.2709, "loss_nan_ranks": 0, "loss_rank_avg": 0.2807005047798157, "step": 825, "valid_targets_mean": 3608.9, "valid_targets_min": 576 }, { "epoch": 4.048780487804878, "grad_norm": 0.9103236999012465, "learning_rate": 1.808052814217871e-05, "loss": 0.2656, "loss_nan_ranks": 0, "loss_rank_avg": 0.25947239995002747, "step": 830, "valid_targets_mean": 2974.0, "valid_targets_min": 795 }, { "epoch": 4.073170731707317, "grad_norm": 0.8207458807710281, "learning_rate": 1.7838453837463498e-05, "loss": 0.2833, "loss_nan_ranks": 0, "loss_rank_avg": 0.27700158953666687, "step": 835, "valid_targets_mean": 3059.9, "valid_targets_min": 257 }, { "epoch": 4.097560975609756, "grad_norm": 0.8039571548469557, "learning_rate": 1.7596699530131234e-05, "loss": 0.2617, "loss_nan_ranks": 0, "loss_rank_avg": 0.27044573426246643, "step": 840, "valid_targets_mean": 3390.8, "valid_targets_min": 1003 }, { "epoch": 4.121951219512195, "grad_norm": 0.9264411779501436, "learning_rate": 1.735530100972326e-05, "loss": 0.2681, "loss_nan_ranks": 0, "loss_rank_avg": 0.26993459463119507, "step": 845, "valid_targets_mean": 3243.6, "valid_targets_min": 899 }, { "epoch": 4.146341463414634, "grad_norm": 0.7529964631351728, "learning_rate": 1.711429401310981e-05, "loss": 0.2808, "loss_nan_ranks": 0, "loss_rank_avg": 0.27355536818504333, "step": 850, "valid_targets_mean": 4023.1, "valid_targets_min": 737 }, { "epoch": 4.170731707317073, "grad_norm": 0.731074690052081, "learning_rate": 1.687371421919961e-05, "loss": 0.2702, "loss_nan_ranks": 0, "loss_rank_avg": 0.28618645668029785, "step": 855, "valid_targets_mean": 3938.9, "valid_targets_min": 1832 }, { "epoch": 4.195121951219512, "grad_norm": 0.800079732721868, "learning_rate": 1.6633597243657855e-05, "loss": 0.283, "loss_nan_ranks": 0, "loss_rank_avg": 0.27056562900543213, "step": 860, "valid_targets_mean": 3650.1, "valid_targets_min": 1942 }, { "epoch": 4.219512195121951, "grad_norm": 0.7533482927823113, "learning_rate": 1.6393978633633668e-05, "loss": 0.2692, "loss_nan_ranks": 0, "loss_rank_avg": 0.2656148076057434, "step": 865, "valid_targets_mean": 3817.5, "valid_targets_min": 1813 }, { "epoch": 4.2439024390243905, "grad_norm": 0.827289388678526, "learning_rate": 1.6154893862497673e-05, "loss": 0.2949, "loss_nan_ranks": 0, "loss_rank_avg": 0.31596875190734863, "step": 870, "valid_targets_mean": 3127.2, "valid_targets_min": 15 }, { "epoch": 4.2682926829268295, "grad_norm": 0.7369615846193128, "learning_rate": 1.5916378324590437e-05, "loss": 0.2598, "loss_nan_ranks": 0, "loss_rank_avg": 0.2373199164867401, "step": 875, "valid_targets_mean": 4179.9, "valid_targets_min": 1584 }, { "epoch": 4.2926829268292686, "grad_norm": 1.187298637557265, "learning_rate": 1.5678467329982717e-05, "loss": 0.267, "loss_nan_ranks": 0, "loss_rank_avg": 0.2759140729904175, "step": 880, "valid_targets_mean": 2973.3, "valid_targets_min": 228 }, { "epoch": 4.317073170731708, "grad_norm": 0.8246031829017411, "learning_rate": 1.54411960992481e-05, "loss": 0.2786, "loss_nan_ranks": 0, "loss_rank_avg": 0.2655995190143585, "step": 885, "valid_targets_mean": 3138.6, "valid_targets_min": 474 }, { "epoch": 4.341463414634147, "grad_norm": 0.8451062461907763, "learning_rate": 1.5204599758248884e-05, "loss": 0.2797, "loss_nan_ranks": 0, "loss_rank_avg": 0.30422964692115784, "step": 890, "valid_targets_mean": 3195.1, "valid_targets_min": 612 }, { "epoch": 4.365853658536586, "grad_norm": 0.9965472168956042, "learning_rate": 1.4968713332936063e-05, "loss": 0.2648, "loss_nan_ranks": 0, "loss_rank_avg": 0.2871067523956299, "step": 895, "valid_targets_mean": 3281.6, "valid_targets_min": 1371 }, { "epoch": 4.390243902439025, "grad_norm": 0.826560392213051, "learning_rate": 1.473357174416401e-05, "loss": 0.2589, "loss_nan_ranks": 0, "loss_rank_avg": 0.2745562493801117, "step": 900, "valid_targets_mean": 3754.0, "valid_targets_min": 1695 }, { "epoch": 4.414634146341464, "grad_norm": 0.773956566336489, "learning_rate": 1.4499209802520791e-05, "loss": 0.2616, "loss_nan_ranks": 0, "loss_rank_avg": 0.2717684209346771, "step": 905, "valid_targets_mean": 3665.7, "valid_targets_min": 15 }, { "epoch": 4.439024390243903, "grad_norm": 0.8542711399057813, "learning_rate": 1.426566220317474e-05, "loss": 0.2664, "loss_nan_ranks": 0, "loss_rank_avg": 0.2737499475479126, "step": 910, "valid_targets_mean": 3421.7, "valid_targets_min": 830 }, { "epoch": 4.463414634146342, "grad_norm": 0.7962556850092083, "learning_rate": 1.4032963520738183e-05, "loss": 0.2661, "loss_nan_ranks": 0, "loss_rank_avg": 0.2802625596523285, "step": 915, "valid_targets_mean": 3547.3, "valid_targets_min": 1008 }, { "epoch": 4.487804878048781, "grad_norm": 0.7955484777521536, "learning_rate": 1.3801148204148983e-05, "loss": 0.2625, "loss_nan_ranks": 0, "loss_rank_avg": 0.2767943739891052, "step": 920, "valid_targets_mean": 3616.8, "valid_targets_min": 773 }, { "epoch": 4.512195121951219, "grad_norm": 0.8832279494046948, "learning_rate": 1.357025057157062e-05, "loss": 0.2732, "loss_nan_ranks": 0, "loss_rank_avg": 0.2521354854106903, "step": 925, "valid_targets_mean": 3143.2, "valid_targets_min": 359 }, { "epoch": 4.536585365853659, "grad_norm": 0.8306588428706537, "learning_rate": 1.3340304805311802e-05, "loss": 0.2593, "loss_nan_ranks": 0, "loss_rank_avg": 0.2435213327407837, "step": 930, "valid_targets_mean": 2687.6, "valid_targets_min": 15 }, { "epoch": 4.560975609756097, "grad_norm": 0.9185238023912488, "learning_rate": 1.3111344946765977e-05, "loss": 0.2576, "loss_nan_ranks": 0, "loss_rank_avg": 0.2888595461845398, "step": 935, "valid_targets_mean": 2984.8, "valid_targets_min": 1098 }, { "epoch": 4.585365853658536, "grad_norm": 0.7784668165576322, "learning_rate": 1.2883404891371907e-05, "loss": 0.276, "loss_nan_ranks": 0, "loss_rank_avg": 0.26759079098701477, "step": 940, "valid_targets_mean": 3324.6, "valid_targets_min": 283 }, { "epoch": 4.609756097560975, "grad_norm": 0.7198225881137529, "learning_rate": 1.2656518383595681e-05, "loss": 0.2621, "loss_nan_ranks": 0, "loss_rank_avg": 0.26038920879364014, "step": 945, "valid_targets_mean": 3841.4, "valid_targets_min": 1226 }, { "epoch": 4.634146341463414, "grad_norm": 0.8067268967379502, "learning_rate": 1.243071901193519e-05, "loss": 0.2676, "loss_nan_ranks": 0, "loss_rank_avg": 0.2618296444416046, "step": 950, "valid_targets_mean": 3059.1, "valid_targets_min": 1288 }, { "epoch": 4.658536585365853, "grad_norm": 0.7371365951689539, "learning_rate": 1.2206040203947645e-05, "loss": 0.2661, "loss_nan_ranks": 0, "loss_rank_avg": 0.25615987181663513, "step": 955, "valid_targets_mean": 4461.9, "valid_targets_min": 448 }, { "epoch": 4.682926829268292, "grad_norm": 0.7267519757978881, "learning_rate": 1.1982515221300917e-05, "loss": 0.2624, "loss_nan_ranks": 0, "loss_rank_avg": 0.24453848600387573, "step": 960, "valid_targets_mean": 3733.6, "valid_targets_min": 1445 }, { "epoch": 4.7073170731707314, "grad_norm": 0.7178491229060002, "learning_rate": 1.1760177154849473e-05, "loss": 0.2558, "loss_nan_ranks": 0, "loss_rank_avg": 0.23430632054805756, "step": 965, "valid_targets_mean": 3454.4, "valid_targets_min": 647 }, { "epoch": 4.7317073170731705, "grad_norm": 0.8635688252339446, "learning_rate": 1.1539058919735557e-05, "loss": 0.2636, "loss_nan_ranks": 0, "loss_rank_avg": 0.2552710175514221, "step": 970, "valid_targets_mean": 3115.4, "valid_targets_min": 260 }, { "epoch": 4.7560975609756095, "grad_norm": 0.8178807633478934, "learning_rate": 1.1319193250516387e-05, "loss": 0.2684, "loss_nan_ranks": 0, "loss_rank_avg": 0.26949605345726013, "step": 975, "valid_targets_mean": 3029.0, "valid_targets_min": 1160 }, { "epoch": 4.780487804878049, "grad_norm": 0.8606877647771227, "learning_rate": 1.1100612696318143e-05, "loss": 0.2707, "loss_nan_ranks": 0, "loss_rank_avg": 0.2672426998615265, "step": 980, "valid_targets_mean": 2994.4, "valid_targets_min": 597 }, { "epoch": 4.804878048780488, "grad_norm": 0.8363734717967222, "learning_rate": 1.0883349616017312e-05, "loss": 0.2746, "loss_nan_ranks": 0, "loss_rank_avg": 0.2739032506942749, "step": 985, "valid_targets_mean": 2968.4, "valid_targets_min": 426 }, { "epoch": 4.829268292682927, "grad_norm": 0.7709748619894506, "learning_rate": 1.0667436173450278e-05, "loss": 0.2476, "loss_nan_ranks": 0, "loss_rank_avg": 0.26799261569976807, "step": 990, "valid_targets_mean": 3660.3, "valid_targets_min": 1059 }, { "epoch": 4.853658536585366, "grad_norm": 0.8172048769220687, "learning_rate": 1.0452904332651745e-05, "loss": 0.2621, "loss_nan_ranks": 0, "loss_rank_avg": 0.25125956535339355, "step": 995, "valid_targets_mean": 2627.2, "valid_targets_min": 15 }, { "epoch": 4.878048780487805, "grad_norm": 0.7823743455001714, "learning_rate": 1.0239785853122784e-05, "loss": 0.265, "loss_nan_ranks": 0, "loss_rank_avg": 0.265064537525177, "step": 1000, "valid_targets_mean": 3875.0, "valid_targets_min": 15 }, { "epoch": 4.902439024390244, "grad_norm": 0.7650125864589816, "learning_rate": 1.0028112285129058e-05, "loss": 0.2654, "loss_nan_ranks": 0, "loss_rank_avg": 0.29131659865379333, "step": 1005, "valid_targets_mean": 3470.4, "valid_targets_min": 786 }, { "epoch": 4.926829268292683, "grad_norm": 0.668827826218542, "learning_rate": 9.81791496503015e-06, "loss": 0.2616, "loss_nan_ranks": 0, "loss_rank_avg": 0.2571777105331421, "step": 1010, "valid_targets_mean": 4372.1, "valid_targets_min": 1355 }, { "epoch": 4.951219512195122, "grad_norm": 0.6873823999708377, "learning_rate": 9.60922501064049e-06, "loss": 0.2582, "loss_nan_ranks": 0, "loss_rank_avg": 0.2555577754974365, "step": 1015, "valid_targets_mean": 4297.0, "valid_targets_min": 1361 }, { "epoch": 4.975609756097561, "grad_norm": 0.7801780285443586, "learning_rate": 9.402073316622609e-06, "loss": 0.2536, "loss_nan_ranks": 0, "loss_rank_avg": 0.24865934252738953, "step": 1020, "valid_targets_mean": 3150.4, "valid_targets_min": 875 }, { "epoch": 5.0, "grad_norm": 0.7967802474346398, "learning_rate": 9.196490549913486e-06, "loss": 0.2622, "loss_nan_ranks": 0, "loss_rank_avg": 0.2609595060348511, "step": 1025, "valid_targets_mean": 3708.4, "valid_targets_min": 1052 }, { "epoch": 5.024390243902439, "grad_norm": 0.6513129597791458, "learning_rate": 8.992507145184613e-06, "loss": 0.2405, "loss_nan_ranks": 0, "loss_rank_avg": 0.2445092499256134, "step": 1030, "valid_targets_mean": 4137.7, "valid_targets_min": 448 }, { "epoch": 5.048780487804878, "grad_norm": 0.9278929525940717, "learning_rate": 8.79015330033635e-06, "loss": 0.2438, "loss_nan_ranks": 0, "loss_rank_avg": 0.24177995324134827, "step": 1035, "valid_targets_mean": 3097.0, "valid_targets_min": 1277 }, { "epoch": 5.073170731707317, "grad_norm": 1.552193652315139, "learning_rate": 8.589458972027483e-06, "loss": 0.2436, "loss_nan_ranks": 0, "loss_rank_avg": 0.24827489256858826, "step": 1040, "valid_targets_mean": 3416.3, "valid_targets_min": 1001 }, { "epoch": 5.097560975609756, "grad_norm": 0.7093858199198262, "learning_rate": 8.390453871240307e-06, "loss": 0.2453, "loss_nan_ranks": 0, "loss_rank_avg": 0.23724865913391113, "step": 1045, "valid_targets_mean": 4210.4, "valid_targets_min": 1619 }, { "epoch": 5.121951219512195, "grad_norm": 1.0012021920667462, "learning_rate": 8.193167458882292e-06, "loss": 0.2446, "loss_nan_ranks": 0, "loss_rank_avg": 0.27012312412261963, "step": 1050, "valid_targets_mean": 2548.9, "valid_targets_min": 641 }, { "epoch": 5.146341463414634, "grad_norm": 0.8690910331604649, "learning_rate": 7.9976289414246e-06, "loss": 0.2452, "loss_nan_ranks": 0, "loss_rank_avg": 0.2542031705379486, "step": 1055, "valid_targets_mean": 2743.4, "valid_targets_min": 744 }, { "epoch": 5.170731707317073, "grad_norm": 1.044901979443724, "learning_rate": 7.803867266578329e-06, "loss": 0.2373, "loss_nan_ranks": 0, "loss_rank_avg": 0.25191807746887207, "step": 1060, "valid_targets_mean": 3127.7, "valid_targets_min": 906 }, { "epoch": 5.195121951219512, "grad_norm": 0.9050120681858465, "learning_rate": 7.611911119009092e-06, "loss": 0.2536, "loss_nan_ranks": 0, "loss_rank_avg": 0.2629399001598358, "step": 1065, "valid_targets_mean": 3758.2, "valid_targets_min": 376 }, { "epoch": 5.219512195121951, "grad_norm": 0.9283099215477023, "learning_rate": 7.4217889160904824e-06, "loss": 0.2544, "loss_nan_ranks": 0, "loss_rank_avg": 0.2586513161659241, "step": 1070, "valid_targets_mean": 2433.7, "valid_targets_min": 228 }, { "epoch": 5.2439024390243905, "grad_norm": 0.8259428205883735, "learning_rate": 7.233528803697185e-06, "loss": 0.2452, "loss_nan_ranks": 0, "loss_rank_avg": 0.24752004444599152, "step": 1075, "valid_targets_mean": 3368.4, "valid_targets_min": 741 }, { "epoch": 5.2682926829268295, "grad_norm": 0.7209136325847043, "learning_rate": 7.0471586520381796e-06, "loss": 0.2497, "loss_nan_ranks": 0, "loss_rank_avg": 0.23628446459770203, "step": 1080, "valid_targets_mean": 4003.1, "valid_targets_min": 730 }, { "epoch": 5.2926829268292686, "grad_norm": 0.7406882461573, "learning_rate": 6.8627060515308275e-06, "loss": 0.2494, "loss_nan_ranks": 0, "loss_rank_avg": 0.23052799701690674, "step": 1085, "valid_targets_mean": 3915.6, "valid_targets_min": 895 }, { "epoch": 5.317073170731708, "grad_norm": 0.9022661536003552, "learning_rate": 6.6801983087163944e-06, "loss": 0.2514, "loss_nan_ranks": 0, "loss_rank_avg": 0.2609993815422058, "step": 1090, "valid_targets_mean": 3229.1, "valid_targets_min": 1347 }, { "epoch": 5.341463414634147, "grad_norm": 0.6825772427499014, "learning_rate": 6.499662442217496e-06, "loss": 0.2406, "loss_nan_ranks": 0, "loss_rank_avg": 0.22986085712909698, "step": 1095, "valid_targets_mean": 4128.3, "valid_targets_min": 808 }, { "epoch": 5.365853658536586, "grad_norm": 0.9787201186348055, "learning_rate": 6.3211251787383146e-06, "loss": 0.2506, "loss_nan_ranks": 0, "loss_rank_avg": 0.26659125089645386, "step": 1100, "valid_targets_mean": 2601.8, "valid_targets_min": 778 }, { "epoch": 5.390243902439025, "grad_norm": 0.7673661220615522, "learning_rate": 6.144612949107858e-06, "loss": 0.2786, "loss_nan_ranks": 0, "loss_rank_avg": 0.24559593200683594, "step": 1105, "valid_targets_mean": 4265.2, "valid_targets_min": 1373 }, { "epoch": 5.414634146341464, "grad_norm": 0.9843589364752354, "learning_rate": 5.970151884367206e-06, "loss": 0.2648, "loss_nan_ranks": 0, "loss_rank_avg": 0.2827834486961365, "step": 1110, "valid_targets_mean": 2506.9, "valid_targets_min": 15 }, { "epoch": 5.439024390243903, "grad_norm": 0.7949717862044275, "learning_rate": 5.7977678119010025e-06, "loss": 0.2537, "loss_nan_ranks": 0, "loss_rank_avg": 0.24906060099601746, "step": 1115, "valid_targets_mean": 3294.9, "valid_targets_min": 444 }, { "epoch": 5.463414634146342, "grad_norm": 0.9750761574373105, "learning_rate": 5.627486251613934e-06, "loss": 0.2409, "loss_nan_ranks": 0, "loss_rank_avg": 0.24433453381061554, "step": 1120, "valid_targets_mean": 2488.4, "valid_targets_min": 215 }, { "epoch": 5.487804878048781, "grad_norm": 0.8567488259221477, "learning_rate": 5.4593324121527554e-06, "loss": 0.2649, "loss_nan_ranks": 0, "loss_rank_avg": 0.26337265968322754, "step": 1125, "valid_targets_mean": 3024.3, "valid_targets_min": 959 }, { "epoch": 5.512195121951219, "grad_norm": 0.812251328892105, "learning_rate": 5.293331187174369e-06, "loss": 0.252, "loss_nan_ranks": 0, "loss_rank_avg": 0.23273511230945587, "step": 1130, "valid_targets_mean": 2921.6, "valid_targets_min": 347 }, { "epoch": 5.536585365853659, "grad_norm": 1.075912086276383, "learning_rate": 5.129507151660566e-06, "loss": 0.2579, "loss_nan_ranks": 0, "loss_rank_avg": 0.26345235109329224, "step": 1135, "valid_targets_mean": 2968.8, "valid_targets_min": 645 }, { "epoch": 5.560975609756097, "grad_norm": 0.7590504022526489, "learning_rate": 4.967884558279894e-06, "loss": 0.2434, "loss_nan_ranks": 0, "loss_rank_avg": 0.22883421182632446, "step": 1140, "valid_targets_mean": 3830.3, "valid_targets_min": 477 }, { "epoch": 5.585365853658536, "grad_norm": 0.9309391468937491, "learning_rate": 4.808487333797272e-06, "loss": 0.2543, "loss_nan_ranks": 0, "loss_rank_avg": 0.26027998328208923, "step": 1145, "valid_targets_mean": 2935.4, "valid_targets_min": 1080 }, { "epoch": 5.609756097560975, "grad_norm": 0.9093755260374964, "learning_rate": 4.651339075531873e-06, "loss": 0.2622, "loss_nan_ranks": 0, "loss_rank_avg": 0.2955099642276764, "step": 1150, "valid_targets_mean": 2912.1, "valid_targets_min": 1208 }, { "epoch": 5.634146341463414, "grad_norm": 0.7573696056760568, "learning_rate": 4.496463047863728e-06, "loss": 0.248, "loss_nan_ranks": 0, "loss_rank_avg": 0.24394544959068298, "step": 1155, "valid_targets_mean": 3498.8, "valid_targets_min": 1321 }, { "epoch": 5.658536585365853, "grad_norm": 0.766437897262525, "learning_rate": 4.343882178789649e-06, "loss": 0.2457, "loss_nan_ranks": 0, "loss_rank_avg": 0.25831907987594604, "step": 1160, "valid_targets_mean": 3925.3, "valid_targets_min": 1203 }, { "epoch": 5.682926829268292, "grad_norm": 0.685642357911014, "learning_rate": 4.193619056528968e-06, "loss": 0.2536, "loss_nan_ranks": 0, "loss_rank_avg": 0.23799964785575867, "step": 1165, "valid_targets_mean": 4780.6, "valid_targets_min": 1015 }, { "epoch": 5.7073170731707314, "grad_norm": 0.8782575976505613, "learning_rate": 4.045695926179518e-06, "loss": 0.2331, "loss_nan_ranks": 0, "loss_rank_avg": 0.25392627716064453, "step": 1170, "valid_targets_mean": 3876.3, "valid_targets_min": 15 }, { "epoch": 5.7317073170731705, "grad_norm": 0.6746053110171998, "learning_rate": 3.900134686424497e-06, "loss": 0.2461, "loss_nan_ranks": 0, "loss_rank_avg": 0.22202837467193604, "step": 1175, "valid_targets_mean": 4104.9, "valid_targets_min": 1062 }, { "epoch": 5.7560975609756095, "grad_norm": 0.808869320274757, "learning_rate": 3.756956886290497e-06, "loss": 0.2532, "loss_nan_ranks": 0, "loss_rank_avg": 0.26272523403167725, "step": 1180, "valid_targets_mean": 3688.8, "valid_targets_min": 1201 }, { "epoch": 5.780487804878049, "grad_norm": 0.8464273251163792, "learning_rate": 3.6161837219574423e-06, "loss": 0.244, "loss_nan_ranks": 0, "loss_rank_avg": 0.26603326201438904, "step": 1185, "valid_targets_mean": 2798.3, "valid_targets_min": 1006 }, { "epoch": 5.804878048780488, "grad_norm": 0.9647474178559449, "learning_rate": 3.477836033620623e-06, "loss": 0.2452, "loss_nan_ranks": 0, "loss_rank_avg": 0.26358962059020996, "step": 1190, "valid_targets_mean": 2401.4, "valid_targets_min": 383 }, { "epoch": 5.829268292682927, "grad_norm": 0.9304341033869954, "learning_rate": 3.3419343024055385e-06, "loss": 0.2456, "loss_nan_ranks": 0, "loss_rank_avg": 0.2455274760723114, "step": 1195, "valid_targets_mean": 2808.2, "valid_targets_min": 940 }, { "epoch": 5.853658536585366, "grad_norm": 0.890111073772623, "learning_rate": 3.208498647335818e-06, "loss": 0.2436, "loss_nan_ranks": 0, "loss_rank_avg": 0.2638408839702606, "step": 1200, "valid_targets_mean": 3505.1, "valid_targets_min": 284 }, { "epoch": 5.878048780487805, "grad_norm": 0.7108698871221218, "learning_rate": 3.0775488223547946e-06, "loss": 0.2497, "loss_nan_ranks": 0, "loss_rank_avg": 0.25884100794792175, "step": 1205, "valid_targets_mean": 4374.9, "valid_targets_min": 859 }, { "epoch": 5.902439024390244, "grad_norm": 0.7134129262752172, "learning_rate": 2.949104213401126e-06, "loss": 0.2358, "loss_nan_ranks": 0, "loss_rank_avg": 0.23458099365234375, "step": 1210, "valid_targets_mean": 4402.4, "valid_targets_min": 960 }, { "epoch": 5.926829268292683, "grad_norm": 0.766327851314873, "learning_rate": 2.8231838355388564e-06, "loss": 0.2607, "loss_nan_ranks": 0, "loss_rank_avg": 0.26184824109077454, "step": 1215, "valid_targets_mean": 3815.8, "valid_targets_min": 893 }, { "epoch": 5.951219512195122, "grad_norm": 1.1353404399035876, "learning_rate": 2.6998063301424183e-06, "loss": 0.2592, "loss_nan_ranks": 0, "loss_rank_avg": 0.25338470935821533, "step": 1220, "valid_targets_mean": 2824.6, "valid_targets_min": 15 }, { "epoch": 5.975609756097561, "grad_norm": 0.8927504722814279, "learning_rate": 2.5789899621369576e-06, "loss": 0.2397, "loss_nan_ranks": 0, "loss_rank_avg": 0.24313412606716156, "step": 1225, "valid_targets_mean": 3381.8, "valid_targets_min": 317 }, { "epoch": 6.0, "grad_norm": 0.5993805237959021, "learning_rate": 2.4607526172943573e-06, "loss": 0.2363, "loss_nan_ranks": 0, "loss_rank_avg": 0.22973109781742096, "step": 1230, "valid_targets_mean": 5376.5, "valid_targets_min": 1426 }, { "epoch": 6.024390243902439, "grad_norm": 0.8354731757428007, "learning_rate": 2.345111799585418e-06, "loss": 0.231, "loss_nan_ranks": 0, "loss_rank_avg": 0.2252325713634491, "step": 1235, "valid_targets_mean": 3432.8, "valid_targets_min": 15 }, { "epoch": 6.048780487804878, "grad_norm": 0.6750361987361632, "learning_rate": 2.2320846285885536e-06, "loss": 0.2403, "loss_nan_ranks": 0, "loss_rank_avg": 0.18881835043430328, "step": 1240, "valid_targets_mean": 4703.5, "valid_targets_min": 909 }, { "epoch": 6.073170731707317, "grad_norm": 0.8147913212830856, "learning_rate": 2.1216878369554018e-06, "loss": 0.2407, "loss_nan_ranks": 0, "loss_rank_avg": 0.23672699928283691, "step": 1245, "valid_targets_mean": 3134.4, "valid_targets_min": 730 }, { "epoch": 6.097560975609756, "grad_norm": 0.7466076107590293, "learning_rate": 2.0139377679336914e-06, "loss": 0.2312, "loss_nan_ranks": 0, "loss_rank_avg": 0.22420379519462585, "step": 1250, "valid_targets_mean": 3622.1, "valid_targets_min": 424 }, { "epoch": 6.121951219512195, "grad_norm": 0.7651624761198161, "learning_rate": 1.908850372947775e-06, "loss": 0.2404, "loss_nan_ranks": 0, "loss_rank_avg": 0.2247518002986908, "step": 1255, "valid_targets_mean": 4336.2, "valid_targets_min": 641 }, { "epoch": 6.146341463414634, "grad_norm": 1.0311971502086403, "learning_rate": 1.8064412092371687e-06, "loss": 0.2417, "loss_nan_ranks": 0, "loss_rank_avg": 0.23574134707450867, "step": 1260, "valid_targets_mean": 2097.2, "valid_targets_min": 15 }, { "epoch": 6.170731707317073, "grad_norm": 0.8187942030961283, "learning_rate": 1.7067254375534426e-06, "loss": 0.2408, "loss_nan_ranks": 0, "loss_rank_avg": 0.22960010170936584, "step": 1265, "valid_targets_mean": 3018.4, "valid_targets_min": 1903 }, { "epoch": 6.195121951219512, "grad_norm": 1.029146328727017, "learning_rate": 1.6097178199158147e-06, "loss": 0.2446, "loss_nan_ranks": 0, "loss_rank_avg": 0.23877152800559998, "step": 1270, "valid_targets_mean": 3062.9, "valid_targets_min": 15 }, { "epoch": 6.219512195121951, "grad_norm": 0.8154855502925126, "learning_rate": 1.5154327174257487e-06, "loss": 0.2331, "loss_nan_ranks": 0, "loss_rank_avg": 0.2658793330192566, "step": 1275, "valid_targets_mean": 3184.2, "valid_targets_min": 879 }, { "epoch": 6.2439024390243905, "grad_norm": 0.8092122258788375, "learning_rate": 1.4238840881409411e-06, "loss": 0.2454, "loss_nan_ranks": 0, "loss_rank_avg": 0.25030407309532166, "step": 1280, "valid_targets_mean": 3467.3, "valid_targets_min": 979 }, { "epoch": 6.2682926829268295, "grad_norm": 0.7990164724748857, "learning_rate": 1.3350854850089556e-06, "loss": 0.2436, "loss_nan_ranks": 0, "loss_rank_avg": 0.24833282828330994, "step": 1285, "valid_targets_mean": 3502.2, "valid_targets_min": 947 }, { "epoch": 6.2926829268292686, "grad_norm": 0.9741101055525349, "learning_rate": 1.2490500538608186e-06, "loss": 0.2494, "loss_nan_ranks": 0, "loss_rank_avg": 0.23695826530456543, "step": 1290, "valid_targets_mean": 3103.2, "valid_targets_min": 734 }, { "epoch": 6.317073170731708, "grad_norm": 0.7866383371157202, "learning_rate": 1.1657905314649054e-06, "loss": 0.2477, "loss_nan_ranks": 0, "loss_rank_avg": 0.23404628038406372, "step": 1295, "valid_targets_mean": 3434.3, "valid_targets_min": 518 }, { "epoch": 6.341463414634147, "grad_norm": 1.052443778688222, "learning_rate": 1.0853192436413761e-06, "loss": 0.2414, "loss_nan_ranks": 0, "loss_rank_avg": 0.26550811529159546, "step": 1300, "valid_targets_mean": 2595.0, "valid_targets_min": 15 }, { "epoch": 6.365853658536586, "grad_norm": 0.7368539211291545, "learning_rate": 1.0076481034374597e-06, "loss": 0.2386, "loss_nan_ranks": 0, "loss_rank_avg": 0.22750580310821533, "step": 1305, "valid_targets_mean": 4232.3, "valid_targets_min": 1352 }, { "epoch": 6.390243902439025, "grad_norm": 0.8172404431791918, "learning_rate": 9.327886093638217e-07, "loss": 0.2396, "loss_nan_ranks": 0, "loss_rank_avg": 0.228601336479187, "step": 1310, "valid_targets_mean": 3308.4, "valid_targets_min": 745 }, { "epoch": 6.414634146341464, "grad_norm": 0.9556714377855368, "learning_rate": 8.607518436923024e-07, "loss": 0.2352, "loss_nan_ranks": 0, "loss_rank_avg": 0.22058895230293274, "step": 1315, "valid_targets_mean": 3972.5, "valid_targets_min": 1721 }, { "epoch": 6.439024390243903, "grad_norm": 0.8574584113032061, "learning_rate": 7.915484708153221e-07, "loss": 0.2365, "loss_nan_ranks": 0, "loss_rank_avg": 0.23830386996269226, "step": 1320, "valid_targets_mean": 3130.8, "valid_targets_min": 244 }, { "epoch": 6.463414634146342, "grad_norm": 0.7867469974906987, "learning_rate": 7.251887356670795e-07, "loss": 0.2262, "loss_nan_ranks": 0, "loss_rank_avg": 0.2254469245672226, "step": 1325, "valid_targets_mean": 3359.5, "valid_targets_min": 880 }, { "epoch": 6.487804878048781, "grad_norm": 0.7986596207181103, "learning_rate": 6.616824622069029e-07, "loss": 0.2411, "loss_nan_ranks": 0, "loss_rank_avg": 0.22942882776260376, "step": 1330, "valid_targets_mean": 3246.1, "valid_targets_min": 15 }, { "epoch": 6.512195121951219, "grad_norm": 0.8394244430546116, "learning_rate": 6.010390519648956e-07, "loss": 0.2348, "loss_nan_ranks": 0, "loss_rank_avg": 0.23827606439590454, "step": 1335, "valid_targets_mean": 3922.2, "valid_targets_min": 748 }, { "epoch": 6.536585365853659, "grad_norm": 1.0223514496941088, "learning_rate": 5.432674826501116e-07, "loss": 0.2437, "loss_nan_ranks": 0, "loss_rank_avg": 0.2501652240753174, "step": 1340, "valid_targets_mean": 2854.8, "valid_targets_min": 730 }, { "epoch": 6.560975609756097, "grad_norm": 0.8493598124019298, "learning_rate": 4.883763068215142e-07, "loss": 0.2404, "loss_nan_ranks": 0, "loss_rank_avg": 0.2515867352485657, "step": 1345, "valid_targets_mean": 3138.7, "valid_targets_min": 638 }, { "epoch": 6.585365853658536, "grad_norm": 0.9193382117725917, "learning_rate": 4.363736506218197e-07, "loss": 0.244, "loss_nan_ranks": 0, "loss_rank_avg": 0.26839739084243774, "step": 1350, "valid_targets_mean": 2877.6, "valid_targets_min": 15 }, { "epoch": 6.609756097560975, "grad_norm": 0.8621845943305007, "learning_rate": 3.8726721257451137e-07, "loss": 0.2422, "loss_nan_ranks": 0, "loss_rank_avg": 0.23818087577819824, "step": 1355, "valid_targets_mean": 3133.2, "valid_targets_min": 699 }, { "epoch": 6.634146341463414, "grad_norm": 0.9578375613810739, "learning_rate": 3.410642624441352e-07, "loss": 0.2375, "loss_nan_ranks": 0, "loss_rank_avg": 0.24605871737003326, "step": 1360, "valid_targets_mean": 2749.3, "valid_targets_min": 383 }, { "epoch": 6.658536585365853, "grad_norm": 0.8904495992448669, "learning_rate": 2.977716401600894e-07, "loss": 0.2312, "loss_nan_ranks": 0, "loss_rank_avg": 0.24474391341209412, "step": 1365, "valid_targets_mean": 2796.8, "valid_targets_min": 954 }, { "epoch": 6.682926829268292, "grad_norm": 0.8518441087464355, "learning_rate": 2.573957548040107e-07, "loss": 0.2184, "loss_nan_ranks": 0, "loss_rank_avg": 0.19270995259284973, "step": 1370, "valid_targets_mean": 3499.1, "valid_targets_min": 893 }, { "epoch": 6.7073170731707314, "grad_norm": 0.7685128175135679, "learning_rate": 2.1994258366099253e-07, "loss": 0.2417, "loss_nan_ranks": 0, "loss_rank_avg": 0.24854759871959686, "step": 1375, "valid_targets_mean": 3583.8, "valid_targets_min": 744 }, { "epoch": 6.7317073170731705, "grad_norm": 0.8337143588524125, "learning_rate": 1.854176713346867e-07, "loss": 0.2674, "loss_nan_ranks": 0, "loss_rank_avg": 0.2495569884777069, "step": 1380, "valid_targets_mean": 3424.1, "valid_targets_min": 755 }, { "epoch": 6.7560975609756095, "grad_norm": 0.8172528668416672, "learning_rate": 1.538261289264842e-07, "loss": 0.2371, "loss_nan_ranks": 0, "loss_rank_avg": 0.2521843910217285, "step": 1385, "valid_targets_mean": 3401.9, "valid_targets_min": 1633 }, { "epoch": 6.780487804878049, "grad_norm": 0.7445112746003925, "learning_rate": 1.2517263327884944e-07, "loss": 0.2609, "loss_nan_ranks": 0, "loss_rank_avg": 0.2370169758796692, "step": 1390, "valid_targets_mean": 3845.2, "valid_targets_min": 474 }, { "epoch": 6.804878048780488, "grad_norm": 0.7797928451134836, "learning_rate": 9.94614262829785e-08, "loss": 0.2384, "loss_nan_ranks": 0, "loss_rank_avg": 0.23094391822814941, "step": 1395, "valid_targets_mean": 3594.2, "valid_targets_min": 1001 }, { "epoch": 6.829268292682927, "grad_norm": 0.8197547024772956, "learning_rate": 7.669631425079705e-08, "loss": 0.2448, "loss_nan_ranks": 0, "loss_rank_avg": 0.23373377323150635, "step": 1400, "valid_targets_mean": 3304.5, "valid_targets_min": 959 }, { "epoch": 6.853658536585366, "grad_norm": 0.819098164874208, "learning_rate": 5.6880667351497664e-08, "loss": 0.235, "loss_nan_ranks": 0, "loss_rank_avg": 0.2553420662879944, "step": 1405, "valid_targets_mean": 3185.7, "valid_targets_min": 1009 }, { "epoch": 6.878048780487805, "grad_norm": 0.7376496931499309, "learning_rate": 4.00174191125946e-08, "loss": 0.2342, "loss_nan_ranks": 0, "loss_rank_avg": 0.24166685342788696, "step": 1410, "valid_targets_mean": 4154.6, "valid_targets_min": 1356 }, { "epoch": 6.902439024390244, "grad_norm": 0.7007641583033006, "learning_rate": 2.6109065985655547e-08, "loss": 0.2368, "loss_nan_ranks": 0, "loss_rank_avg": 0.22494983673095703, "step": 1415, "valid_targets_mean": 4674.7, "valid_targets_min": 1114 }, { "epoch": 6.926829268292683, "grad_norm": 0.9573315937952928, "learning_rate": 1.515766697672172e-08, "loss": 0.2396, "loss_nan_ranks": 0, "loss_rank_avg": 0.2521101236343384, "step": 1420, "valid_targets_mean": 3319.0, "valid_targets_min": 1386 }, { "epoch": 6.951219512195122, "grad_norm": 0.7016789049970602, "learning_rate": 7.16484334148504e-09, "loss": 0.2342, "loss_nan_ranks": 0, "loss_rank_avg": 0.2223004400730133, "step": 1425, "valid_targets_mean": 4016.6, "valid_targets_min": 928 }, { "epoch": 6.975609756097561, "grad_norm": 0.8796275429117553, "learning_rate": 2.1317783452823315e-09, "loss": 0.2378, "loss_nan_ranks": 0, "loss_rank_avg": 0.2460535764694214, "step": 1430, "valid_targets_mean": 2961.6, "valid_targets_min": 1039 }, { "epoch": 7.0, "grad_norm": 0.8974037466412109, "learning_rate": 5.921708791767344e-11, "loss": 0.2521, "loss_nan_ranks": 0, "loss_rank_avg": 0.24434766173362732, "step": 1435, "valid_targets_mean": 2891.9, "valid_targets_min": 658 }, { "epoch": 7.0, "loss_nan_ranks": 0, "loss_rank_avg": 0.24434766173362732, "step": 1435, "total_flos": 459870247321600.0, "train_loss": 0.3203928537069713, "train_runtime": 9814.9228, "train_samples_per_second": 2.332, "train_steps_per_second": 0.146, "valid_targets_mean": 2891.9, "valid_targets_min": 658 } ], "logging_steps": 5, "max_steps": 1435, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 459870247321600.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }