{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "eval_steps": 500, "global_step": 1890, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.018518518518518517, "grad_norm": 27.69713638899012, "learning_rate": 8.465608465608466e-07, "loss": 0.8879, "loss_nan_ranks": 0, "loss_rank_avg": 0.4629935324192047, "step": 5, "valid_targets_mean": 5865.4, "valid_targets_min": 789 }, { "epoch": 0.037037037037037035, "grad_norm": 23.290742086079, "learning_rate": 1.904761904761905e-06, "loss": 0.87, "loss_nan_ranks": 0, "loss_rank_avg": 0.4101138412952423, "step": 10, "valid_targets_mean": 6581.6, "valid_targets_min": 4761 }, { "epoch": 0.05555555555555555, "grad_norm": 13.68077632229009, "learning_rate": 2.962962962962963e-06, "loss": 0.7768, "loss_nan_ranks": 0, "loss_rank_avg": 0.370077908039093, "step": 15, "valid_targets_mean": 7269.0, "valid_targets_min": 3491 }, { "epoch": 0.07407407407407407, "grad_norm": 4.83254717026584, "learning_rate": 4.0211640211640215e-06, "loss": 0.6639, "loss_nan_ranks": 0, "loss_rank_avg": 0.3309337794780731, "step": 20, "valid_targets_mean": 6683.6, "valid_targets_min": 1240 }, { "epoch": 0.09259259259259259, "grad_norm": 2.428166466749575, "learning_rate": 5.07936507936508e-06, "loss": 0.6093, "loss_nan_ranks": 0, "loss_rank_avg": 0.3472573757171631, "step": 25, "valid_targets_mean": 5540.1, "valid_targets_min": 501 }, { "epoch": 0.1111111111111111, "grad_norm": 1.8170151536729782, "learning_rate": 6.137566137566138e-06, "loss": 0.577, "loss_nan_ranks": 0, "loss_rank_avg": 0.288746178150177, "step": 30, "valid_targets_mean": 7024.4, "valid_targets_min": 413 }, { "epoch": 0.12962962962962962, "grad_norm": 1.360405445125232, "learning_rate": 7.195767195767196e-06, "loss": 0.546, "loss_nan_ranks": 0, "loss_rank_avg": 0.3221224844455719, "step": 35, "valid_targets_mean": 6184.5, "valid_targets_min": 4369 }, { "epoch": 0.14814814814814814, "grad_norm": 0.9196969375594887, "learning_rate": 8.253968253968254e-06, "loss": 0.5352, "loss_nan_ranks": 0, "loss_rank_avg": 0.2751440405845642, "step": 40, "valid_targets_mean": 6822.4, "valid_targets_min": 4627 }, { "epoch": 0.16666666666666666, "grad_norm": 0.765133314763819, "learning_rate": 9.312169312169313e-06, "loss": 0.5021, "loss_nan_ranks": 0, "loss_rank_avg": 0.3200388550758362, "step": 45, "valid_targets_mean": 7612.6, "valid_targets_min": 4008 }, { "epoch": 0.18518518518518517, "grad_norm": 0.6845104561118748, "learning_rate": 1.037037037037037e-05, "loss": 0.4855, "loss_nan_ranks": 0, "loss_rank_avg": 0.2742794156074524, "step": 50, "valid_targets_mean": 7739.4, "valid_targets_min": 391 }, { "epoch": 0.2037037037037037, "grad_norm": 0.627723124248625, "learning_rate": 1.1428571428571429e-05, "loss": 0.4741, "loss_nan_ranks": 0, "loss_rank_avg": 0.24860906600952148, "step": 55, "valid_targets_mean": 5759.1, "valid_targets_min": 479 }, { "epoch": 0.2222222222222222, "grad_norm": 0.5488873482745417, "learning_rate": 1.2486772486772486e-05, "loss": 0.4688, "loss_nan_ranks": 0, "loss_rank_avg": 0.23666323721408844, "step": 60, "valid_targets_mean": 7891.8, "valid_targets_min": 6024 }, { "epoch": 0.24074074074074073, "grad_norm": 0.5404794526206105, "learning_rate": 1.3544973544973545e-05, "loss": 0.4433, "loss_nan_ranks": 0, "loss_rank_avg": 0.21840626001358032, "step": 65, "valid_targets_mean": 7151.1, "valid_targets_min": 3780 }, { "epoch": 0.25925925925925924, "grad_norm": 0.5380782703461664, "learning_rate": 1.4603174603174603e-05, "loss": 0.4242, "loss_nan_ranks": 0, "loss_rank_avg": 0.21182352304458618, "step": 70, "valid_targets_mean": 7776.1, "valid_targets_min": 5679 }, { "epoch": 0.2777777777777778, "grad_norm": 0.5450893010749523, "learning_rate": 1.5661375661375662e-05, "loss": 0.4293, "loss_nan_ranks": 0, "loss_rank_avg": 0.20561331510543823, "step": 75, "valid_targets_mean": 6248.5, "valid_targets_min": 445 }, { "epoch": 0.2962962962962963, "grad_norm": 0.5317555635167633, "learning_rate": 1.671957671957672e-05, "loss": 0.4256, "loss_nan_ranks": 0, "loss_rank_avg": 0.2224772423505783, "step": 80, "valid_targets_mean": 7044.6, "valid_targets_min": 4290 }, { "epoch": 0.3148148148148148, "grad_norm": 0.5450655757884623, "learning_rate": 1.7777777777777777e-05, "loss": 0.3943, "loss_nan_ranks": 0, "loss_rank_avg": 0.23075850307941437, "step": 85, "valid_targets_mean": 6590.8, "valid_targets_min": 2602 }, { "epoch": 0.3333333333333333, "grad_norm": 0.4891965175751207, "learning_rate": 1.8835978835978836e-05, "loss": 0.3825, "loss_nan_ranks": 0, "loss_rank_avg": 0.21840187907218933, "step": 90, "valid_targets_mean": 7733.4, "valid_targets_min": 4802 }, { "epoch": 0.35185185185185186, "grad_norm": 0.5051228466360034, "learning_rate": 1.9894179894179895e-05, "loss": 0.3811, "loss_nan_ranks": 0, "loss_rank_avg": 0.18121924996376038, "step": 95, "valid_targets_mean": 7083.4, "valid_targets_min": 3807 }, { "epoch": 0.37037037037037035, "grad_norm": 0.6045358159055502, "learning_rate": 2.0952380952380954e-05, "loss": 0.3904, "loss_nan_ranks": 0, "loss_rank_avg": 0.2092483937740326, "step": 100, "valid_targets_mean": 5927.2, "valid_targets_min": 3887 }, { "epoch": 0.3888888888888889, "grad_norm": 0.5039852241565205, "learning_rate": 2.2010582010582013e-05, "loss": 0.3697, "loss_nan_ranks": 0, "loss_rank_avg": 0.16192597150802612, "step": 105, "valid_targets_mean": 6823.8, "valid_targets_min": 2851 }, { "epoch": 0.4074074074074074, "grad_norm": 0.543344814617639, "learning_rate": 2.3068783068783072e-05, "loss": 0.366, "loss_nan_ranks": 0, "loss_rank_avg": 0.21563449501991272, "step": 110, "valid_targets_mean": 7248.0, "valid_targets_min": 4263 }, { "epoch": 0.42592592592592593, "grad_norm": 0.51463194661137, "learning_rate": 2.4126984126984128e-05, "loss": 0.3673, "loss_nan_ranks": 0, "loss_rank_avg": 0.20645663142204285, "step": 115, "valid_targets_mean": 7274.1, "valid_targets_min": 5029 }, { "epoch": 0.4444444444444444, "grad_norm": 0.5022729085110892, "learning_rate": 2.5185185185185187e-05, "loss": 0.3513, "loss_nan_ranks": 0, "loss_rank_avg": 0.1954728364944458, "step": 120, "valid_targets_mean": 8162.1, "valid_targets_min": 4772 }, { "epoch": 0.46296296296296297, "grad_norm": 0.48849343768132747, "learning_rate": 2.6243386243386246e-05, "loss": 0.3764, "loss_nan_ranks": 0, "loss_rank_avg": 0.22054851055145264, "step": 125, "valid_targets_mean": 9386.2, "valid_targets_min": 5811 }, { "epoch": 0.48148148148148145, "grad_norm": 0.6066721985482638, "learning_rate": 2.7301587301587305e-05, "loss": 0.3552, "loss_nan_ranks": 0, "loss_rank_avg": 0.17588719725608826, "step": 130, "valid_targets_mean": 4571.5, "valid_targets_min": 1134 }, { "epoch": 0.5, "grad_norm": 0.4931053049682215, "learning_rate": 2.835978835978836e-05, "loss": 0.3687, "loss_nan_ranks": 0, "loss_rank_avg": 0.14577624201774597, "step": 135, "valid_targets_mean": 6307.6, "valid_targets_min": 3966 }, { "epoch": 0.5185185185185185, "grad_norm": 0.6073682337976894, "learning_rate": 2.941798941798942e-05, "loss": 0.3514, "loss_nan_ranks": 0, "loss_rank_avg": 0.17465010285377502, "step": 140, "valid_targets_mean": 6394.0, "valid_targets_min": 269 }, { "epoch": 0.5370370370370371, "grad_norm": 0.5024739310945823, "learning_rate": 3.047619047619048e-05, "loss": 0.3496, "loss_nan_ranks": 0, "loss_rank_avg": 0.1507670283317566, "step": 145, "valid_targets_mean": 6376.0, "valid_targets_min": 466 }, { "epoch": 0.5555555555555556, "grad_norm": 0.5277273202526614, "learning_rate": 3.153439153439154e-05, "loss": 0.3585, "loss_nan_ranks": 0, "loss_rank_avg": 0.18677863478660583, "step": 150, "valid_targets_mean": 7588.2, "valid_targets_min": 3447 }, { "epoch": 0.5740740740740741, "grad_norm": 0.4927981212250953, "learning_rate": 3.259259259259259e-05, "loss": 0.343, "loss_nan_ranks": 0, "loss_rank_avg": 0.15430906414985657, "step": 155, "valid_targets_mean": 6208.2, "valid_targets_min": 1884 }, { "epoch": 0.5925925925925926, "grad_norm": 0.5955616973209993, "learning_rate": 3.3650793650793656e-05, "loss": 0.3475, "loss_nan_ranks": 0, "loss_rank_avg": 0.16549400985240936, "step": 160, "valid_targets_mean": 5810.4, "valid_targets_min": 409 }, { "epoch": 0.6111111111111112, "grad_norm": 0.5308592994318894, "learning_rate": 3.470899470899471e-05, "loss": 0.3252, "loss_nan_ranks": 0, "loss_rank_avg": 0.16406959295272827, "step": 165, "valid_targets_mean": 7197.9, "valid_targets_min": 4781 }, { "epoch": 0.6296296296296297, "grad_norm": 0.5786264253850772, "learning_rate": 3.576719576719577e-05, "loss": 0.334, "loss_nan_ranks": 0, "loss_rank_avg": 0.14584289491176605, "step": 170, "valid_targets_mean": 5856.1, "valid_targets_min": 3947 }, { "epoch": 0.6481481481481481, "grad_norm": 0.5876267301381748, "learning_rate": 3.682539682539683e-05, "loss": 0.3364, "loss_nan_ranks": 0, "loss_rank_avg": 0.19383975863456726, "step": 175, "valid_targets_mean": 6151.6, "valid_targets_min": 298 }, { "epoch": 0.6666666666666666, "grad_norm": 0.5579841097601295, "learning_rate": 3.7883597883597885e-05, "loss": 0.3355, "loss_nan_ranks": 0, "loss_rank_avg": 0.19771698117256165, "step": 180, "valid_targets_mean": 7475.2, "valid_targets_min": 5181 }, { "epoch": 0.6851851851851852, "grad_norm": 0.5575120093361495, "learning_rate": 3.894179894179894e-05, "loss": 0.3306, "loss_nan_ranks": 0, "loss_rank_avg": 0.13605374097824097, "step": 185, "valid_targets_mean": 6225.5, "valid_targets_min": 2414 }, { "epoch": 0.7037037037037037, "grad_norm": 0.5695877338034481, "learning_rate": 4e-05, "loss": 0.3331, "loss_nan_ranks": 0, "loss_rank_avg": 0.19986039400100708, "step": 190, "valid_targets_mean": 8463.9, "valid_targets_min": 4419 }, { "epoch": 0.7222222222222222, "grad_norm": 0.5009496122576277, "learning_rate": 3.999914723760517e-05, "loss": 0.3364, "loss_nan_ranks": 0, "loss_rank_avg": 0.1776013821363449, "step": 195, "valid_targets_mean": 7864.9, "valid_targets_min": 5879 }, { "epoch": 0.7407407407407407, "grad_norm": 0.5293768737385741, "learning_rate": 3.999658902314104e-05, "loss": 0.338, "loss_nan_ranks": 0, "loss_rank_avg": 0.1694238781929016, "step": 200, "valid_targets_mean": 8140.8, "valid_targets_min": 5488 }, { "epoch": 0.7592592592592593, "grad_norm": 0.8375558031870537, "learning_rate": 3.999232557476252e-05, "loss": 0.3319, "loss_nan_ranks": 0, "loss_rank_avg": 0.16278810799121857, "step": 205, "valid_targets_mean": 6144.5, "valid_targets_min": 2437 }, { "epoch": 0.7777777777777778, "grad_norm": 0.529694505104515, "learning_rate": 3.9986357256040465e-05, "loss": 0.324, "loss_nan_ranks": 0, "loss_rank_avg": 0.14480090141296387, "step": 210, "valid_targets_mean": 6128.9, "valid_targets_min": 353 }, { "epoch": 0.7962962962962963, "grad_norm": 0.5687228641593676, "learning_rate": 3.997868457593064e-05, "loss": 0.3187, "loss_nan_ranks": 0, "loss_rank_avg": 0.16306929290294647, "step": 215, "valid_targets_mean": 6418.0, "valid_targets_min": 3908 }, { "epoch": 0.8148148148148148, "grad_norm": 0.47438386161926516, "learning_rate": 3.996930818873035e-05, "loss": 0.3186, "loss_nan_ranks": 0, "loss_rank_avg": 0.14409326016902924, "step": 220, "valid_targets_mean": 6047.1, "valid_targets_min": 304 }, { "epoch": 0.8333333333333334, "grad_norm": 0.5486113720670046, "learning_rate": 3.9958228894022645e-05, "loss": 0.3106, "loss_nan_ranks": 0, "loss_rank_avg": 0.15029951930046082, "step": 225, "valid_targets_mean": 6173.9, "valid_targets_min": 3754 }, { "epoch": 0.8518518518518519, "grad_norm": 0.5170677946882957, "learning_rate": 3.994544763660811e-05, "loss": 0.3071, "loss_nan_ranks": 0, "loss_rank_avg": 0.16101841628551483, "step": 230, "valid_targets_mean": 8052.0, "valid_targets_min": 5204 }, { "epoch": 0.8703703703703703, "grad_norm": 0.5874905209063319, "learning_rate": 3.993096550642431e-05, "loss": 0.3234, "loss_nan_ranks": 0, "loss_rank_avg": 0.21433310210704803, "step": 235, "valid_targets_mean": 8033.4, "valid_targets_min": 4617 }, { "epoch": 0.8888888888888888, "grad_norm": 0.5772717839196063, "learning_rate": 3.991478373845286e-05, "loss": 0.3187, "loss_nan_ranks": 0, "loss_rank_avg": 0.15627962350845337, "step": 240, "valid_targets_mean": 6618.6, "valid_targets_min": 2600 }, { "epoch": 0.9074074074074074, "grad_norm": 0.5510574817704281, "learning_rate": 3.989690371261406e-05, "loss": 0.3214, "loss_nan_ranks": 0, "loss_rank_avg": 0.16640323400497437, "step": 245, "valid_targets_mean": 6397.0, "valid_targets_min": 456 }, { "epoch": 0.9259259259259259, "grad_norm": 0.530761854892717, "learning_rate": 3.987732695364929e-05, "loss": 0.3105, "loss_nan_ranks": 0, "loss_rank_avg": 0.14477106928825378, "step": 250, "valid_targets_mean": 5501.1, "valid_targets_min": 2206 }, { "epoch": 0.9444444444444444, "grad_norm": 0.5289325090905338, "learning_rate": 3.985605513099093e-05, "loss": 0.3163, "loss_nan_ranks": 0, "loss_rank_avg": 0.13415177166461945, "step": 255, "valid_targets_mean": 6082.1, "valid_targets_min": 5234 }, { "epoch": 0.9629629629629629, "grad_norm": 0.569051693937061, "learning_rate": 3.983309005862002e-05, "loss": 0.3324, "loss_nan_ranks": 0, "loss_rank_avg": 0.13313525915145874, "step": 260, "valid_targets_mean": 4545.2, "valid_targets_min": 669 }, { "epoch": 0.9814814814814815, "grad_norm": 0.5353604464324123, "learning_rate": 3.980843369491159e-05, "loss": 0.3171, "loss_nan_ranks": 0, "loss_rank_avg": 0.199254110455513, "step": 265, "valid_targets_mean": 7222.6, "valid_targets_min": 4705 }, { "epoch": 1.0, "grad_norm": 0.5103477604549704, "learning_rate": 3.9782088142467595e-05, "loss": 0.3107, "loss_nan_ranks": 0, "loss_rank_avg": 0.1165643036365509, "step": 270, "valid_targets_mean": 5523.9, "valid_targets_min": 396 }, { "epoch": 1.0185185185185186, "grad_norm": 0.5065171933118513, "learning_rate": 3.975405564793768e-05, "loss": 0.3119, "loss_nan_ranks": 0, "loss_rank_avg": 0.21201768517494202, "step": 275, "valid_targets_mean": 7694.2, "valid_targets_min": 434 }, { "epoch": 1.037037037037037, "grad_norm": 0.5475032237106019, "learning_rate": 3.972433860182757e-05, "loss": 0.2949, "loss_nan_ranks": 0, "loss_rank_avg": 0.13425172865390778, "step": 280, "valid_targets_mean": 6122.1, "valid_targets_min": 377 }, { "epoch": 1.0555555555555556, "grad_norm": 0.59475901438283, "learning_rate": 3.969293953829519e-05, "loss": 0.2955, "loss_nan_ranks": 0, "loss_rank_avg": 0.14180830121040344, "step": 285, "valid_targets_mean": 7330.9, "valid_targets_min": 4746 }, { "epoch": 1.074074074074074, "grad_norm": 0.4732078699188954, "learning_rate": 3.965986113493462e-05, "loss": 0.2973, "loss_nan_ranks": 0, "loss_rank_avg": 0.14922069013118744, "step": 290, "valid_targets_mean": 7192.6, "valid_targets_min": 3965 }, { "epoch": 1.0925925925925926, "grad_norm": 0.5946837128648786, "learning_rate": 3.9625106212547696e-05, "loss": 0.2955, "loss_nan_ranks": 0, "loss_rank_avg": 0.12483422458171844, "step": 295, "valid_targets_mean": 4485.5, "valid_targets_min": 461 }, { "epoch": 1.1111111111111112, "grad_norm": 0.48537206737996996, "learning_rate": 3.9588677734903505e-05, "loss": 0.289, "loss_nan_ranks": 0, "loss_rank_avg": 0.14004018902778625, "step": 300, "valid_targets_mean": 7126.4, "valid_targets_min": 2065 }, { "epoch": 1.1296296296296295, "grad_norm": 0.5868179766871565, "learning_rate": 3.955057880848563e-05, "loss": 0.2944, "loss_nan_ranks": 0, "loss_rank_avg": 0.1457635760307312, "step": 305, "valid_targets_mean": 7584.9, "valid_targets_min": 3016 }, { "epoch": 1.1481481481481481, "grad_norm": 0.5531573803747889, "learning_rate": 3.9510812682227245e-05, "loss": 0.3144, "loss_nan_ranks": 0, "loss_rank_avg": 0.1710018515586853, "step": 310, "valid_targets_mean": 6904.5, "valid_targets_min": 4493 }, { "epoch": 1.1666666666666667, "grad_norm": 0.5478913235405003, "learning_rate": 3.946938274723405e-05, "loss": 0.2974, "loss_nan_ranks": 0, "loss_rank_avg": 0.15083035826683044, "step": 315, "valid_targets_mean": 5984.2, "valid_targets_min": 308 }, { "epoch": 1.1851851851851851, "grad_norm": 0.47833830640118885, "learning_rate": 3.9426292536495114e-05, "loss": 0.3051, "loss_nan_ranks": 0, "loss_rank_avg": 0.15671434998512268, "step": 320, "valid_targets_mean": 8063.6, "valid_targets_min": 5433 }, { "epoch": 1.2037037037037037, "grad_norm": 0.5349890076251889, "learning_rate": 3.938154572458156e-05, "loss": 0.2926, "loss_nan_ranks": 0, "loss_rank_avg": 0.14529670774936676, "step": 325, "valid_targets_mean": 6739.8, "valid_targets_min": 5419 }, { "epoch": 1.2222222222222223, "grad_norm": 0.4994576555333995, "learning_rate": 3.9335146127333245e-05, "loss": 0.2906, "loss_nan_ranks": 0, "loss_rank_avg": 0.15106892585754395, "step": 330, "valid_targets_mean": 7467.2, "valid_targets_min": 3877 }, { "epoch": 1.2407407407407407, "grad_norm": 0.5935948253238553, "learning_rate": 3.928709770153332e-05, "loss": 0.2957, "loss_nan_ranks": 0, "loss_rank_avg": 0.14414165914058685, "step": 335, "valid_targets_mean": 4819.1, "valid_targets_min": 315 }, { "epoch": 1.2592592592592593, "grad_norm": 0.5524891188597137, "learning_rate": 3.923740454457087e-05, "loss": 0.2919, "loss_nan_ranks": 0, "loss_rank_avg": 0.1580236554145813, "step": 340, "valid_targets_mean": 6985.0, "valid_targets_min": 474 }, { "epoch": 1.2777777777777777, "grad_norm": 0.5456561849473739, "learning_rate": 3.9186070894091433e-05, "loss": 0.3006, "loss_nan_ranks": 0, "loss_rank_avg": 0.18431127071380615, "step": 345, "valid_targets_mean": 6896.9, "valid_targets_min": 3800 }, { "epoch": 1.2962962962962963, "grad_norm": 0.5624233622754709, "learning_rate": 3.9133101127635684e-05, "loss": 0.2892, "loss_nan_ranks": 0, "loss_rank_avg": 0.15472449362277985, "step": 350, "valid_targets_mean": 7580.0, "valid_targets_min": 367 }, { "epoch": 1.3148148148148149, "grad_norm": 0.5889370024503714, "learning_rate": 3.9078499762266124e-05, "loss": 0.2896, "loss_nan_ranks": 0, "loss_rank_avg": 0.12132951617240906, "step": 355, "valid_targets_mean": 5551.5, "valid_targets_min": 470 }, { "epoch": 1.3333333333333333, "grad_norm": 0.5062166048938903, "learning_rate": 3.902227145418185e-05, "loss": 0.2883, "loss_nan_ranks": 0, "loss_rank_avg": 0.1225079745054245, "step": 360, "valid_targets_mean": 5971.0, "valid_targets_min": 368 }, { "epoch": 1.3518518518518519, "grad_norm": 0.47488441470103737, "learning_rate": 3.896442099832153e-05, "loss": 0.2855, "loss_nan_ranks": 0, "loss_rank_avg": 0.12804746627807617, "step": 365, "valid_targets_mean": 6091.5, "valid_targets_min": 1976 }, { "epoch": 1.3703703703703702, "grad_norm": 0.5406355069355051, "learning_rate": 3.89049533279545e-05, "loss": 0.295, "loss_nan_ranks": 0, "loss_rank_avg": 0.18195563554763794, "step": 370, "valid_targets_mean": 6963.6, "valid_targets_min": 480 }, { "epoch": 1.3888888888888888, "grad_norm": 0.5574169233693884, "learning_rate": 3.884387351426005e-05, "loss": 0.2924, "loss_nan_ranks": 0, "loss_rank_avg": 0.15529048442840576, "step": 375, "valid_targets_mean": 6006.2, "valid_targets_min": 541 }, { "epoch": 1.4074074074074074, "grad_norm": 0.535890457895036, "learning_rate": 3.8781186765895e-05, "loss": 0.2897, "loss_nan_ranks": 0, "loss_rank_avg": 0.15748731791973114, "step": 380, "valid_targets_mean": 6537.9, "valid_targets_min": 4917 }, { "epoch": 1.425925925925926, "grad_norm": 0.5095040628778826, "learning_rate": 3.8716898428549526e-05, "loss": 0.2847, "loss_nan_ranks": 0, "loss_rank_avg": 0.09297989308834076, "step": 385, "valid_targets_mean": 4088.0, "valid_targets_min": 304 }, { "epoch": 1.4444444444444444, "grad_norm": 0.4716182875282023, "learning_rate": 3.865101398449127e-05, "loss": 0.2942, "loss_nan_ranks": 0, "loss_rank_avg": 0.1432163268327713, "step": 390, "valid_targets_mean": 7432.4, "valid_targets_min": 5799 }, { "epoch": 1.462962962962963, "grad_norm": 0.5386315145255598, "learning_rate": 3.858353905209787e-05, "loss": 0.2921, "loss_nan_ranks": 0, "loss_rank_avg": 0.17763468623161316, "step": 395, "valid_targets_mean": 7208.8, "valid_targets_min": 4700 }, { "epoch": 1.4814814814814814, "grad_norm": 0.5378548282182605, "learning_rate": 3.8514479385377813e-05, "loss": 0.2919, "loss_nan_ranks": 0, "loss_rank_avg": 0.15650640428066254, "step": 400, "valid_targets_mean": 6712.2, "valid_targets_min": 1883 }, { "epoch": 1.5, "grad_norm": 0.5142301123444354, "learning_rate": 3.844384087347978e-05, "loss": 0.3043, "loss_nan_ranks": 0, "loss_rank_avg": 0.1322391927242279, "step": 405, "valid_targets_mean": 6352.9, "valid_targets_min": 3290 }, { "epoch": 1.5185185185185186, "grad_norm": 0.5505544471616537, "learning_rate": 3.837162954019042e-05, "loss": 0.2814, "loss_nan_ranks": 0, "loss_rank_avg": 0.14132408797740936, "step": 410, "valid_targets_mean": 6377.4, "valid_targets_min": 3935 }, { "epoch": 1.5370370370370372, "grad_norm": 0.6229636570648036, "learning_rate": 3.829785154342069e-05, "loss": 0.2912, "loss_nan_ranks": 0, "loss_rank_avg": 0.15506146848201752, "step": 415, "valid_targets_mean": 7248.5, "valid_targets_min": 5048 }, { "epoch": 1.5555555555555556, "grad_norm": 0.7699539965412168, "learning_rate": 3.822251317468073e-05, "loss": 0.2915, "loss_nan_ranks": 0, "loss_rank_avg": 0.13349926471710205, "step": 420, "valid_targets_mean": 6603.4, "valid_targets_min": 336 }, { "epoch": 1.574074074074074, "grad_norm": 0.4695932327596188, "learning_rate": 3.814562085854328e-05, "loss": 0.2963, "loss_nan_ranks": 0, "loss_rank_avg": 0.14610108733177185, "step": 425, "valid_targets_mean": 7011.5, "valid_targets_min": 457 }, { "epoch": 1.5925925925925926, "grad_norm": 0.572965259354651, "learning_rate": 3.8067181152095935e-05, "loss": 0.2755, "loss_nan_ranks": 0, "loss_rank_avg": 0.12474419176578522, "step": 430, "valid_targets_mean": 6207.2, "valid_targets_min": 368 }, { "epoch": 1.6111111111111112, "grad_norm": 0.5072200634901262, "learning_rate": 3.7987200744381866e-05, "loss": 0.2787, "loss_nan_ranks": 0, "loss_rank_avg": 0.14492423832416534, "step": 435, "valid_targets_mean": 6685.0, "valid_targets_min": 3491 }, { "epoch": 1.6296296296296298, "grad_norm": 0.5315100425545446, "learning_rate": 3.790568645582949e-05, "loss": 0.2889, "loss_nan_ranks": 0, "loss_rank_avg": 0.12990285456180573, "step": 440, "valid_targets_mean": 5514.2, "valid_targets_min": 487 }, { "epoch": 1.6481481481481481, "grad_norm": 0.48299403677554154, "learning_rate": 3.7822645237670786e-05, "loss": 0.2859, "loss_nan_ranks": 0, "loss_rank_avg": 0.1585882604122162, "step": 445, "valid_targets_mean": 8019.8, "valid_targets_min": 4406 }, { "epoch": 1.6666666666666665, "grad_norm": 0.5281572977294482, "learning_rate": 3.773808417134857e-05, "loss": 0.2927, "loss_nan_ranks": 0, "loss_rank_avg": 0.1543073058128357, "step": 450, "valid_targets_mean": 7052.0, "valid_targets_min": 3687 }, { "epoch": 1.6851851851851851, "grad_norm": 0.4845837149564397, "learning_rate": 3.7652010467912586e-05, "loss": 0.2949, "loss_nan_ranks": 0, "loss_rank_avg": 0.15963464975357056, "step": 455, "valid_targets_mean": 8213.2, "valid_targets_min": 6411 }, { "epoch": 1.7037037037037037, "grad_norm": 0.5408032989769307, "learning_rate": 3.756443146740457e-05, "loss": 0.2984, "loss_nan_ranks": 0, "loss_rank_avg": 0.13530519604682922, "step": 460, "valid_targets_mean": 5813.6, "valid_targets_min": 3993 }, { "epoch": 1.7222222222222223, "grad_norm": 0.5353189538043924, "learning_rate": 3.7475354638232364e-05, "loss": 0.2744, "loss_nan_ranks": 0, "loss_rank_avg": 0.10430891811847687, "step": 465, "valid_targets_mean": 4659.6, "valid_targets_min": 1661 }, { "epoch": 1.7407407407407407, "grad_norm": 0.5226421018247178, "learning_rate": 3.7384787576532955e-05, "loss": 0.2848, "loss_nan_ranks": 0, "loss_rank_avg": 0.1390485018491745, "step": 470, "valid_targets_mean": 7290.6, "valid_targets_min": 810 }, { "epoch": 1.7592592592592593, "grad_norm": 0.49394601207037214, "learning_rate": 3.729273800552482e-05, "loss": 0.2847, "loss_nan_ranks": 0, "loss_rank_avg": 0.1647380143404007, "step": 475, "valid_targets_mean": 7383.2, "valid_targets_min": 5781 }, { "epoch": 1.7777777777777777, "grad_norm": 0.6224532686212161, "learning_rate": 3.719921377484919e-05, "loss": 0.2841, "loss_nan_ranks": 0, "loss_rank_avg": 0.14958155155181885, "step": 480, "valid_targets_mean": 6402.1, "valid_targets_min": 4041 }, { "epoch": 1.7962962962962963, "grad_norm": 0.5990070202349348, "learning_rate": 3.710422285990078e-05, "loss": 0.2858, "loss_nan_ranks": 0, "loss_rank_avg": 0.1453811526298523, "step": 485, "valid_targets_mean": 5552.4, "valid_targets_min": 192 }, { "epoch": 1.8148148148148149, "grad_norm": 0.47961741304787425, "learning_rate": 3.700777336114758e-05, "loss": 0.2809, "loss_nan_ranks": 0, "loss_rank_avg": 0.13036441802978516, "step": 490, "valid_targets_mean": 6451.1, "valid_targets_min": 307 }, { "epoch": 1.8333333333333335, "grad_norm": 0.5499567840797964, "learning_rate": 3.690987350344017e-05, "loss": 0.2686, "loss_nan_ranks": 0, "loss_rank_avg": 0.11820720136165619, "step": 495, "valid_targets_mean": 6068.8, "valid_targets_min": 487 }, { "epoch": 1.8518518518518519, "grad_norm": 0.5346461604872333, "learning_rate": 3.681053163531024e-05, "loss": 0.2839, "loss_nan_ranks": 0, "loss_rank_avg": 0.1634947955608368, "step": 500, "valid_targets_mean": 7725.8, "valid_targets_min": 4799 }, { "epoch": 1.8703703703703702, "grad_norm": 0.5253112100424155, "learning_rate": 3.6709756228258735e-05, "loss": 0.2876, "loss_nan_ranks": 0, "loss_rank_avg": 0.13562235236167908, "step": 505, "valid_targets_mean": 6321.1, "valid_targets_min": 3914 }, { "epoch": 1.8888888888888888, "grad_norm": 0.49379712991928504, "learning_rate": 3.66075558760334e-05, "loss": 0.2768, "loss_nan_ranks": 0, "loss_rank_avg": 0.11750568449497223, "step": 510, "valid_targets_mean": 6793.4, "valid_targets_min": 321 }, { "epoch": 1.9074074074074074, "grad_norm": 0.5151893286966898, "learning_rate": 3.6503939293895945e-05, "loss": 0.2815, "loss_nan_ranks": 0, "loss_rank_avg": 0.14757773280143738, "step": 515, "valid_targets_mean": 8412.4, "valid_targets_min": 6081 }, { "epoch": 1.925925925925926, "grad_norm": 0.5044055970161373, "learning_rate": 3.639891531787885e-05, "loss": 0.2634, "loss_nan_ranks": 0, "loss_rank_avg": 0.14695027470588684, "step": 520, "valid_targets_mean": 6752.4, "valid_targets_min": 245 }, { "epoch": 1.9444444444444444, "grad_norm": 0.5430440383866891, "learning_rate": 3.6292492904031844e-05, "loss": 0.2853, "loss_nan_ranks": 0, "loss_rank_avg": 0.11272375285625458, "step": 525, "valid_targets_mean": 4375.9, "valid_targets_min": 357 }, { "epoch": 1.9629629629629628, "grad_norm": 0.535911996260599, "learning_rate": 3.6184681127658166e-05, "loss": 0.2824, "loss_nan_ranks": 0, "loss_rank_avg": 0.1217561811208725, "step": 530, "valid_targets_mean": 4825.1, "valid_targets_min": 403 }, { "epoch": 1.9814814814814814, "grad_norm": 0.4652442437519838, "learning_rate": 3.607548918254068e-05, "loss": 0.2859, "loss_nan_ranks": 0, "loss_rank_avg": 0.13177233934402466, "step": 535, "valid_targets_mean": 7426.4, "valid_targets_min": 4939 }, { "epoch": 2.0, "grad_norm": 0.526551082703696, "learning_rate": 3.5964926380157856e-05, "loss": 0.2848, "loss_nan_ranks": 0, "loss_rank_avg": 0.16320262849330902, "step": 540, "valid_targets_mean": 7920.5, "valid_targets_min": 4695 }, { "epoch": 2.0185185185185186, "grad_norm": 0.5416505181112615, "learning_rate": 3.585300214888971e-05, "loss": 0.2666, "loss_nan_ranks": 0, "loss_rank_avg": 0.14433258771896362, "step": 545, "valid_targets_mean": 7458.1, "valid_targets_min": 3929 }, { "epoch": 2.037037037037037, "grad_norm": 0.574840102953103, "learning_rate": 3.5739726033213785e-05, "loss": 0.2608, "loss_nan_ranks": 0, "loss_rank_avg": 0.11546637117862701, "step": 550, "valid_targets_mean": 5356.0, "valid_targets_min": 385 }, { "epoch": 2.0555555555555554, "grad_norm": 0.5607643377480926, "learning_rate": 3.562510769289124e-05, "loss": 0.2689, "loss_nan_ranks": 0, "loss_rank_avg": 0.13067400455474854, "step": 555, "valid_targets_mean": 6073.8, "valid_targets_min": 250 }, { "epoch": 2.074074074074074, "grad_norm": 0.5218117367647941, "learning_rate": 3.550915690214313e-05, "loss": 0.2641, "loss_nan_ranks": 0, "loss_rank_avg": 0.14398810267448425, "step": 560, "valid_targets_mean": 6835.8, "valid_targets_min": 3422 }, { "epoch": 2.0925925925925926, "grad_norm": 0.5934505930381603, "learning_rate": 3.539188354881685e-05, "loss": 0.2715, "loss_nan_ranks": 0, "loss_rank_avg": 0.13066671788692474, "step": 565, "valid_targets_mean": 6965.4, "valid_targets_min": 4297 }, { "epoch": 2.111111111111111, "grad_norm": 0.4708347758965413, "learning_rate": 3.527329763354295e-05, "loss": 0.26, "loss_nan_ranks": 0, "loss_rank_avg": 0.12042200565338135, "step": 570, "valid_targets_mean": 7412.5, "valid_targets_min": 3336 }, { "epoch": 2.1296296296296298, "grad_norm": 0.5708517268952519, "learning_rate": 3.515340926888236e-05, "loss": 0.2575, "loss_nan_ranks": 0, "loss_rank_avg": 0.1318579465150833, "step": 575, "valid_targets_mean": 6028.6, "valid_targets_min": 4547 }, { "epoch": 2.148148148148148, "grad_norm": 0.5178407866313944, "learning_rate": 3.503222867846397e-05, "loss": 0.2616, "loss_nan_ranks": 0, "loss_rank_avg": 0.10885745286941528, "step": 580, "valid_targets_mean": 5752.1, "valid_targets_min": 257 }, { "epoch": 2.1666666666666665, "grad_norm": 0.5863919284533499, "learning_rate": 3.490976619611282e-05, "loss": 0.2655, "loss_nan_ranks": 0, "loss_rank_avg": 0.13897745311260223, "step": 585, "valid_targets_mean": 6269.6, "valid_targets_min": 3887 }, { "epoch": 2.185185185185185, "grad_norm": 0.5881076768641105, "learning_rate": 3.47860322649689e-05, "loss": 0.2611, "loss_nan_ranks": 0, "loss_rank_avg": 0.08158881962299347, "step": 590, "valid_targets_mean": 4183.5, "valid_targets_min": 353 }, { "epoch": 2.2037037037037037, "grad_norm": 0.5449342889554193, "learning_rate": 3.4661037436596526e-05, "loss": 0.2686, "loss_nan_ranks": 0, "loss_rank_avg": 0.12910135090351105, "step": 595, "valid_targets_mean": 7711.6, "valid_targets_min": 5163 }, { "epoch": 2.2222222222222223, "grad_norm": 0.5061684984724052, "learning_rate": 3.453479237008465e-05, "loss": 0.2632, "loss_nan_ranks": 0, "loss_rank_avg": 0.15672308206558228, "step": 600, "valid_targets_mean": 8003.1, "valid_targets_min": 5234 }, { "epoch": 2.240740740740741, "grad_norm": 0.524095008431537, "learning_rate": 3.4407307831137775e-05, "loss": 0.259, "loss_nan_ranks": 0, "loss_rank_avg": 0.12406113743782043, "step": 605, "valid_targets_mean": 6011.2, "valid_targets_min": 2463 }, { "epoch": 2.259259259259259, "grad_norm": 0.4572943901172685, "learning_rate": 3.4278594691157985e-05, "loss": 0.2769, "loss_nan_ranks": 0, "loss_rank_avg": 0.1535755693912506, "step": 610, "valid_targets_mean": 8442.5, "valid_targets_min": 4737 }, { "epoch": 2.2777777777777777, "grad_norm": 0.48298994041640353, "learning_rate": 3.4148663926317826e-05, "loss": 0.2567, "loss_nan_ranks": 0, "loss_rank_avg": 0.10671807080507278, "step": 615, "valid_targets_mean": 6054.6, "valid_targets_min": 446 }, { "epoch": 2.2962962962962963, "grad_norm": 0.5492340875103793, "learning_rate": 3.401752661662431e-05, "loss": 0.2618, "loss_nan_ranks": 0, "loss_rank_avg": 0.15312159061431885, "step": 620, "valid_targets_mean": 6727.8, "valid_targets_min": 2786 }, { "epoch": 2.314814814814815, "grad_norm": 0.5392228802956868, "learning_rate": 3.388519394497408e-05, "loss": 0.2635, "loss_nan_ranks": 0, "loss_rank_avg": 0.13924045860767365, "step": 625, "valid_targets_mean": 6856.8, "valid_targets_min": 5211 }, { "epoch": 2.3333333333333335, "grad_norm": 0.5233591164759019, "learning_rate": 3.375167719619972e-05, "loss": 0.2628, "loss_nan_ranks": 0, "loss_rank_avg": 0.14242339134216309, "step": 630, "valid_targets_mean": 6251.1, "valid_targets_min": 405 }, { "epoch": 2.351851851851852, "grad_norm": 0.4419020062221808, "learning_rate": 3.361698775610748e-05, "loss": 0.2569, "loss_nan_ranks": 0, "loss_rank_avg": 0.12432446330785751, "step": 635, "valid_targets_mean": 6905.8, "valid_targets_min": 4639 }, { "epoch": 2.3703703703703702, "grad_norm": 0.5468809658880694, "learning_rate": 3.3481137110506305e-05, "loss": 0.2613, "loss_nan_ranks": 0, "loss_rank_avg": 0.15010154247283936, "step": 640, "valid_targets_mean": 5742.6, "valid_targets_min": 396 }, { "epoch": 2.388888888888889, "grad_norm": 0.4848873250194103, "learning_rate": 3.334413684422839e-05, "loss": 0.2691, "loss_nan_ranks": 0, "loss_rank_avg": 0.14694947004318237, "step": 645, "valid_targets_mean": 8914.1, "valid_targets_min": 6186 }, { "epoch": 2.4074074074074074, "grad_norm": 0.4912012424730424, "learning_rate": 3.3205998640141255e-05, "loss": 0.2658, "loss_nan_ranks": 0, "loss_rank_avg": 0.12130933254957199, "step": 650, "valid_targets_mean": 6549.1, "valid_targets_min": 415 }, { "epoch": 2.425925925925926, "grad_norm": 0.4437323909959092, "learning_rate": 3.3066734278151464e-05, "loss": 0.2639, "loss_nan_ranks": 0, "loss_rank_avg": 0.11655676364898682, "step": 655, "valid_targets_mean": 6480.2, "valid_targets_min": 117 }, { "epoch": 2.4444444444444446, "grad_norm": 0.4719184803629788, "learning_rate": 3.292635563420009e-05, "loss": 0.2585, "loss_nan_ranks": 0, "loss_rank_avg": 0.12040881812572479, "step": 660, "valid_targets_mean": 5791.6, "valid_targets_min": 421 }, { "epoch": 2.462962962962963, "grad_norm": 0.5216259861354975, "learning_rate": 3.2784874679250026e-05, "loss": 0.2588, "loss_nan_ranks": 0, "loss_rank_avg": 0.12373167276382446, "step": 665, "valid_targets_mean": 6689.1, "valid_targets_min": 4072 }, { "epoch": 2.4814814814814814, "grad_norm": 0.4482229745629832, "learning_rate": 3.264230347826504e-05, "loss": 0.2689, "loss_nan_ranks": 0, "loss_rank_avg": 0.13821570575237274, "step": 670, "valid_targets_mean": 7872.4, "valid_targets_min": 4265 }, { "epoch": 2.5, "grad_norm": 0.4688397809024902, "learning_rate": 3.249865418918102e-05, "loss": 0.2689, "loss_nan_ranks": 0, "loss_rank_avg": 0.12180155515670776, "step": 675, "valid_targets_mean": 8303.1, "valid_targets_min": 5882 }, { "epoch": 2.5185185185185186, "grad_norm": 0.5067336932293169, "learning_rate": 3.2353939061869145e-05, "loss": 0.2624, "loss_nan_ranks": 0, "loss_rank_avg": 0.17937231063842773, "step": 680, "valid_targets_mean": 7677.5, "valid_targets_min": 385 }, { "epoch": 2.537037037037037, "grad_norm": 0.495004907760756, "learning_rate": 3.2208170437091267e-05, "loss": 0.2754, "loss_nan_ranks": 0, "loss_rank_avg": 0.12227485328912735, "step": 685, "valid_targets_mean": 6554.2, "valid_targets_min": 460 }, { "epoch": 2.5555555555555554, "grad_norm": 0.46262512362964875, "learning_rate": 3.206136074544754e-05, "loss": 0.2632, "loss_nan_ranks": 0, "loss_rank_avg": 0.1247977614402771, "step": 690, "valid_targets_mean": 5935.9, "valid_targets_min": 413 }, { "epoch": 2.574074074074074, "grad_norm": 0.577326499426772, "learning_rate": 3.1913522506316396e-05, "loss": 0.2688, "loss_nan_ranks": 0, "loss_rank_avg": 0.12780506908893585, "step": 695, "valid_targets_mean": 6527.5, "valid_targets_min": 229 }, { "epoch": 2.5925925925925926, "grad_norm": 0.5017879104054451, "learning_rate": 3.17646683267869e-05, "loss": 0.2683, "loss_nan_ranks": 0, "loss_rank_avg": 0.14587455987930298, "step": 700, "valid_targets_mean": 6552.1, "valid_targets_min": 1068 }, { "epoch": 2.611111111111111, "grad_norm": 0.5078274256321608, "learning_rate": 3.161481090058374e-05, "loss": 0.2677, "loss_nan_ranks": 0, "loss_rank_avg": 0.13253484666347504, "step": 705, "valid_targets_mean": 7849.0, "valid_targets_min": 5687 }, { "epoch": 2.6296296296296298, "grad_norm": 0.48831962900628234, "learning_rate": 3.146396300698467e-05, "loss": 0.2603, "loss_nan_ranks": 0, "loss_rank_avg": 0.11534512788057327, "step": 710, "valid_targets_mean": 5541.4, "valid_targets_min": 475 }, { "epoch": 2.648148148148148, "grad_norm": 1.7586484361894488, "learning_rate": 3.1312137509730776e-05, "loss": 0.2474, "loss_nan_ranks": 0, "loss_rank_avg": 0.10342179238796234, "step": 715, "valid_targets_mean": 4904.9, "valid_targets_min": 3551 }, { "epoch": 2.6666666666666665, "grad_norm": 0.5727289688315167, "learning_rate": 3.115934735592954e-05, "loss": 0.2506, "loss_nan_ranks": 0, "loss_rank_avg": 0.15417921543121338, "step": 720, "valid_targets_mean": 5637.8, "valid_targets_min": 457 }, { "epoch": 2.685185185185185, "grad_norm": 0.5013909576024947, "learning_rate": 3.10056055749507e-05, "loss": 0.253, "loss_nan_ranks": 0, "loss_rank_avg": 0.12603460252285004, "step": 725, "valid_targets_mean": 7198.4, "valid_targets_min": 4333 }, { "epoch": 2.7037037037037037, "grad_norm": 0.5511286906952175, "learning_rate": 3.0850925277315193e-05, "loss": 0.2614, "loss_nan_ranks": 0, "loss_rank_avg": 0.1324155628681183, "step": 730, "valid_targets_mean": 6283.8, "valid_targets_min": 501 }, { "epoch": 2.7222222222222223, "grad_norm": 0.5435986623296872, "learning_rate": 3.0695319653577116e-05, "loss": 0.2538, "loss_nan_ranks": 0, "loss_rank_avg": 0.11377684026956558, "step": 735, "valid_targets_mean": 5828.4, "valid_targets_min": 3486 }, { "epoch": 2.7407407407407405, "grad_norm": 0.48998670607021577, "learning_rate": 3.0538801973198914e-05, "loss": 0.2583, "loss_nan_ranks": 0, "loss_rank_avg": 0.11242972314357758, "step": 740, "valid_targets_mean": 6409.4, "valid_targets_min": 3687 }, { "epoch": 2.7592592592592595, "grad_norm": 0.5181670834094368, "learning_rate": 3.0381385583419783e-05, "loss": 0.2762, "loss_nan_ranks": 0, "loss_rank_avg": 0.11965565383434296, "step": 745, "valid_targets_mean": 6171.4, "valid_targets_min": 3598 }, { "epoch": 2.7777777777777777, "grad_norm": 0.4892760482968971, "learning_rate": 3.0223083908117466e-05, "loss": 0.2551, "loss_nan_ranks": 0, "loss_rank_avg": 0.1174713522195816, "step": 750, "valid_targets_mean": 6405.0, "valid_targets_min": 4515 }, { "epoch": 2.7962962962962963, "grad_norm": 0.6775974514440799, "learning_rate": 3.0063910446663542e-05, "loss": 0.2613, "loss_nan_ranks": 0, "loss_rank_avg": 0.12394280731678009, "step": 755, "valid_targets_mean": 5689.2, "valid_targets_min": 472 }, { "epoch": 2.814814814814815, "grad_norm": 0.48632532617444213, "learning_rate": 2.9903878772772227e-05, "loss": 0.2561, "loss_nan_ranks": 0, "loss_rank_avg": 0.12731653451919556, "step": 760, "valid_targets_mean": 7426.2, "valid_targets_min": 3712 }, { "epoch": 2.8333333333333335, "grad_norm": 0.46253818350114984, "learning_rate": 2.9743002533342876e-05, "loss": 0.2602, "loss_nan_ranks": 0, "loss_rank_avg": 0.11572107672691345, "step": 765, "valid_targets_mean": 7458.6, "valid_targets_min": 3852 }, { "epoch": 2.851851851851852, "grad_norm": 0.5203069527342822, "learning_rate": 2.9581295447296202e-05, "loss": 0.262, "loss_nan_ranks": 0, "loss_rank_avg": 0.10665138065814972, "step": 770, "valid_targets_mean": 5312.9, "valid_targets_min": 370 }, { "epoch": 2.8703703703703702, "grad_norm": 0.508992244499264, "learning_rate": 2.9418771304404408e-05, "loss": 0.2529, "loss_nan_ranks": 0, "loss_rank_avg": 0.13962328433990479, "step": 775, "valid_targets_mean": 6861.0, "valid_targets_min": 2838 }, { "epoch": 2.888888888888889, "grad_norm": 0.48349040691849704, "learning_rate": 2.9255443964115217e-05, "loss": 0.266, "loss_nan_ranks": 0, "loss_rank_avg": 0.1340981125831604, "step": 780, "valid_targets_mean": 7161.6, "valid_targets_min": 3762 }, { "epoch": 2.9074074074074074, "grad_norm": 0.47898223750895574, "learning_rate": 2.9091327354370014e-05, "loss": 0.2538, "loss_nan_ranks": 0, "loss_rank_avg": 0.13395154476165771, "step": 785, "valid_targets_mean": 6627.9, "valid_targets_min": 69 }, { "epoch": 2.925925925925926, "grad_norm": 0.4541657970273562, "learning_rate": 2.8926435470416123e-05, "loss": 0.2639, "loss_nan_ranks": 0, "loss_rank_avg": 0.1160229966044426, "step": 790, "valid_targets_mean": 6603.0, "valid_targets_min": 572 }, { "epoch": 2.9444444444444446, "grad_norm": 0.5540368967272006, "learning_rate": 2.8760782373613322e-05, "loss": 0.2497, "loss_nan_ranks": 0, "loss_rank_avg": 0.13160249590873718, "step": 795, "valid_targets_mean": 6338.4, "valid_targets_min": 411 }, { "epoch": 2.962962962962963, "grad_norm": 0.7359958376980468, "learning_rate": 2.859438219023477e-05, "loss": 0.2654, "loss_nan_ranks": 0, "loss_rank_avg": 0.15799115598201752, "step": 800, "valid_targets_mean": 5611.8, "valid_targets_min": 273 }, { "epoch": 2.9814814814814814, "grad_norm": 0.4824226714339216, "learning_rate": 2.8427249110262346e-05, "loss": 0.2508, "loss_nan_ranks": 0, "loss_rank_avg": 0.1053648293018341, "step": 805, "valid_targets_mean": 5927.5, "valid_targets_min": 489 }, { "epoch": 3.0, "grad_norm": 0.5382552432072585, "learning_rate": 2.8259397386176616e-05, "loss": 0.2581, "loss_nan_ranks": 0, "loss_rank_avg": 0.11860036849975586, "step": 810, "valid_targets_mean": 6269.6, "valid_targets_min": 5439 }, { "epoch": 3.0185185185185186, "grad_norm": 1.273808109588435, "learning_rate": 2.809084133174139e-05, "loss": 0.2365, "loss_nan_ranks": 0, "loss_rank_avg": 0.09889354556798935, "step": 815, "valid_targets_mean": 6494.2, "valid_targets_min": 4860 }, { "epoch": 3.037037037037037, "grad_norm": 0.5296610389294772, "learning_rate": 2.792159532078314e-05, "loss": 0.2369, "loss_nan_ranks": 0, "loss_rank_avg": 0.1007370874285698, "step": 820, "valid_targets_mean": 6074.8, "valid_targets_min": 3754 }, { "epoch": 3.0555555555555554, "grad_norm": 0.5109827190620847, "learning_rate": 2.775167378596522e-05, "loss": 0.2452, "loss_nan_ranks": 0, "loss_rank_avg": 0.1191176027059555, "step": 825, "valid_targets_mean": 6418.1, "valid_targets_min": 965 }, { "epoch": 3.074074074074074, "grad_norm": 0.5158436295503185, "learning_rate": 2.7581091217557134e-05, "loss": 0.2425, "loss_nan_ranks": 0, "loss_rank_avg": 0.13595688343048096, "step": 830, "valid_targets_mean": 6856.4, "valid_targets_min": 2920 }, { "epoch": 3.0925925925925926, "grad_norm": 0.5078354894585061, "learning_rate": 2.740986216219884e-05, "loss": 0.2413, "loss_nan_ranks": 0, "loss_rank_avg": 0.12502720952033997, "step": 835, "valid_targets_mean": 6669.6, "valid_targets_min": 4233 }, { "epoch": 3.111111111111111, "grad_norm": 0.5054778272970494, "learning_rate": 2.7238001221660257e-05, "loss": 0.2398, "loss_nan_ranks": 0, "loss_rank_avg": 0.12242163717746735, "step": 840, "valid_targets_mean": 8300.9, "valid_targets_min": 7173 }, { "epoch": 3.1296296296296298, "grad_norm": 0.5126564174818152, "learning_rate": 2.7065523051596114e-05, "loss": 0.247, "loss_nan_ranks": 0, "loss_rank_avg": 0.12792211771011353, "step": 845, "valid_targets_mean": 6557.4, "valid_targets_min": 1560 }, { "epoch": 3.148148148148148, "grad_norm": 0.6105982078938521, "learning_rate": 2.6892442360296152e-05, "loss": 0.2414, "loss_nan_ranks": 0, "loss_rank_avg": 0.12742607295513153, "step": 850, "valid_targets_mean": 6655.6, "valid_targets_min": 2065 }, { "epoch": 3.1666666666666665, "grad_norm": 0.49317723663403246, "learning_rate": 2.6718773907430847e-05, "loss": 0.2364, "loss_nan_ranks": 0, "loss_rank_avg": 0.11097238957881927, "step": 855, "valid_targets_mean": 6589.4, "valid_targets_min": 250 }, { "epoch": 3.185185185185185, "grad_norm": 0.5015576290750604, "learning_rate": 2.6544532502792778e-05, "loss": 0.2467, "loss_nan_ranks": 0, "loss_rank_avg": 0.13182875514030457, "step": 860, "valid_targets_mean": 6727.2, "valid_targets_min": 478 }, { "epoch": 3.2037037037037037, "grad_norm": 0.5212344941398274, "learning_rate": 2.6369733005033693e-05, "loss": 0.2559, "loss_nan_ranks": 0, "loss_rank_avg": 0.13697285950183868, "step": 865, "valid_targets_mean": 7515.0, "valid_targets_min": 5976 }, { "epoch": 3.2222222222222223, "grad_norm": 0.5508648846761911, "learning_rate": 2.6194390320397426e-05, "loss": 0.2352, "loss_nan_ranks": 0, "loss_rank_avg": 0.09470212459564209, "step": 870, "valid_targets_mean": 6168.4, "valid_targets_min": 306 }, { "epoch": 3.240740740740741, "grad_norm": 0.44089806796960995, "learning_rate": 2.601851940144874e-05, "loss": 0.2532, "loss_nan_ranks": 0, "loss_rank_avg": 0.09514069557189941, "step": 875, "valid_targets_mean": 7256.9, "valid_targets_min": 5156 }, { "epoch": 3.259259259259259, "grad_norm": 0.9948233767091689, "learning_rate": 2.5842135245798248e-05, "loss": 0.2346, "loss_nan_ranks": 0, "loss_rank_avg": 0.11571880429983139, "step": 880, "valid_targets_mean": 7016.8, "valid_targets_min": 488 }, { "epoch": 3.2777777777777777, "grad_norm": 0.5404910292302445, "learning_rate": 2.5665252894823436e-05, "loss": 0.244, "loss_nan_ranks": 0, "loss_rank_avg": 0.1577197015285492, "step": 885, "valid_targets_mean": 7343.0, "valid_targets_min": 5338 }, { "epoch": 3.2962962962962963, "grad_norm": 0.47094356670247445, "learning_rate": 2.5487887432386035e-05, "loss": 0.2415, "loss_nan_ranks": 0, "loss_rank_avg": 0.13475333154201508, "step": 890, "valid_targets_mean": 7784.5, "valid_targets_min": 6171 }, { "epoch": 3.314814814814815, "grad_norm": 0.5046387190727711, "learning_rate": 2.531005398354569e-05, "loss": 0.2566, "loss_nan_ranks": 0, "loss_rank_avg": 0.11572615802288055, "step": 895, "valid_targets_mean": 6777.4, "valid_targets_min": 3828 }, { "epoch": 3.3333333333333335, "grad_norm": 0.5357822466955011, "learning_rate": 2.5131767713270174e-05, "loss": 0.2357, "loss_nan_ranks": 0, "loss_rank_avg": 0.12361173331737518, "step": 900, "valid_targets_mean": 6211.9, "valid_targets_min": 407 }, { "epoch": 3.351851851851852, "grad_norm": 0.503314513417158, "learning_rate": 2.4953043825142164e-05, "loss": 0.2383, "loss_nan_ranks": 0, "loss_rank_avg": 0.10359904170036316, "step": 905, "valid_targets_mean": 6026.4, "valid_targets_min": 398 }, { "epoch": 3.3703703703703702, "grad_norm": 0.5303301465182496, "learning_rate": 2.477389756006276e-05, "loss": 0.2406, "loss_nan_ranks": 0, "loss_rank_avg": 0.12895119190216064, "step": 910, "valid_targets_mean": 6841.1, "valid_targets_min": 3687 }, { "epoch": 3.388888888888889, "grad_norm": 0.47181710270046207, "learning_rate": 2.4594344194951748e-05, "loss": 0.238, "loss_nan_ranks": 0, "loss_rank_avg": 0.13982756435871124, "step": 915, "valid_targets_mean": 7695.1, "valid_targets_min": 6655 }, { "epoch": 3.4074074074074074, "grad_norm": 0.6978123307943264, "learning_rate": 2.4414399041444897e-05, "loss": 0.2297, "loss_nan_ranks": 0, "loss_rank_avg": 0.12279509752988815, "step": 920, "valid_targets_mean": 5957.0, "valid_targets_min": 424 }, { "epoch": 3.425925925925926, "grad_norm": 0.5103058494828053, "learning_rate": 2.423407744458822e-05, "loss": 0.2476, "loss_nan_ranks": 0, "loss_rank_avg": 0.10531405359506607, "step": 925, "valid_targets_mean": 6795.9, "valid_targets_min": 3746 }, { "epoch": 3.4444444444444446, "grad_norm": 0.4961262749151785, "learning_rate": 2.405339478152938e-05, "loss": 0.2424, "loss_nan_ranks": 0, "loss_rank_avg": 0.13874265551567078, "step": 930, "valid_targets_mean": 7975.0, "valid_targets_min": 4268 }, { "epoch": 3.462962962962963, "grad_norm": 0.5574805174642966, "learning_rate": 2.387236646020643e-05, "loss": 0.2483, "loss_nan_ranks": 0, "loss_rank_avg": 0.12402337044477463, "step": 935, "valid_targets_mean": 8073.1, "valid_targets_min": 3757 }, { "epoch": 3.4814814814814814, "grad_norm": 0.5498492265628724, "learning_rate": 2.3691007918033858e-05, "loss": 0.2526, "loss_nan_ranks": 0, "loss_rank_avg": 0.1583220362663269, "step": 940, "valid_targets_mean": 6594.8, "valid_targets_min": 4154 }, { "epoch": 3.5, "grad_norm": 0.5001189338850895, "learning_rate": 2.3509334620586127e-05, "loss": 0.2445, "loss_nan_ranks": 0, "loss_rank_avg": 0.12958279252052307, "step": 945, "valid_targets_mean": 7600.1, "valid_targets_min": 5250 }, { "epoch": 3.5185185185185186, "grad_norm": 0.4807963923696486, "learning_rate": 2.332736206027887e-05, "loss": 0.2466, "loss_nan_ranks": 0, "loss_rank_avg": 0.10260862857103348, "step": 950, "valid_targets_mean": 6256.6, "valid_targets_min": 3839 }, { "epoch": 3.537037037037037, "grad_norm": 0.5061807303605752, "learning_rate": 2.314510575504771e-05, "loss": 0.2406, "loss_nan_ranks": 0, "loss_rank_avg": 0.13543078303337097, "step": 955, "valid_targets_mean": 7313.0, "valid_targets_min": 4655 }, { "epoch": 3.5555555555555554, "grad_norm": 0.4561793894533891, "learning_rate": 2.2962581247024983e-05, "loss": 0.2394, "loss_nan_ranks": 0, "loss_rank_avg": 0.1189025267958641, "step": 960, "valid_targets_mean": 7561.6, "valid_targets_min": 3970 }, { "epoch": 3.574074074074074, "grad_norm": 0.4825083581140084, "learning_rate": 2.277980410121434e-05, "loss": 0.2396, "loss_nan_ranks": 0, "loss_rank_avg": 0.10504335165023804, "step": 965, "valid_targets_mean": 6335.8, "valid_targets_min": 2847 }, { "epoch": 3.5925925925925926, "grad_norm": 0.4931411405781181, "learning_rate": 2.2596789904163453e-05, "loss": 0.2448, "loss_nan_ranks": 0, "loss_rank_avg": 0.13161906599998474, "step": 970, "valid_targets_mean": 7651.8, "valid_targets_min": 4413 }, { "epoch": 3.611111111111111, "grad_norm": 0.7353648756929703, "learning_rate": 2.2413554262634802e-05, "loss": 0.24, "loss_nan_ranks": 0, "loss_rank_avg": 0.1295996904373169, "step": 975, "valid_targets_mean": 6932.1, "valid_targets_min": 314 }, { "epoch": 3.6296296296296298, "grad_norm": 0.4968603493634512, "learning_rate": 2.223011280227485e-05, "loss": 0.25, "loss_nan_ranks": 0, "loss_rank_avg": 0.11219480633735657, "step": 980, "valid_targets_mean": 6326.2, "valid_targets_min": 255 }, { "epoch": 3.648148148148148, "grad_norm": 0.5295742966751648, "learning_rate": 2.2046481166281496e-05, "loss": 0.2451, "loss_nan_ranks": 0, "loss_rank_avg": 0.1228310838341713, "step": 985, "valid_targets_mean": 5468.6, "valid_targets_min": 316 }, { "epoch": 3.6666666666666665, "grad_norm": 0.48970756187215925, "learning_rate": 2.1862675014070106e-05, "loss": 0.2404, "loss_nan_ranks": 0, "loss_rank_avg": 0.12276136130094528, "step": 990, "valid_targets_mean": 7585.4, "valid_targets_min": 3731 }, { "epoch": 3.685185185185185, "grad_norm": 0.5126550687549059, "learning_rate": 2.1678710019938136e-05, "loss": 0.2496, "loss_nan_ranks": 0, "loss_rank_avg": 0.11687671393156052, "step": 995, "valid_targets_mean": 5928.8, "valid_targets_min": 2060 }, { "epoch": 3.7037037037037037, "grad_norm": 0.5038883486908647, "learning_rate": 2.149460187172849e-05, "loss": 0.2418, "loss_nan_ranks": 0, "loss_rank_avg": 0.0843639075756073, "step": 1000, "valid_targets_mean": 4160.1, "valid_targets_min": 470 }, { "epoch": 3.7222222222222223, "grad_norm": 0.437278082428542, "learning_rate": 2.1310366269491693e-05, "loss": 0.245, "loss_nan_ranks": 0, "loss_rank_avg": 0.12892818450927734, "step": 1005, "valid_targets_mean": 7916.6, "valid_targets_min": 3774 }, { "epoch": 3.7407407407407405, "grad_norm": 0.7342847594893045, "learning_rate": 2.1126018924147084e-05, "loss": 0.2497, "loss_nan_ranks": 0, "loss_rank_avg": 0.1251305788755417, "step": 1010, "valid_targets_mean": 6748.9, "valid_targets_min": 4396 }, { "epoch": 3.7592592592592595, "grad_norm": 0.5411710794385421, "learning_rate": 2.094157555614304e-05, "loss": 0.2424, "loss_nan_ranks": 0, "loss_rank_avg": 0.13948220014572144, "step": 1015, "valid_targets_mean": 7209.6, "valid_targets_min": 245 }, { "epoch": 3.7777777777777777, "grad_norm": 0.4659496590666761, "learning_rate": 2.0757051894116382e-05, "loss": 0.2322, "loss_nan_ranks": 0, "loss_rank_avg": 0.12497460097074509, "step": 1020, "valid_targets_mean": 7786.8, "valid_targets_min": 3204 }, { "epoch": 3.7962962962962963, "grad_norm": 0.8396535698509019, "learning_rate": 2.057246367355109e-05, "loss": 0.2363, "loss_nan_ranks": 0, "loss_rank_avg": 0.13099229335784912, "step": 1025, "valid_targets_mean": 6280.6, "valid_targets_min": 450 }, { "epoch": 3.814814814814815, "grad_norm": 0.49954658133269225, "learning_rate": 2.038782663543649e-05, "loss": 0.2489, "loss_nan_ranks": 0, "loss_rank_avg": 0.14834380149841309, "step": 1030, "valid_targets_mean": 7883.1, "valid_targets_min": 5303 }, { "epoch": 3.8333333333333335, "grad_norm": 0.5628070531489748, "learning_rate": 2.0203156524924847e-05, "loss": 0.2434, "loss_nan_ranks": 0, "loss_rank_avg": 0.15435266494750977, "step": 1035, "valid_targets_mean": 7055.5, "valid_targets_min": 4829 }, { "epoch": 3.851851851851852, "grad_norm": 0.5299852216946729, "learning_rate": 2.0018469089988723e-05, "loss": 0.2526, "loss_nan_ranks": 0, "loss_rank_avg": 0.17090864479541779, "step": 1040, "valid_targets_mean": 7314.8, "valid_targets_min": 3083 }, { "epoch": 3.8703703703703702, "grad_norm": 0.5063447667820846, "learning_rate": 1.9833780080078063e-05, "loss": 0.2437, "loss_nan_ranks": 0, "loss_rank_avg": 0.13567772507667542, "step": 1045, "valid_targets_mean": 7075.2, "valid_targets_min": 3665 }, { "epoch": 3.888888888888889, "grad_norm": 0.5295998490010958, "learning_rate": 1.9649105244777097e-05, "loss": 0.2391, "loss_nan_ranks": 0, "loss_rank_avg": 0.12415409088134766, "step": 1050, "valid_targets_mean": 6369.9, "valid_targets_min": 332 }, { "epoch": 3.9074074074074074, "grad_norm": 0.5103141821329165, "learning_rate": 1.946446033246132e-05, "loss": 0.246, "loss_nan_ranks": 0, "loss_rank_avg": 0.13250163197517395, "step": 1055, "valid_targets_mean": 6484.8, "valid_targets_min": 4386 }, { "epoch": 3.925925925925926, "grad_norm": 0.46646251978801523, "learning_rate": 1.927986108895448e-05, "loss": 0.2446, "loss_nan_ranks": 0, "loss_rank_avg": 0.11562138795852661, "step": 1060, "valid_targets_mean": 7588.4, "valid_targets_min": 5279 }, { "epoch": 3.9444444444444446, "grad_norm": 0.48394668574105776, "learning_rate": 1.9095323256185877e-05, "loss": 0.2449, "loss_nan_ranks": 0, "loss_rank_avg": 0.12256023287773132, "step": 1065, "valid_targets_mean": 6316.8, "valid_targets_min": 1402 }, { "epoch": 3.962962962962963, "grad_norm": 0.48521748020882943, "learning_rate": 1.8910862570847936e-05, "loss": 0.2456, "loss_nan_ranks": 0, "loss_rank_avg": 0.12952281534671783, "step": 1070, "valid_targets_mean": 7025.2, "valid_targets_min": 4953 }, { "epoch": 3.9814814814814814, "grad_norm": 0.49338916425063034, "learning_rate": 1.872649476305423e-05, "loss": 0.2298, "loss_nan_ranks": 0, "loss_rank_avg": 0.11373429000377655, "step": 1075, "valid_targets_mean": 5696.2, "valid_targets_min": 498 }, { "epoch": 4.0, "grad_norm": 0.5727656140824072, "learning_rate": 1.8542235554998097e-05, "loss": 0.2552, "loss_nan_ranks": 0, "loss_rank_avg": 0.17409956455230713, "step": 1080, "valid_targets_mean": 8495.8, "valid_targets_min": 598 }, { "epoch": 4.018518518518518, "grad_norm": 0.516032233014235, "learning_rate": 1.835810065961189e-05, "loss": 0.2343, "loss_nan_ranks": 0, "loss_rank_avg": 0.11870207637548447, "step": 1085, "valid_targets_mean": 7494.0, "valid_targets_min": 5474 }, { "epoch": 4.037037037037037, "grad_norm": 0.5919098251042874, "learning_rate": 1.8174105779227038e-05, "loss": 0.222, "loss_nan_ranks": 0, "loss_rank_avg": 0.10515961796045303, "step": 1090, "valid_targets_mean": 5810.2, "valid_targets_min": 810 }, { "epoch": 4.055555555555555, "grad_norm": 0.5018483993286512, "learning_rate": 1.799026660423503e-05, "loss": 0.2351, "loss_nan_ranks": 0, "loss_rank_avg": 0.10188450664281845, "step": 1095, "valid_targets_mean": 7080.5, "valid_targets_min": 4348 }, { "epoch": 4.074074074074074, "grad_norm": 0.5299673754548403, "learning_rate": 1.780659881174937e-05, "loss": 0.2238, "loss_nan_ranks": 0, "loss_rank_avg": 0.13417434692382812, "step": 1100, "valid_targets_mean": 6985.2, "valid_targets_min": 3872 }, { "epoch": 4.092592592592593, "grad_norm": 0.5130673298943274, "learning_rate": 1.7623118064268726e-05, "loss": 0.2307, "loss_nan_ranks": 0, "loss_rank_avg": 0.09408153593540192, "step": 1105, "valid_targets_mean": 6263.2, "valid_targets_min": 3473 }, { "epoch": 4.111111111111111, "grad_norm": 0.5462228358022995, "learning_rate": 1.743984000834126e-05, "loss": 0.2286, "loss_nan_ranks": 0, "loss_rank_avg": 0.09989724308252335, "step": 1110, "valid_targets_mean": 5982.1, "valid_targets_min": 475 }, { "epoch": 4.12962962962963, "grad_norm": 0.5988706825678385, "learning_rate": 1.7256780273230358e-05, "loss": 0.2255, "loss_nan_ranks": 0, "loss_rank_avg": 0.10022996366024017, "step": 1115, "valid_targets_mean": 5157.0, "valid_targets_min": 451 }, { "epoch": 4.148148148148148, "grad_norm": 0.48816249519586924, "learning_rate": 1.707395446958183e-05, "loss": 0.2304, "loss_nan_ranks": 0, "loss_rank_avg": 0.10353829711675644, "step": 1120, "valid_targets_mean": 6314.1, "valid_targets_min": 423 }, { "epoch": 4.166666666666667, "grad_norm": 0.49451693646994704, "learning_rate": 1.6891378188092694e-05, "loss": 0.2417, "loss_nan_ranks": 0, "loss_rank_avg": 0.12381202727556229, "step": 1125, "valid_targets_mean": 7282.1, "valid_targets_min": 3336 }, { "epoch": 4.185185185185185, "grad_norm": 0.5087635256864471, "learning_rate": 1.6709066998181653e-05, "loss": 0.2264, "loss_nan_ranks": 0, "loss_rank_avg": 0.12461232393980026, "step": 1130, "valid_targets_mean": 6933.9, "valid_targets_min": 3762 }, { "epoch": 4.203703703703703, "grad_norm": 0.5084569033634785, "learning_rate": 1.6527036446661396e-05, "loss": 0.239, "loss_nan_ranks": 0, "loss_rank_avg": 0.15918824076652527, "step": 1135, "valid_targets_mean": 7666.1, "valid_targets_min": 442 }, { "epoch": 4.222222222222222, "grad_norm": 0.5017372904791324, "learning_rate": 1.634530205641283e-05, "loss": 0.235, "loss_nan_ranks": 0, "loss_rank_avg": 0.1275438666343689, "step": 1140, "valid_targets_mean": 7447.4, "valid_targets_min": 5245 }, { "epoch": 4.2407407407407405, "grad_norm": 0.5206246639358723, "learning_rate": 1.616387932506135e-05, "loss": 0.2252, "loss_nan_ranks": 0, "loss_rank_avg": 0.10627258569002151, "step": 1145, "valid_targets_mean": 6473.4, "valid_targets_min": 102 }, { "epoch": 4.2592592592592595, "grad_norm": 0.4977541341325297, "learning_rate": 1.5982783723655225e-05, "loss": 0.2333, "loss_nan_ranks": 0, "loss_rank_avg": 0.08717834949493408, "step": 1150, "valid_targets_mean": 6253.9, "valid_targets_min": 2602 }, { "epoch": 4.277777777777778, "grad_norm": 0.5023452631707405, "learning_rate": 1.580203069534634e-05, "loss": 0.2325, "loss_nan_ranks": 0, "loss_rank_avg": 0.12174762040376663, "step": 1155, "valid_targets_mean": 6748.9, "valid_targets_min": 2016 }, { "epoch": 4.296296296296296, "grad_norm": 0.5279594814422506, "learning_rate": 1.5621635654073216e-05, "loss": 0.2374, "loss_nan_ranks": 0, "loss_rank_avg": 0.12284161150455475, "step": 1160, "valid_targets_mean": 6711.1, "valid_targets_min": 4902 }, { "epoch": 4.314814814814815, "grad_norm": 0.49117787226016985, "learning_rate": 1.5441613983246606e-05, "loss": 0.2316, "loss_nan_ranks": 0, "loss_rank_avg": 0.1301436871290207, "step": 1165, "valid_targets_mean": 6814.2, "valid_targets_min": 5645 }, { "epoch": 4.333333333333333, "grad_norm": 0.5240995867758355, "learning_rate": 1.5261981034437617e-05, "loss": 0.2275, "loss_nan_ranks": 0, "loss_rank_avg": 0.10003925859928131, "step": 1170, "valid_targets_mean": 5936.5, "valid_targets_min": 1476 }, { "epoch": 4.351851851851852, "grad_norm": 0.4887431417522901, "learning_rate": 1.508275212606862e-05, "loss": 0.2295, "loss_nan_ranks": 0, "loss_rank_avg": 0.126789391040802, "step": 1175, "valid_targets_mean": 7479.0, "valid_targets_min": 2546 }, { "epoch": 4.37037037037037, "grad_norm": 0.5010137089706221, "learning_rate": 1.490394254210691e-05, "loss": 0.2247, "loss_nan_ranks": 0, "loss_rank_avg": 0.11638576537370682, "step": 1180, "valid_targets_mean": 7829.9, "valid_targets_min": 6242 }, { "epoch": 4.388888888888889, "grad_norm": 0.5302823871361698, "learning_rate": 1.4725567530761402e-05, "loss": 0.2335, "loss_nan_ranks": 0, "loss_rank_avg": 0.09980429708957672, "step": 1185, "valid_targets_mean": 6296.0, "valid_targets_min": 1823 }, { "epoch": 4.407407407407407, "grad_norm": 0.48938232381512503, "learning_rate": 1.4547642303182282e-05, "loss": 0.2324, "loss_nan_ranks": 0, "loss_rank_avg": 0.12234029173851013, "step": 1190, "valid_targets_mean": 8091.8, "valid_targets_min": 5562 }, { "epoch": 4.425925925925926, "grad_norm": 0.5031522973767952, "learning_rate": 1.4370182032163861e-05, "loss": 0.2177, "loss_nan_ranks": 0, "loss_rank_avg": 0.07054075598716736, "step": 1195, "valid_targets_mean": 4125.9, "valid_targets_min": 392 }, { "epoch": 4.444444444444445, "grad_norm": 0.5018613158088705, "learning_rate": 1.4193201850850717e-05, "loss": 0.2363, "loss_nan_ranks": 0, "loss_rank_avg": 0.10027652978897095, "step": 1200, "valid_targets_mean": 6186.6, "valid_targets_min": 860 }, { "epoch": 4.462962962962963, "grad_norm": 0.4856214263792827, "learning_rate": 1.4016716851447173e-05, "loss": 0.2303, "loss_nan_ranks": 0, "loss_rank_avg": 0.11477172374725342, "step": 1205, "valid_targets_mean": 7052.6, "valid_targets_min": 4152 }, { "epoch": 4.481481481481482, "grad_norm": 0.4874276605163382, "learning_rate": 1.3840742083930297e-05, "loss": 0.2218, "loss_nan_ranks": 0, "loss_rank_avg": 0.10933384299278259, "step": 1210, "valid_targets_mean": 8173.6, "valid_targets_min": 4973 }, { "epoch": 4.5, "grad_norm": 0.531455676384898, "learning_rate": 1.3665292554766513e-05, "loss": 0.2285, "loss_nan_ranks": 0, "loss_rank_avg": 0.11772537231445312, "step": 1215, "valid_targets_mean": 5933.6, "valid_targets_min": 357 }, { "epoch": 4.518518518518518, "grad_norm": 0.5372681406699098, "learning_rate": 1.3490383225631885e-05, "loss": 0.221, "loss_nan_ranks": 0, "loss_rank_avg": 0.10665614157915115, "step": 1220, "valid_targets_mean": 7539.6, "valid_targets_min": 4008 }, { "epoch": 4.537037037037037, "grad_norm": 0.5211115545139049, "learning_rate": 1.3316029012136251e-05, "loss": 0.2344, "loss_nan_ranks": 0, "loss_rank_avg": 0.12121891230344772, "step": 1225, "valid_targets_mean": 7162.0, "valid_targets_min": 3992 }, { "epoch": 4.555555555555555, "grad_norm": 0.46584465301969513, "learning_rate": 1.314224478255128e-05, "loss": 0.2276, "loss_nan_ranks": 0, "loss_rank_avg": 0.11857609450817108, "step": 1230, "valid_targets_mean": 6463.0, "valid_targets_min": 1340 }, { "epoch": 4.574074074074074, "grad_norm": 0.4588439125962153, "learning_rate": 1.2969045356542558e-05, "loss": 0.2279, "loss_nan_ranks": 0, "loss_rank_avg": 0.10397493839263916, "step": 1235, "valid_targets_mean": 7388.2, "valid_targets_min": 4077 }, { "epoch": 4.592592592592593, "grad_norm": 0.50171375299644, "learning_rate": 1.2796445503905797e-05, "loss": 0.2221, "loss_nan_ranks": 0, "loss_rank_avg": 0.119759202003479, "step": 1240, "valid_targets_mean": 7190.8, "valid_targets_min": 1976 }, { "epoch": 4.611111111111111, "grad_norm": 0.5252807258839242, "learning_rate": 1.2624459943307378e-05, "loss": 0.2246, "loss_nan_ranks": 0, "loss_rank_avg": 0.10422271490097046, "step": 1245, "valid_targets_mean": 6359.0, "valid_targets_min": 4126 }, { "epoch": 4.62962962962963, "grad_norm": 0.4591959160558025, "learning_rate": 1.2453103341029154e-05, "loss": 0.2314, "loss_nan_ranks": 0, "loss_rank_avg": 0.13636991381645203, "step": 1250, "valid_targets_mean": 7624.4, "valid_targets_min": 4176 }, { "epoch": 4.648148148148148, "grad_norm": 0.5397510993994119, "learning_rate": 1.2282390309717776e-05, "loss": 0.2255, "loss_nan_ranks": 0, "loss_rank_avg": 0.10083657503128052, "step": 1255, "valid_targets_mean": 5491.2, "valid_targets_min": 231 }, { "epoch": 4.666666666666667, "grad_norm": 0.47818583241901386, "learning_rate": 1.2112335407138582e-05, "loss": 0.2246, "loss_nan_ranks": 0, "loss_rank_avg": 0.11500853300094604, "step": 1260, "valid_targets_mean": 6869.8, "valid_targets_min": 4280 }, { "epoch": 4.685185185185185, "grad_norm": 0.6028087003564473, "learning_rate": 1.1942953134934185e-05, "loss": 0.2303, "loss_nan_ranks": 0, "loss_rank_avg": 0.1319492757320404, "step": 1265, "valid_targets_mean": 5306.4, "valid_targets_min": 3887 }, { "epoch": 4.703703703703704, "grad_norm": 0.487471515961342, "learning_rate": 1.1774257937387774e-05, "loss": 0.2241, "loss_nan_ranks": 0, "loss_rank_avg": 0.10458207130432129, "step": 1270, "valid_targets_mean": 6610.9, "valid_targets_min": 3766 }, { "epoch": 4.722222222222222, "grad_norm": 0.5162656468076039, "learning_rate": 1.160626420019142e-05, "loss": 0.2373, "loss_nan_ranks": 0, "loss_rank_avg": 0.14040455222129822, "step": 1275, "valid_targets_mean": 7369.4, "valid_targets_min": 1723 }, { "epoch": 4.7407407407407405, "grad_norm": 0.4992171347071927, "learning_rate": 1.1438986249219292e-05, "loss": 0.2274, "loss_nan_ranks": 0, "loss_rank_avg": 0.1111883670091629, "step": 1280, "valid_targets_mean": 7044.2, "valid_targets_min": 1730 }, { "epoch": 4.7592592592592595, "grad_norm": 0.4980112243313627, "learning_rate": 1.1272438349305996e-05, "loss": 0.2216, "loss_nan_ranks": 0, "loss_rank_avg": 0.1132529079914093, "step": 1285, "valid_targets_mean": 6436.4, "valid_targets_min": 4369 }, { "epoch": 4.777777777777778, "grad_norm": 0.4991126681737921, "learning_rate": 1.1106634703030132e-05, "loss": 0.227, "loss_nan_ranks": 0, "loss_rank_avg": 0.13967010378837585, "step": 1290, "valid_targets_mean": 8305.5, "valid_targets_min": 5394 }, { "epoch": 4.796296296296296, "grad_norm": 0.5155034087864115, "learning_rate": 1.0941589449503152e-05, "loss": 0.2262, "loss_nan_ranks": 0, "loss_rank_avg": 0.10799385607242584, "step": 1295, "valid_targets_mean": 6244.2, "valid_targets_min": 413 }, { "epoch": 4.814814814814815, "grad_norm": 0.4522833911832173, "learning_rate": 1.0777316663163604e-05, "loss": 0.2297, "loss_nan_ranks": 0, "loss_rank_avg": 0.10367443412542343, "step": 1300, "valid_targets_mean": 7749.2, "valid_targets_min": 3820 }, { "epoch": 4.833333333333333, "grad_norm": 0.4560347865249111, "learning_rate": 1.061383035257697e-05, "loss": 0.2308, "loss_nan_ranks": 0, "loss_rank_avg": 0.12242156267166138, "step": 1305, "valid_targets_mean": 7613.8, "valid_targets_min": 5903 }, { "epoch": 4.851851851851852, "grad_norm": 0.477804618520794, "learning_rate": 1.0451144459241021e-05, "loss": 0.2257, "loss_nan_ranks": 0, "loss_rank_avg": 0.12609726190567017, "step": 1310, "valid_targets_mean": 7283.4, "valid_targets_min": 4187 }, { "epoch": 4.87037037037037, "grad_norm": 0.4848059755823838, "learning_rate": 1.0289272856396954e-05, "loss": 0.2334, "loss_nan_ranks": 0, "loss_rank_avg": 0.12737436592578888, "step": 1315, "valid_targets_mean": 6414.9, "valid_targets_min": 3065 }, { "epoch": 4.888888888888889, "grad_norm": 0.5169709458077109, "learning_rate": 1.0128229347846348e-05, "loss": 0.2326, "loss_nan_ranks": 0, "loss_rank_avg": 0.09037278592586517, "step": 1320, "valid_targets_mean": 5083.1, "valid_targets_min": 2463 }, { "epoch": 4.907407407407407, "grad_norm": 0.4691072483470426, "learning_rate": 9.968027666774005e-06, "loss": 0.2232, "loss_nan_ranks": 0, "loss_rank_avg": 0.11444838345050812, "step": 1325, "valid_targets_mean": 6681.6, "valid_targets_min": 4390 }, { "epoch": 4.925925925925926, "grad_norm": 0.5394803424881776, "learning_rate": 9.80868147457683e-06, "loss": 0.2365, "loss_nan_ranks": 0, "loss_rank_avg": 0.14064496755599976, "step": 1330, "valid_targets_mean": 7132.9, "valid_targets_min": 4613 }, { "epoch": 4.944444444444445, "grad_norm": 0.519246694995982, "learning_rate": 9.650204359698884e-06, "loss": 0.2364, "loss_nan_ranks": 0, "loss_rank_avg": 0.10239046812057495, "step": 1335, "valid_targets_mean": 5749.8, "valid_targets_min": 407 }, { "epoch": 4.962962962962963, "grad_norm": 0.5251939867353143, "learning_rate": 9.492609836472563e-06, "loss": 0.2264, "loss_nan_ranks": 0, "loss_rank_avg": 0.1245264858007431, "step": 1340, "valid_targets_mean": 8139.8, "valid_targets_min": 304 }, { "epoch": 4.981481481481482, "grad_norm": 0.4841966754669591, "learning_rate": 9.33591134396618e-06, "loss": 0.2369, "loss_nan_ranks": 0, "loss_rank_avg": 0.09950557351112366, "step": 1345, "valid_targets_mean": 5948.6, "valid_targets_min": 1240 }, { "epoch": 5.0, "grad_norm": 0.46635316251229963, "learning_rate": 9.180122244837893e-06, "loss": 0.2187, "loss_nan_ranks": 0, "loss_rank_avg": 0.09984344244003296, "step": 1350, "valid_targets_mean": 6878.8, "valid_targets_min": 4793 }, { "epoch": 5.018518518518518, "grad_norm": 0.4917225509060581, "learning_rate": 9.025255824196234e-06, "loss": 0.2078, "loss_nan_ranks": 0, "loss_rank_avg": 0.1116233840584755, "step": 1355, "valid_targets_mean": 6953.8, "valid_targets_min": 3714 }, { "epoch": 5.037037037037037, "grad_norm": 0.5096642938537449, "learning_rate": 8.871325288467188e-06, "loss": 0.2184, "loss_nan_ranks": 0, "loss_rank_avg": 0.1091044619679451, "step": 1360, "valid_targets_mean": 6872.6, "valid_targets_min": 3982 }, { "epoch": 5.055555555555555, "grad_norm": 0.5130910816904468, "learning_rate": 8.718343764267967e-06, "loss": 0.2218, "loss_nan_ranks": 0, "loss_rank_avg": 0.08576367050409317, "step": 1365, "valid_targets_mean": 5957.0, "valid_targets_min": 321 }, { "epoch": 5.074074074074074, "grad_norm": 0.506747302079751, "learning_rate": 8.566324297287674e-06, "loss": 0.2212, "loss_nan_ranks": 0, "loss_rank_avg": 0.14150217175483704, "step": 1370, "valid_targets_mean": 7874.0, "valid_targets_min": 4982 }, { "epoch": 5.092592592592593, "grad_norm": 0.5233616721490927, "learning_rate": 8.41527985117478e-06, "loss": 0.2303, "loss_nan_ranks": 0, "loss_rank_avg": 0.09984303265810013, "step": 1375, "valid_targets_mean": 5710.6, "valid_targets_min": 471 }, { "epoch": 5.111111111111111, "grad_norm": 0.5312752630451189, "learning_rate": 8.265223306431644e-06, "loss": 0.2156, "loss_nan_ranks": 0, "loss_rank_avg": 0.10283049941062927, "step": 1380, "valid_targets_mean": 5761.2, "valid_targets_min": 427 }, { "epoch": 5.12962962962963, "grad_norm": 0.5693784646821431, "learning_rate": 8.116167459316116e-06, "loss": 0.2305, "loss_nan_ranks": 0, "loss_rank_avg": 0.12732897698879242, "step": 1385, "valid_targets_mean": 7584.5, "valid_targets_min": 5131 }, { "epoch": 5.148148148148148, "grad_norm": 0.5027220474125025, "learning_rate": 7.96812502075031e-06, "loss": 0.2249, "loss_nan_ranks": 0, "loss_rank_avg": 0.11553220450878143, "step": 1390, "valid_targets_mean": 6780.9, "valid_targets_min": 368 }, { "epoch": 5.166666666666667, "grad_norm": 0.6619482906498753, "learning_rate": 7.821108615236663e-06, "loss": 0.22, "loss_nan_ranks": 0, "loss_rank_avg": 0.11507928371429443, "step": 1395, "valid_targets_mean": 6894.5, "valid_targets_min": 4617 }, { "epoch": 5.185185185185185, "grad_norm": 0.591868727798189, "learning_rate": 7.675130779781385e-06, "loss": 0.2169, "loss_nan_ranks": 0, "loss_rank_avg": 0.10910572856664658, "step": 1400, "valid_targets_mean": 6408.5, "valid_targets_min": 3807 }, { "epoch": 5.203703703703703, "grad_norm": 0.5126683283711245, "learning_rate": 7.530203962825331e-06, "loss": 0.2166, "loss_nan_ranks": 0, "loss_rank_avg": 0.10854263603687286, "step": 1405, "valid_targets_mean": 7869.9, "valid_targets_min": 6327 }, { "epoch": 5.222222222222222, "grad_norm": 0.5236210165162991, "learning_rate": 7.386340523182451e-06, "loss": 0.2248, "loss_nan_ranks": 0, "loss_rank_avg": 0.127744659781456, "step": 1410, "valid_targets_mean": 6906.0, "valid_targets_min": 2789 }, { "epoch": 5.2407407407407405, "grad_norm": 0.5593021034033383, "learning_rate": 7.243552728985879e-06, "loss": 0.2182, "loss_nan_ranks": 0, "loss_rank_avg": 0.11911047995090485, "step": 1415, "valid_targets_mean": 6434.8, "valid_targets_min": 3960 }, { "epoch": 5.2592592592592595, "grad_norm": 0.49741040737686926, "learning_rate": 7.1018527566417535e-06, "loss": 0.2255, "loss_nan_ranks": 0, "loss_rank_avg": 0.093578040599823, "step": 1420, "valid_targets_mean": 5445.6, "valid_targets_min": 347 }, { "epoch": 5.277777777777778, "grad_norm": 0.50446609238118, "learning_rate": 6.961252689790836e-06, "loss": 0.2298, "loss_nan_ranks": 0, "loss_rank_avg": 0.12511374056339264, "step": 1425, "valid_targets_mean": 7610.8, "valid_targets_min": 3551 }, { "epoch": 5.296296296296296, "grad_norm": 0.5170007137165736, "learning_rate": 6.821764518278109e-06, "loss": 0.211, "loss_nan_ranks": 0, "loss_rank_avg": 0.10103592276573181, "step": 1430, "valid_targets_mean": 5218.6, "valid_targets_min": 402 }, { "epoch": 5.314814814814815, "grad_norm": 0.49515563805483387, "learning_rate": 6.6834001371302874e-06, "loss": 0.2123, "loss_nan_ranks": 0, "loss_rank_avg": 0.11039568483829498, "step": 1435, "valid_targets_mean": 7072.1, "valid_targets_min": 4250 }, { "epoch": 5.333333333333333, "grad_norm": 0.45720233551933753, "learning_rate": 6.546171345541474e-06, "loss": 0.2261, "loss_nan_ranks": 0, "loss_rank_avg": 0.10742579400539398, "step": 1440, "valid_targets_mean": 7819.8, "valid_targets_min": 5542 }, { "epoch": 5.351851851851852, "grad_norm": 0.4933421472302054, "learning_rate": 6.410089845866969e-06, "loss": 0.2222, "loss_nan_ranks": 0, "loss_rank_avg": 0.09236937761306763, "step": 1445, "valid_targets_mean": 7047.8, "valid_targets_min": 5584 }, { "epoch": 5.37037037037037, "grad_norm": 0.46977639240502755, "learning_rate": 6.275167242625331e-06, "loss": 0.2178, "loss_nan_ranks": 0, "loss_rank_avg": 0.12316617369651794, "step": 1450, "valid_targets_mean": 8225.5, "valid_targets_min": 6143 }, { "epoch": 5.388888888888889, "grad_norm": 0.4919487785546802, "learning_rate": 6.141415041508774e-06, "loss": 0.2162, "loss_nan_ranks": 0, "loss_rank_avg": 0.10855259001255035, "step": 1455, "valid_targets_mean": 6035.5, "valid_targets_min": 387 }, { "epoch": 5.407407407407407, "grad_norm": 0.48842724784791386, "learning_rate": 6.008844648402037e-06, "loss": 0.227, "loss_nan_ranks": 0, "loss_rank_avg": 0.1209472045302391, "step": 1460, "valid_targets_mean": 7274.0, "valid_targets_min": 335 }, { "epoch": 5.425925925925926, "grad_norm": 0.5168122874383904, "learning_rate": 5.877467368409711e-06, "loss": 0.2231, "loss_nan_ranks": 0, "loss_rank_avg": 0.10533533990383148, "step": 1465, "valid_targets_mean": 6178.0, "valid_targets_min": 1402 }, { "epoch": 5.444444444444445, "grad_norm": 0.4938494111570672, "learning_rate": 5.74729440489219e-06, "loss": 0.2105, "loss_nan_ranks": 0, "loss_rank_avg": 0.10273820906877518, "step": 1470, "valid_targets_mean": 7238.5, "valid_targets_min": 4875 }, { "epoch": 5.462962962962963, "grad_norm": 0.4908203224021245, "learning_rate": 5.61833685851028e-06, "loss": 0.2124, "loss_nan_ranks": 0, "loss_rank_avg": 0.1013522744178772, "step": 1475, "valid_targets_mean": 6711.8, "valid_targets_min": 5280 }, { "epoch": 5.481481481481482, "grad_norm": 0.6323153682269085, "learning_rate": 5.490605726278602e-06, "loss": 0.2193, "loss_nan_ranks": 0, "loss_rank_avg": 0.09872453659772873, "step": 1480, "valid_targets_mean": 6050.2, "valid_targets_min": 1873 }, { "epoch": 5.5, "grad_norm": 0.5059930160839521, "learning_rate": 5.364111900627759e-06, "loss": 0.2129, "loss_nan_ranks": 0, "loss_rank_avg": 0.10614493489265442, "step": 1485, "valid_targets_mean": 6757.8, "valid_targets_min": 3389 }, { "epoch": 5.518518518518518, "grad_norm": 0.7298032265200861, "learning_rate": 5.238866168475532e-06, "loss": 0.2229, "loss_nan_ranks": 0, "loss_rank_avg": 0.09671928733587265, "step": 1490, "valid_targets_mean": 6525.5, "valid_targets_min": 1156 }, { "epoch": 5.537037037037037, "grad_norm": 0.46965643801489465, "learning_rate": 5.114879210306967e-06, "loss": 0.2184, "loss_nan_ranks": 0, "loss_rank_avg": 0.09873900562524796, "step": 1495, "valid_targets_mean": 6918.2, "valid_targets_min": 4153 }, { "epoch": 5.555555555555555, "grad_norm": 0.5036641297823798, "learning_rate": 4.9921615992636004e-06, "loss": 0.2197, "loss_nan_ranks": 0, "loss_rank_avg": 0.10597766935825348, "step": 1500, "valid_targets_mean": 6490.0, "valid_targets_min": 456 }, { "epoch": 5.574074074074074, "grad_norm": 0.5055842380658947, "learning_rate": 4.870723800241832e-06, "loss": 0.2106, "loss_nan_ranks": 0, "loss_rank_avg": 0.08831615746021271, "step": 1505, "valid_targets_mean": 5106.0, "valid_targets_min": 245 }, { "epoch": 5.592592592592593, "grad_norm": 0.4574856423862591, "learning_rate": 4.750576169000476e-06, "loss": 0.224, "loss_nan_ranks": 0, "loss_rank_avg": 0.12107911705970764, "step": 1510, "valid_targets_mean": 8312.5, "valid_targets_min": 7302 }, { "epoch": 5.611111111111111, "grad_norm": 0.5526024879531509, "learning_rate": 4.631728951277716e-06, "loss": 0.217, "loss_nan_ranks": 0, "loss_rank_avg": 0.1274055689573288, "step": 1515, "valid_targets_mean": 6396.0, "valid_targets_min": 329 }, { "epoch": 5.62962962962963, "grad_norm": 0.493838956685955, "learning_rate": 4.514192281917351e-06, "loss": 0.2343, "loss_nan_ranks": 0, "loss_rank_avg": 0.12989237904548645, "step": 1520, "valid_targets_mean": 6073.9, "valid_targets_min": 431 }, { "epoch": 5.648148148148148, "grad_norm": 0.4867215016980891, "learning_rate": 4.397976184004553e-06, "loss": 0.2122, "loss_nan_ranks": 0, "loss_rank_avg": 0.0971713662147522, "step": 1525, "valid_targets_mean": 6264.5, "valid_targets_min": 390 }, { "epoch": 5.666666666666667, "grad_norm": 0.5159449102090277, "learning_rate": 4.283090568011106e-06, "loss": 0.215, "loss_nan_ranks": 0, "loss_rank_avg": 0.12054109573364258, "step": 1530, "valid_targets_mean": 7413.1, "valid_targets_min": 5734 }, { "epoch": 5.685185185185185, "grad_norm": 0.4889188303592639, "learning_rate": 4.169545230950321e-06, "loss": 0.2116, "loss_nan_ranks": 0, "loss_rank_avg": 0.1139330267906189, "step": 1535, "valid_targets_mean": 6801.9, "valid_targets_min": 3473 }, { "epoch": 5.703703703703704, "grad_norm": 0.4840565682120755, "learning_rate": 4.057349855541557e-06, "loss": 0.2261, "loss_nan_ranks": 0, "loss_rank_avg": 0.10931402444839478, "step": 1540, "valid_targets_mean": 7520.5, "valid_targets_min": 4113 }, { "epoch": 5.722222222222222, "grad_norm": 0.488027844595327, "learning_rate": 3.9465140093845035e-06, "loss": 0.2246, "loss_nan_ranks": 0, "loss_rank_avg": 0.1238413006067276, "step": 1545, "valid_targets_mean": 7499.6, "valid_targets_min": 4714 }, { "epoch": 5.7407407407407405, "grad_norm": 0.5478734177317678, "learning_rate": 3.837047144143331e-06, "loss": 0.2321, "loss_nan_ranks": 0, "loss_rank_avg": 0.12330850958824158, "step": 1550, "valid_targets_mean": 5695.8, "valid_targets_min": 2294 }, { "epoch": 5.7592592592592595, "grad_norm": 0.4783212632587014, "learning_rate": 3.7289585947406504e-06, "loss": 0.2175, "loss_nan_ranks": 0, "loss_rank_avg": 0.09635508060455322, "step": 1555, "valid_targets_mean": 6844.5, "valid_targets_min": 4746 }, { "epoch": 5.777777777777778, "grad_norm": 0.4569367430559496, "learning_rate": 3.6222575785614898e-06, "loss": 0.2117, "loss_nan_ranks": 0, "loss_rank_avg": 0.11338578909635544, "step": 1560, "valid_targets_mean": 7313.2, "valid_targets_min": 5594 }, { "epoch": 5.796296296296296, "grad_norm": 0.4652991348453085, "learning_rate": 3.5169531946672563e-06, "loss": 0.2301, "loss_nan_ranks": 0, "loss_rank_avg": 0.12462925910949707, "step": 1565, "valid_targets_mean": 8310.9, "valid_targets_min": 2015 }, { "epoch": 5.814814814814815, "grad_norm": 0.45290586655386583, "learning_rate": 3.413054423019815e-06, "loss": 0.2188, "loss_nan_ranks": 0, "loss_rank_avg": 0.1050763726234436, "step": 1570, "valid_targets_mean": 7246.6, "valid_targets_min": 477 }, { "epoch": 5.833333333333333, "grad_norm": 0.46941892907078986, "learning_rate": 3.3105701237156885e-06, "loss": 0.2247, "loss_nan_ranks": 0, "loss_rank_avg": 0.09569090604782104, "step": 1575, "valid_targets_mean": 6234.6, "valid_targets_min": 3204 }, { "epoch": 5.851851851851852, "grad_norm": 0.48656566073537605, "learning_rate": 3.2095090362305316e-06, "loss": 0.2267, "loss_nan_ranks": 0, "loss_rank_avg": 0.12894827127456665, "step": 1580, "valid_targets_mean": 7983.6, "valid_targets_min": 6576 }, { "epoch": 5.87037037037037, "grad_norm": 0.5659066142032889, "learning_rate": 3.1098797786738433e-06, "loss": 0.2255, "loss_nan_ranks": 0, "loss_rank_avg": 0.12359024584293365, "step": 1585, "valid_targets_mean": 6504.0, "valid_targets_min": 3646 }, { "epoch": 5.888888888888889, "grad_norm": 0.4995416346883199, "learning_rate": 3.011690847054054e-06, "loss": 0.2197, "loss_nan_ranks": 0, "loss_rank_avg": 0.1010461077094078, "step": 1590, "valid_targets_mean": 6379.1, "valid_targets_min": 2857 }, { "epoch": 5.907407407407407, "grad_norm": 0.5306353785153805, "learning_rate": 2.9149506145540064e-06, "loss": 0.2209, "loss_nan_ranks": 0, "loss_rank_avg": 0.09737998247146606, "step": 1595, "valid_targets_mean": 6443.9, "valid_targets_min": 310 }, { "epoch": 5.925925925925926, "grad_norm": 0.4781652098007515, "learning_rate": 2.819667330816942e-06, "loss": 0.2114, "loss_nan_ranks": 0, "loss_rank_avg": 0.10525282472372055, "step": 1600, "valid_targets_mean": 7038.8, "valid_targets_min": 2065 }, { "epoch": 5.944444444444445, "grad_norm": 0.5099409359460987, "learning_rate": 2.725849121242976e-06, "loss": 0.2117, "loss_nan_ranks": 0, "loss_rank_avg": 0.09639444947242737, "step": 1605, "valid_targets_mean": 7068.9, "valid_targets_min": 2646 }, { "epoch": 5.962962962962963, "grad_norm": 0.482201179503476, "learning_rate": 2.633503986296215e-06, "loss": 0.2132, "loss_nan_ranks": 0, "loss_rank_avg": 0.09317293018102646, "step": 1610, "valid_targets_mean": 6141.4, "valid_targets_min": 3965 }, { "epoch": 5.981481481481482, "grad_norm": 0.524434990755225, "learning_rate": 2.5426398008225084e-06, "loss": 0.214, "loss_nan_ranks": 0, "loss_rank_avg": 0.07734841853380203, "step": 1615, "valid_targets_mean": 5183.4, "valid_targets_min": 535 }, { "epoch": 6.0, "grad_norm": 0.5477430720593653, "learning_rate": 2.4532643133778922e-06, "loss": 0.2186, "loss_nan_ranks": 0, "loss_rank_avg": 0.09451612830162048, "step": 1620, "valid_targets_mean": 6252.8, "valid_targets_min": 409 }, { "epoch": 6.018518518518518, "grad_norm": 0.5122116198318503, "learning_rate": 2.36538514556784e-06, "loss": 0.2131, "loss_nan_ranks": 0, "loss_rank_avg": 0.12386941909790039, "step": 1625, "valid_targets_mean": 7532.4, "valid_targets_min": 5944 }, { "epoch": 6.037037037037037, "grad_norm": 0.48210109707959664, "learning_rate": 2.2790097913973154e-06, "loss": 0.2141, "loss_nan_ranks": 0, "loss_rank_avg": 0.09711402654647827, "step": 1630, "valid_targets_mean": 6310.2, "valid_targets_min": 3336 }, { "epoch": 6.055555555555555, "grad_norm": 0.5222607009794501, "learning_rate": 2.1941456166316953e-06, "loss": 0.221, "loss_nan_ranks": 0, "loss_rank_avg": 0.11883814632892609, "step": 1635, "valid_targets_mean": 6612.6, "valid_targets_min": 460 }, { "epoch": 6.074074074074074, "grad_norm": 0.5229353313952704, "learning_rate": 2.1107998581686793e-06, "loss": 0.2128, "loss_nan_ranks": 0, "loss_rank_avg": 0.10010014474391937, "step": 1640, "valid_targets_mean": 6095.0, "valid_targets_min": 446 }, { "epoch": 6.092592592592593, "grad_norm": 0.5109976673243143, "learning_rate": 2.0289796234211235e-06, "loss": 0.2226, "loss_nan_ranks": 0, "loss_rank_avg": 0.10803158581256866, "step": 1645, "valid_targets_mean": 6716.4, "valid_targets_min": 400 }, { "epoch": 6.111111111111111, "grad_norm": 0.4762813898465876, "learning_rate": 1.9486918897109607e-06, "loss": 0.2142, "loss_nan_ranks": 0, "loss_rank_avg": 0.1186881810426712, "step": 1650, "valid_targets_mean": 7162.5, "valid_targets_min": 321 }, { "epoch": 6.12962962962963, "grad_norm": 0.5468221852953015, "learning_rate": 1.8699435036741987e-06, "loss": 0.2134, "loss_nan_ranks": 0, "loss_rank_avg": 0.11599022150039673, "step": 1655, "valid_targets_mean": 5360.0, "valid_targets_min": 437 }, { "epoch": 6.148148148148148, "grad_norm": 0.4613166221297042, "learning_rate": 1.792741180677069e-06, "loss": 0.2116, "loss_nan_ranks": 0, "loss_rank_avg": 0.11193803697824478, "step": 1660, "valid_targets_mean": 7607.8, "valid_targets_min": 3046 }, { "epoch": 6.166666666666667, "grad_norm": 0.4992299365923156, "learning_rate": 1.7170915042433468e-06, "loss": 0.2112, "loss_nan_ranks": 0, "loss_rank_avg": 0.10833920538425446, "step": 1665, "valid_targets_mean": 6474.9, "valid_targets_min": 2007 }, { "epoch": 6.185185185185185, "grad_norm": 0.48734826804572084, "learning_rate": 1.643000925492959e-06, "loss": 0.2167, "loss_nan_ranks": 0, "loss_rank_avg": 0.10757909715175629, "step": 1670, "valid_targets_mean": 7296.0, "valid_targets_min": 4685 }, { "epoch": 6.203703703703703, "grad_norm": 0.5038576702929699, "learning_rate": 1.5704757625918454e-06, "loss": 0.2198, "loss_nan_ranks": 0, "loss_rank_avg": 0.0956081971526146, "step": 1675, "valid_targets_mean": 6562.6, "valid_targets_min": 530 }, { "epoch": 6.222222222222222, "grad_norm": 0.5014972764981547, "learning_rate": 1.499522200213166e-06, "loss": 0.2139, "loss_nan_ranks": 0, "loss_rank_avg": 0.13388274610042572, "step": 1680, "valid_targets_mean": 7633.9, "valid_targets_min": 2642 }, { "epoch": 6.2407407407407405, "grad_norm": 0.5727109885681817, "learning_rate": 1.4301462890099016e-06, "loss": 0.2188, "loss_nan_ranks": 0, "loss_rank_avg": 0.1071832925081253, "step": 1685, "valid_targets_mean": 6702.2, "valid_targets_min": 2613 }, { "epoch": 6.2592592592592595, "grad_norm": 0.526681748443717, "learning_rate": 1.362353945098862e-06, "loss": 0.2289, "loss_nan_ranks": 0, "loss_rank_avg": 0.149953693151474, "step": 1690, "valid_targets_mean": 7246.5, "valid_targets_min": 2595 }, { "epoch": 6.277777777777778, "grad_norm": 0.4820760829827286, "learning_rate": 1.2961509495562074e-06, "loss": 0.2115, "loss_nan_ranks": 0, "loss_rank_avg": 0.09220828860998154, "step": 1695, "valid_targets_mean": 6987.0, "valid_targets_min": 4035 }, { "epoch": 6.296296296296296, "grad_norm": 0.5236479137171796, "learning_rate": 1.2315429479244378e-06, "loss": 0.2169, "loss_nan_ranks": 0, "loss_rank_avg": 0.11444682627916336, "step": 1700, "valid_targets_mean": 6010.9, "valid_targets_min": 385 }, { "epoch": 6.314814814814815, "grad_norm": 0.5245034887848089, "learning_rate": 1.1685354497309764e-06, "loss": 0.2141, "loss_nan_ranks": 0, "loss_rank_avg": 0.12459330260753632, "step": 1705, "valid_targets_mean": 6315.4, "valid_targets_min": 4419 }, { "epoch": 6.333333333333333, "grad_norm": 1.9145227209817528, "learning_rate": 1.107133828018323e-06, "loss": 0.2228, "loss_nan_ranks": 0, "loss_rank_avg": 0.09349900484085083, "step": 1710, "valid_targets_mean": 6550.8, "valid_targets_min": 4214 }, { "epoch": 6.351851851851852, "grad_norm": 0.5373455723958673, "learning_rate": 1.0473433188858784e-06, "loss": 0.2142, "loss_nan_ranks": 0, "loss_rank_avg": 0.08351831883192062, "step": 1715, "valid_targets_mean": 5286.6, "valid_targets_min": 2838 }, { "epoch": 6.37037037037037, "grad_norm": 0.5118701946064793, "learning_rate": 9.891690210434235e-07, "loss": 0.226, "loss_nan_ranks": 0, "loss_rank_avg": 0.08807747066020966, "step": 1720, "valid_targets_mean": 5508.8, "valid_targets_min": 3072 }, { "epoch": 6.388888888888889, "grad_norm": 0.48206381384965274, "learning_rate": 9.326158953763009e-07, "loss": 0.2017, "loss_nan_ranks": 0, "loss_rank_avg": 0.09705394506454468, "step": 1725, "valid_targets_mean": 6747.9, "valid_targets_min": 5018 }, { "epoch": 6.407407407407407, "grad_norm": 0.5459872813528737, "learning_rate": 8.776887645224086e-07, "loss": 0.2075, "loss_nan_ranks": 0, "loss_rank_avg": 0.1001187264919281, "step": 1730, "valid_targets_mean": 7545.6, "valid_targets_min": 5814 }, { "epoch": 6.425925925925926, "grad_norm": 0.5445943239279856, "learning_rate": 8.243923124609066e-07, "loss": 0.2161, "loss_nan_ranks": 0, "loss_rank_avg": 0.09796342998743057, "step": 1735, "valid_targets_mean": 7020.4, "valid_targets_min": 3779 }, { "epoch": 6.444444444444445, "grad_norm": 0.4684005826009441, "learning_rate": 7.727310841128055e-07, "loss": 0.2325, "loss_nan_ranks": 0, "loss_rank_avg": 0.14073237776756287, "step": 1740, "valid_targets_mean": 8810.2, "valid_targets_min": 1462 }, { "epoch": 6.462962962962963, "grad_norm": 0.4541060098469679, "learning_rate": 7.227094849533878e-07, "loss": 0.2124, "loss_nan_ranks": 0, "loss_rank_avg": 0.09816377609968185, "step": 1745, "valid_targets_mean": 7020.2, "valid_targets_min": 2597 }, { "epoch": 6.481481481481482, "grad_norm": 0.4673983795871858, "learning_rate": 6.743317806365213e-07, "loss": 0.2198, "loss_nan_ranks": 0, "loss_rank_avg": 0.10708339512348175, "step": 1750, "valid_targets_mean": 7477.1, "valid_targets_min": 4900 }, { "epoch": 6.5, "grad_norm": 0.6105942569643106, "learning_rate": 6.276020966309059e-07, "loss": 0.199, "loss_nan_ranks": 0, "loss_rank_avg": 0.09840669482946396, "step": 1755, "valid_targets_mean": 6394.8, "valid_targets_min": 3962 }, { "epoch": 6.518518518518518, "grad_norm": 0.5261917751262789, "learning_rate": 5.825244178682621e-07, "loss": 0.2089, "loss_nan_ranks": 0, "loss_rank_avg": 0.12467174232006073, "step": 1760, "valid_targets_mean": 7320.4, "valid_targets_min": 4238 }, { "epoch": 6.537037037037037, "grad_norm": 0.5040103215289163, "learning_rate": 5.391025884035239e-07, "loss": 0.2128, "loss_nan_ranks": 0, "loss_rank_avg": 0.1107766330242157, "step": 1765, "valid_targets_mean": 6337.5, "valid_targets_min": 329 }, { "epoch": 6.555555555555555, "grad_norm": 0.4850335062667886, "learning_rate": 4.973403110870178e-07, "loss": 0.2167, "loss_nan_ranks": 0, "loss_rank_avg": 0.0921555906534195, "step": 1770, "valid_targets_mean": 6312.9, "valid_targets_min": 5236 }, { "epoch": 6.574074074074074, "grad_norm": 0.5151266321529011, "learning_rate": 4.5724114724870593e-07, "loss": 0.2176, "loss_nan_ranks": 0, "loss_rank_avg": 0.12505650520324707, "step": 1775, "valid_targets_mean": 7164.6, "valid_targets_min": 5352 }, { "epoch": 6.592592592592593, "grad_norm": 0.5287100835670476, "learning_rate": 4.188085163944866e-07, "loss": 0.2036, "loss_nan_ranks": 0, "loss_rank_avg": 0.09333769977092743, "step": 1780, "valid_targets_mean": 6233.1, "valid_targets_min": 471 }, { "epoch": 6.611111111111111, "grad_norm": 0.5184514712077551, "learning_rate": 3.820456959145924e-07, "loss": 0.2138, "loss_nan_ranks": 0, "loss_rank_avg": 0.10321502387523651, "step": 1785, "valid_targets_mean": 6844.4, "valid_targets_min": 4841 }, { "epoch": 6.62962962962963, "grad_norm": 0.7780741153859662, "learning_rate": 3.4695582080410686e-07, "loss": 0.2173, "loss_nan_ranks": 0, "loss_rank_avg": 0.1244448572397232, "step": 1790, "valid_targets_mean": 7082.1, "valid_targets_min": 4348 }, { "epoch": 6.648148148148148, "grad_norm": 0.5680521512792298, "learning_rate": 3.1354188339562277e-07, "loss": 0.2123, "loss_nan_ranks": 0, "loss_rank_avg": 0.11791664361953735, "step": 1795, "valid_targets_mean": 6692.6, "valid_targets_min": 4308 }, { "epoch": 6.666666666666667, "grad_norm": 0.5333100590033606, "learning_rate": 2.818067331040708e-07, "loss": 0.211, "loss_nan_ranks": 0, "loss_rank_avg": 0.10548874735832214, "step": 1800, "valid_targets_mean": 6284.0, "valid_targets_min": 4426 }, { "epoch": 6.685185185185185, "grad_norm": 0.4756549745078081, "learning_rate": 2.517530761837228e-07, "loss": 0.212, "loss_nan_ranks": 0, "loss_rank_avg": 0.10474137216806412, "step": 1805, "valid_targets_mean": 7218.1, "valid_targets_min": 474 }, { "epoch": 6.703703703703704, "grad_norm": 0.6122065278621339, "learning_rate": 2.2338347549742956e-07, "loss": 0.2125, "loss_nan_ranks": 0, "loss_rank_avg": 0.12869982421398163, "step": 1810, "valid_targets_mean": 5424.0, "valid_targets_min": 301 }, { "epoch": 6.722222222222222, "grad_norm": 0.47686114593760576, "learning_rate": 1.9670035029804912e-07, "loss": 0.2253, "loss_nan_ranks": 0, "loss_rank_avg": 0.1062430888414383, "step": 1815, "valid_targets_mean": 7357.9, "valid_targets_min": 5245 }, { "epoch": 6.7407407407407405, "grad_norm": 0.4663069570426537, "learning_rate": 1.7170597602215622e-07, "loss": 0.2105, "loss_nan_ranks": 0, "loss_rank_avg": 0.10924944281578064, "step": 1820, "valid_targets_mean": 7345.1, "valid_targets_min": 417 }, { "epoch": 6.7592592592592595, "grad_norm": 0.4863214142184415, "learning_rate": 1.4840248409599966e-07, "loss": 0.2166, "loss_nan_ranks": 0, "loss_rank_avg": 0.11560134589672089, "step": 1825, "valid_targets_mean": 6813.0, "valid_targets_min": 336 }, { "epoch": 6.777777777777778, "grad_norm": 0.5144234551395174, "learning_rate": 1.2679186175373448e-07, "loss": 0.2205, "loss_nan_ranks": 0, "loss_rank_avg": 0.10990774631500244, "step": 1830, "valid_targets_mean": 7161.0, "valid_targets_min": 4072 }, { "epoch": 6.796296296296296, "grad_norm": 0.5092042732657823, "learning_rate": 1.0687595186797073e-07, "loss": 0.2187, "loss_nan_ranks": 0, "loss_rank_avg": 0.11906181275844574, "step": 1835, "valid_targets_mean": 7092.1, "valid_targets_min": 2798 }, { "epoch": 6.814814814814815, "grad_norm": 0.4879235110558119, "learning_rate": 8.865645279260815e-08, "loss": 0.2191, "loss_nan_ranks": 0, "loss_rank_avg": 0.11363916099071503, "step": 1840, "valid_targets_mean": 6146.9, "valid_targets_min": 597 }, { "epoch": 6.833333333333333, "grad_norm": 0.47702518244695324, "learning_rate": 7.213491821800977e-08, "loss": 0.2081, "loss_nan_ranks": 0, "loss_rank_avg": 0.10680747032165527, "step": 1845, "valid_targets_mean": 7832.5, "valid_targets_min": 6237 }, { "epoch": 6.851851851851852, "grad_norm": 0.5034576503374186, "learning_rate": 5.731275703851902e-08, "loss": 0.2148, "loss_nan_ranks": 0, "loss_rank_avg": 0.08941599726676941, "step": 1850, "valid_targets_mean": 6900.6, "valid_targets_min": 2557 }, { "epoch": 6.87037037037037, "grad_norm": 0.5186476251514881, "learning_rate": 4.4191233232300235e-08, "loss": 0.206, "loss_nan_ranks": 0, "loss_rank_avg": 0.1075291633605957, "step": 1855, "valid_targets_mean": 6265.1, "valid_targets_min": 457 }, { "epoch": 6.888888888888889, "grad_norm": 0.48818375200273456, "learning_rate": 3.2771465753560495e-08, "loss": 0.2164, "loss_nan_ranks": 0, "loss_rank_avg": 0.11990895122289658, "step": 1860, "valid_targets_mean": 6609.4, "valid_targets_min": 1476 }, { "epoch": 6.907407407407407, "grad_norm": 0.4514003867926065, "learning_rate": 2.3054428437125907e-08, "loss": 0.2106, "loss_nan_ranks": 0, "loss_rank_avg": 0.0958041399717331, "step": 1865, "valid_targets_mean": 7317.1, "valid_targets_min": 1873 }, { "epoch": 6.925925925925926, "grad_norm": 0.4750915394409351, "learning_rate": 1.5040949915399173e-08, "loss": 0.2159, "loss_nan_ranks": 0, "loss_rank_avg": 0.11690913140773773, "step": 1870, "valid_targets_mean": 7307.8, "valid_targets_min": 4736 }, { "epoch": 6.944444444444445, "grad_norm": 0.4406540020628789, "learning_rate": 8.731713547689424e-09, "loss": 0.2157, "loss_nan_ranks": 0, "loss_rank_avg": 0.09516197443008423, "step": 1875, "valid_targets_mean": 8098.8, "valid_targets_min": 5210 }, { "epoch": 6.962962962962963, "grad_norm": 0.4595438788046572, "learning_rate": 4.127257361954406e-09, "loss": 0.2136, "loss_nan_ranks": 0, "loss_rank_avg": 0.08866438269615173, "step": 1880, "valid_targets_mean": 6785.6, "valid_targets_min": 274 }, { "epoch": 6.981481481481482, "grad_norm": 0.4913637179887596, "learning_rate": 1.2279740088971814e-09, "loss": 0.2234, "loss_nan_ranks": 0, "loss_rank_avg": 0.13494595885276794, "step": 1885, "valid_targets_mean": 7588.5, "valid_targets_min": 5906 }, { "epoch": 7.0, "grad_norm": 0.5014822111690687, "learning_rate": 3.411072850179054e-11, "loss": 0.22, "loss_nan_ranks": 0, "loss_rank_avg": 0.10025124996900558, "step": 1890, "valid_targets_mean": 5791.9, "valid_targets_min": 453 }, { "epoch": 7.0, "step": 1890, "total_flos": 1.153060328150401e+18, "train_loss": 0.0, "train_runtime": 0.761, "train_samples_per_second": 39699.155, "train_steps_per_second": 2483.497 } ], "logging_steps": 5, "max_steps": 1890, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 1500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.153060328150401e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }