{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "eval_steps": 500, "global_step": 1274, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.027573529411764705, "grad_norm": 12.922369370251287, "learning_rate": 1.25e-06, "loss": 1.0219, "loss_nan_ranks": 0, "loss_rank_avg": 0.35352784395217896, "step": 5, "valid_targets_mean": 10593.7, "valid_targets_min": 1314 }, { "epoch": 0.05514705882352941, "grad_norm": 9.914692363999333, "learning_rate": 2.8125e-06, "loss": 1.0055, "loss_nan_ranks": 0, "loss_rank_avg": 0.33466315269470215, "step": 10, "valid_targets_mean": 9719.9, "valid_targets_min": 4041 }, { "epoch": 0.08272058823529412, "grad_norm": 5.140457433964579, "learning_rate": 4.3750000000000005e-06, "loss": 0.9532, "loss_nan_ranks": 0, "loss_rank_avg": 0.31088295578956604, "step": 15, "valid_targets_mean": 9489.2, "valid_targets_min": 3677 }, { "epoch": 0.11029411764705882, "grad_norm": 2.3405453339381914, "learning_rate": 5.9375e-06, "loss": 0.8851, "loss_nan_ranks": 0, "loss_rank_avg": 0.28772222995758057, "step": 20, "valid_targets_mean": 10487.8, "valid_targets_min": 1794 }, { "epoch": 0.13786764705882354, "grad_norm": 1.6997672536663537, "learning_rate": 7.500000000000001e-06, "loss": 0.8505, "loss_nan_ranks": 0, "loss_rank_avg": 0.2901165187358856, "step": 25, "valid_targets_mean": 10282.9, "valid_targets_min": 1881 }, { "epoch": 0.16544117647058823, "grad_norm": 1.3985790541526377, "learning_rate": 9.0625e-06, "loss": 0.8194, "loss_nan_ranks": 0, "loss_rank_avg": 0.2608858346939087, "step": 30, "valid_targets_mean": 9144.1, "valid_targets_min": 2515 }, { "epoch": 0.19301470588235295, "grad_norm": 0.8623722371179741, "learning_rate": 1.0625e-05, "loss": 0.79, "loss_nan_ranks": 0, "loss_rank_avg": 0.2887074649333954, "step": 35, "valid_targets_mean": 10813.7, "valid_targets_min": 3092 }, { "epoch": 0.22058823529411764, "grad_norm": 0.8093983860510723, "learning_rate": 1.2187500000000001e-05, "loss": 0.7763, "loss_nan_ranks": 0, "loss_rank_avg": 0.26193925738334656, "step": 40, "valid_targets_mean": 10127.0, "valid_targets_min": 2496 }, { "epoch": 0.24816176470588236, "grad_norm": 0.5974300980069079, "learning_rate": 1.375e-05, "loss": 0.7367, "loss_nan_ranks": 0, "loss_rank_avg": 0.2483624666929245, "step": 45, "valid_targets_mean": 10489.3, "valid_targets_min": 3368 }, { "epoch": 0.2757352941176471, "grad_norm": 0.46168263473020144, "learning_rate": 1.5312500000000003e-05, "loss": 0.7257, "loss_nan_ranks": 0, "loss_rank_avg": 0.2395600974559784, "step": 50, "valid_targets_mean": 10156.6, "valid_targets_min": 1452 }, { "epoch": 0.30330882352941174, "grad_norm": 0.4028635985676079, "learning_rate": 1.6875e-05, "loss": 0.7035, "loss_nan_ranks": 0, "loss_rank_avg": 0.20824620127677917, "step": 55, "valid_targets_mean": 8890.8, "valid_targets_min": 1913 }, { "epoch": 0.33088235294117646, "grad_norm": 0.3895890718140441, "learning_rate": 1.84375e-05, "loss": 0.6873, "loss_nan_ranks": 0, "loss_rank_avg": 0.22504115104675293, "step": 60, "valid_targets_mean": 9894.5, "valid_targets_min": 3118 }, { "epoch": 0.3584558823529412, "grad_norm": 0.30444276365102046, "learning_rate": 2e-05, "loss": 0.6644, "loss_nan_ranks": 0, "loss_rank_avg": 0.20244480669498444, "step": 65, "valid_targets_mean": 9749.3, "valid_targets_min": 1933 }, { "epoch": 0.3860294117647059, "grad_norm": 0.2581486013897639, "learning_rate": 2.1562500000000002e-05, "loss": 0.6489, "loss_nan_ranks": 0, "loss_rank_avg": 0.2169645130634308, "step": 70, "valid_targets_mean": 10628.2, "valid_targets_min": 1803 }, { "epoch": 0.41360294117647056, "grad_norm": 0.2631860893496907, "learning_rate": 2.3125000000000003e-05, "loss": 0.6346, "loss_nan_ranks": 0, "loss_rank_avg": 0.22958728671073914, "step": 75, "valid_targets_mean": 10800.9, "valid_targets_min": 2064 }, { "epoch": 0.4411764705882353, "grad_norm": 0.274808899082167, "learning_rate": 2.46875e-05, "loss": 0.6256, "loss_nan_ranks": 0, "loss_rank_avg": 0.23121704161167145, "step": 80, "valid_targets_mean": 11487.6, "valid_targets_min": 4161 }, { "epoch": 0.46875, "grad_norm": 0.2583078645972247, "learning_rate": 2.625e-05, "loss": 0.612, "loss_nan_ranks": 0, "loss_rank_avg": 0.1923944652080536, "step": 85, "valid_targets_mean": 9252.9, "valid_targets_min": 226 }, { "epoch": 0.4963235294117647, "grad_norm": 0.24880268689380583, "learning_rate": 2.7812500000000002e-05, "loss": 0.607, "loss_nan_ranks": 0, "loss_rank_avg": 0.19335561990737915, "step": 90, "valid_targets_mean": 9072.7, "valid_targets_min": 1797 }, { "epoch": 0.5238970588235294, "grad_norm": 0.2616086521839269, "learning_rate": 2.9375000000000003e-05, "loss": 0.5981, "loss_nan_ranks": 0, "loss_rank_avg": 0.19159752130508423, "step": 95, "valid_targets_mean": 9744.3, "valid_targets_min": 1782 }, { "epoch": 0.5514705882352942, "grad_norm": 0.27692625930198184, "learning_rate": 3.09375e-05, "loss": 0.5915, "loss_nan_ranks": 0, "loss_rank_avg": 0.18734252452850342, "step": 100, "valid_targets_mean": 8845.1, "valid_targets_min": 2130 }, { "epoch": 0.5790441176470589, "grad_norm": 0.2574148775678182, "learning_rate": 3.2500000000000004e-05, "loss": 0.5845, "loss_nan_ranks": 0, "loss_rank_avg": 0.16766202449798584, "step": 105, "valid_targets_mean": 8674.7, "valid_targets_min": 2213 }, { "epoch": 0.6066176470588235, "grad_norm": 0.27869577303780235, "learning_rate": 3.40625e-05, "loss": 0.5806, "loss_nan_ranks": 0, "loss_rank_avg": 0.19295048713684082, "step": 110, "valid_targets_mean": 10376.7, "valid_targets_min": 1765 }, { "epoch": 0.6341911764705882, "grad_norm": 0.2984993331075873, "learning_rate": 3.5625000000000005e-05, "loss": 0.5777, "loss_nan_ranks": 0, "loss_rank_avg": 0.20063826441764832, "step": 115, "valid_targets_mean": 9936.5, "valid_targets_min": 1494 }, { "epoch": 0.6617647058823529, "grad_norm": 0.271090279172367, "learning_rate": 3.71875e-05, "loss": 0.5672, "loss_nan_ranks": 0, "loss_rank_avg": 0.18824777007102966, "step": 120, "valid_targets_mean": 9430.4, "valid_targets_min": 1915 }, { "epoch": 0.6893382352941176, "grad_norm": 0.29538515374006735, "learning_rate": 3.875e-05, "loss": 0.5646, "loss_nan_ranks": 0, "loss_rank_avg": 0.1923852413892746, "step": 125, "valid_targets_mean": 9131.7, "valid_targets_min": 1686 }, { "epoch": 0.7169117647058824, "grad_norm": 0.3122370627704027, "learning_rate": 3.999992484978314e-05, "loss": 0.5644, "loss_nan_ranks": 0, "loss_rank_avg": 0.20504149794578552, "step": 130, "valid_targets_mean": 10173.0, "valid_targets_min": 1298 }, { "epoch": 0.7444852941176471, "grad_norm": 0.34703468911750324, "learning_rate": 3.999729465149199e-05, "loss": 0.557, "loss_nan_ranks": 0, "loss_rank_avg": 0.17850443720817566, "step": 135, "valid_targets_mean": 9228.8, "valid_targets_min": 924 }, { "epoch": 0.7720588235294118, "grad_norm": 0.332190362036781, "learning_rate": 3.9990907507094396e-05, "loss": 0.5497, "loss_nan_ranks": 0, "loss_rank_avg": 0.16969668865203857, "step": 140, "valid_targets_mean": 8790.3, "valid_targets_min": 344 }, { "epoch": 0.7996323529411765, "grad_norm": 0.3034703265351846, "learning_rate": 3.9980764616560544e-05, "loss": 0.5564, "loss_nan_ranks": 0, "loss_rank_avg": 0.18241870403289795, "step": 145, "valid_targets_mean": 9520.5, "valid_targets_min": 1223 }, { "epoch": 0.8272058823529411, "grad_norm": 0.3259015215149292, "learning_rate": 3.9966867885462854e-05, "loss": 0.5555, "loss_nan_ranks": 0, "loss_rank_avg": 0.19907459616661072, "step": 150, "valid_targets_mean": 11470.9, "valid_targets_min": 3448 }, { "epoch": 0.8547794117647058, "grad_norm": 0.3279486360331197, "learning_rate": 3.994921992461797e-05, "loss": 0.5461, "loss_nan_ranks": 0, "loss_rank_avg": 0.18447428941726685, "step": 155, "valid_targets_mean": 9752.1, "valid_targets_min": 3827 }, { "epoch": 0.8823529411764706, "grad_norm": 0.3415570189377227, "learning_rate": 3.992782404959627e-05, "loss": 0.5485, "loss_nan_ranks": 0, "loss_rank_avg": 0.18221747875213623, "step": 160, "valid_targets_mean": 9179.8, "valid_targets_min": 1780 }, { "epoch": 0.9099264705882353, "grad_norm": 0.39579278374013543, "learning_rate": 3.9902684280098965e-05, "loss": 0.5476, "loss_nan_ranks": 0, "loss_rank_avg": 0.18513306975364685, "step": 165, "valid_targets_mean": 10199.6, "valid_targets_min": 2157 }, { "epoch": 0.9375, "grad_norm": 0.2798313329426632, "learning_rate": 3.987380533920287e-05, "loss": 0.5392, "loss_nan_ranks": 0, "loss_rank_avg": 0.2026323676109314, "step": 170, "valid_targets_mean": 11315.1, "valid_targets_min": 6090 }, { "epoch": 0.9650735294117647, "grad_norm": 0.3493942306576815, "learning_rate": 3.984119265247314e-05, "loss": 0.5474, "loss_nan_ranks": 0, "loss_rank_avg": 0.15390989184379578, "step": 175, "valid_targets_mean": 8281.4, "valid_targets_min": 2125 }, { "epoch": 0.9926470588235294, "grad_norm": 0.36521635704700256, "learning_rate": 3.9804852346943866e-05, "loss": 0.5426, "loss_nan_ranks": 0, "loss_rank_avg": 0.18721234798431396, "step": 180, "valid_targets_mean": 10394.0, "valid_targets_min": 2057 }, { "epoch": 1.0165441176470589, "grad_norm": 0.30506233144116673, "learning_rate": 3.9764791249967044e-05, "loss": 0.5428, "loss_nan_ranks": 0, "loss_rank_avg": 0.19133886694908142, "step": 185, "valid_targets_mean": 10289.3, "valid_targets_min": 2465 }, { "epoch": 1.0441176470588236, "grad_norm": 0.2910843215873972, "learning_rate": 3.972101688792986e-05, "loss": 0.5312, "loss_nan_ranks": 0, "loss_rank_avg": 0.1826484203338623, "step": 190, "valid_targets_mean": 9581.6, "valid_targets_min": 1460 }, { "epoch": 1.0716911764705883, "grad_norm": 0.2755584276978832, "learning_rate": 3.967353748484071e-05, "loss": 0.5344, "loss_nan_ranks": 0, "loss_rank_avg": 0.18369868397712708, "step": 195, "valid_targets_mean": 10337.5, "valid_targets_min": 2688 }, { "epoch": 1.099264705882353, "grad_norm": 0.359251780614338, "learning_rate": 3.962236196078411e-05, "loss": 0.5225, "loss_nan_ranks": 0, "loss_rank_avg": 0.15699255466461182, "step": 200, "valid_targets_mean": 9281.1, "valid_targets_min": 1589 }, { "epoch": 1.1268382352941178, "grad_norm": 0.27720289986260105, "learning_rate": 3.956749993024489e-05, "loss": 0.5257, "loss_nan_ranks": 0, "loss_rank_avg": 0.140591099858284, "step": 205, "valid_targets_mean": 7799.2, "valid_targets_min": 1608 }, { "epoch": 1.1544117647058822, "grad_norm": 0.26983976305045465, "learning_rate": 3.950896170030186e-05, "loss": 0.5259, "loss_nan_ranks": 0, "loss_rank_avg": 0.17038512229919434, "step": 210, "valid_targets_mean": 9629.8, "valid_targets_min": 2130 }, { "epoch": 1.181985294117647, "grad_norm": 0.2946999679550615, "learning_rate": 3.9446758268691395e-05, "loss": 0.5268, "loss_nan_ranks": 0, "loss_rank_avg": 0.18730860948562622, "step": 215, "valid_targets_mean": 9681.8, "valid_targets_min": 455 }, { "epoch": 1.2095588235294117, "grad_norm": 0.299840004738946, "learning_rate": 3.9380901321741315e-05, "loss": 0.5185, "loss_nan_ranks": 0, "loss_rank_avg": 0.17023658752441406, "step": 220, "valid_targets_mean": 9639.6, "valid_targets_min": 3931 }, { "epoch": 1.2371323529411764, "grad_norm": 0.2966956776393768, "learning_rate": 3.931140323217524e-05, "loss": 0.526, "loss_nan_ranks": 0, "loss_rank_avg": 0.16511359810829163, "step": 225, "valid_targets_mean": 9019.7, "valid_targets_min": 1681 }, { "epoch": 1.2647058823529411, "grad_norm": 0.2881134761116685, "learning_rate": 3.923827705678818e-05, "loss": 0.5219, "loss_nan_ranks": 0, "loss_rank_avg": 0.17622773349285126, "step": 230, "valid_targets_mean": 9761.3, "valid_targets_min": 1943 }, { "epoch": 1.2922794117647058, "grad_norm": 0.3211671842301337, "learning_rate": 3.916153653399352e-05, "loss": 0.5215, "loss_nan_ranks": 0, "loss_rank_avg": 0.17426463961601257, "step": 235, "valid_targets_mean": 9748.3, "valid_targets_min": 2276 }, { "epoch": 1.3198529411764706, "grad_norm": 0.32683328829954483, "learning_rate": 3.908119608124184e-05, "loss": 0.522, "loss_nan_ranks": 0, "loss_rank_avg": 0.16995075345039368, "step": 240, "valid_targets_mean": 9808.2, "valid_targets_min": 3060 }, { "epoch": 1.3474264705882353, "grad_norm": 0.2885476814272769, "learning_rate": 3.8997270792312435e-05, "loss": 0.5139, "loss_nan_ranks": 0, "loss_rank_avg": 0.172532856464386, "step": 245, "valid_targets_mean": 9445.8, "valid_targets_min": 1887 }, { "epoch": 1.375, "grad_norm": 0.40619327872879724, "learning_rate": 3.890977643447746e-05, "loss": 0.5224, "loss_nan_ranks": 0, "loss_rank_avg": 0.16190695762634277, "step": 250, "valid_targets_mean": 9265.1, "valid_targets_min": 4316 }, { "epoch": 1.4025735294117647, "grad_norm": 0.3079521022594924, "learning_rate": 3.8818729445539765e-05, "loss": 0.5096, "loss_nan_ranks": 0, "loss_rank_avg": 0.14533713459968567, "step": 255, "valid_targets_mean": 8282.3, "valid_targets_min": 2412 }, { "epoch": 1.4301470588235294, "grad_norm": 0.31036078222548275, "learning_rate": 3.872414693074466e-05, "loss": 0.5148, "loss_nan_ranks": 0, "loss_rank_avg": 0.1640068143606186, "step": 260, "valid_targets_mean": 9684.3, "valid_targets_min": 3184 }, { "epoch": 1.4577205882352942, "grad_norm": 0.26541664634789813, "learning_rate": 3.862604665956632e-05, "loss": 0.5115, "loss_nan_ranks": 0, "loss_rank_avg": 0.17487174272537231, "step": 265, "valid_targets_mean": 10682.2, "valid_targets_min": 4848 }, { "epoch": 1.4852941176470589, "grad_norm": 0.28874244943207317, "learning_rate": 3.8524447062369355e-05, "loss": 0.5134, "loss_nan_ranks": 0, "loss_rank_avg": 0.16116756200790405, "step": 270, "valid_targets_mean": 9613.4, "valid_targets_min": 1794 }, { "epoch": 1.5128676470588234, "grad_norm": 0.3043371506396099, "learning_rate": 3.8419367226946286e-05, "loss": 0.5167, "loss_nan_ranks": 0, "loss_rank_avg": 0.1825055181980133, "step": 275, "valid_targets_mean": 11411.1, "valid_targets_min": 3611 }, { "epoch": 1.5404411764705883, "grad_norm": 0.3223108606843313, "learning_rate": 3.831082689493143e-05, "loss": 0.5176, "loss_nan_ranks": 0, "loss_rank_avg": 0.16505330801010132, "step": 280, "valid_targets_mean": 9131.8, "valid_targets_min": 2184 }, { "epoch": 1.5680147058823528, "grad_norm": 0.27729135718776116, "learning_rate": 3.819884645809203e-05, "loss": 0.5147, "loss_nan_ranks": 0, "loss_rank_avg": 0.16858059167861938, "step": 285, "valid_targets_mean": 9615.7, "valid_targets_min": 1996 }, { "epoch": 1.5955882352941178, "grad_norm": 0.3660763161980808, "learning_rate": 3.808344695449715e-05, "loss": 0.5088, "loss_nan_ranks": 0, "loss_rank_avg": 0.1702558845281601, "step": 290, "valid_targets_mean": 10235.8, "valid_targets_min": 4142 }, { "epoch": 1.6231617647058822, "grad_norm": 0.3041037414824209, "learning_rate": 3.796465006456523e-05, "loss": 0.5065, "loss_nan_ranks": 0, "loss_rank_avg": 0.1646835058927536, "step": 295, "valid_targets_mean": 9766.0, "valid_targets_min": 1557 }, { "epoch": 1.6507352941176472, "grad_norm": 0.3198950333330496, "learning_rate": 3.784247810699093e-05, "loss": 0.5101, "loss_nan_ranks": 0, "loss_rank_avg": 0.18033772706985474, "step": 300, "valid_targets_mean": 10882.1, "valid_targets_min": 3133 }, { "epoch": 1.6783088235294117, "grad_norm": 0.2915834656120734, "learning_rate": 3.7716954034552004e-05, "loss": 0.5113, "loss_nan_ranks": 0, "loss_rank_avg": 0.17557981610298157, "step": 305, "valid_targets_mean": 10674.8, "valid_targets_min": 1627 }, { "epoch": 1.7058823529411766, "grad_norm": 0.3586758923774135, "learning_rate": 3.758810142979719e-05, "loss": 0.5087, "loss_nan_ranks": 0, "loss_rank_avg": 0.17392417788505554, "step": 310, "valid_targets_mean": 9954.1, "valid_targets_min": 1356 }, { "epoch": 1.7334558823529411, "grad_norm": 0.2645346812549258, "learning_rate": 3.74559445006156e-05, "loss": 0.5157, "loss_nan_ranks": 0, "loss_rank_avg": 0.17819076776504517, "step": 315, "valid_targets_mean": 10323.9, "valid_targets_min": 2361 }, { "epoch": 1.7610294117647058, "grad_norm": 0.25271889404246967, "learning_rate": 3.732050807568878e-05, "loss": 0.5066, "loss_nan_ranks": 0, "loss_rank_avg": 0.17629846930503845, "step": 320, "valid_targets_mean": 11451.3, "valid_targets_min": 1489 }, { "epoch": 1.7886029411764706, "grad_norm": 0.2763437959725264, "learning_rate": 3.718181759982604e-05, "loss": 0.5073, "loss_nan_ranks": 0, "loss_rank_avg": 0.1563217043876648, "step": 325, "valid_targets_mean": 8785.6, "valid_targets_min": 2129 }, { "epoch": 1.8161764705882353, "grad_norm": 0.30042350536966067, "learning_rate": 3.703989912918409e-05, "loss": 0.5054, "loss_nan_ranks": 0, "loss_rank_avg": 0.176944762468338, "step": 330, "valid_targets_mean": 10494.3, "valid_targets_min": 2765 }, { "epoch": 1.84375, "grad_norm": 0.3378004054537919, "learning_rate": 3.689477932637181e-05, "loss": 0.5034, "loss_nan_ranks": 0, "loss_rank_avg": 0.1579429805278778, "step": 335, "valid_targets_mean": 9586.9, "valid_targets_min": 1283 }, { "epoch": 1.8713235294117647, "grad_norm": 0.2679674790343664, "learning_rate": 3.674648545544104e-05, "loss": 0.5077, "loss_nan_ranks": 0, "loss_rank_avg": 0.178257018327713, "step": 340, "valid_targets_mean": 9507.9, "valid_targets_min": 2612 }, { "epoch": 1.8988970588235294, "grad_norm": 0.2579462646319047, "learning_rate": 3.659504537676444e-05, "loss": 0.4975, "loss_nan_ranks": 0, "loss_rank_avg": 0.15231987833976746, "step": 345, "valid_targets_mean": 9333.8, "valid_targets_min": 1719 }, { "epoch": 1.9264705882352942, "grad_norm": 0.2749601553036317, "learning_rate": 3.6440487541801246e-05, "loss": 0.4995, "loss_nan_ranks": 0, "loss_rank_avg": 0.16826336085796356, "step": 350, "valid_targets_mean": 9289.1, "valid_targets_min": 2423 }, { "epoch": 1.9540441176470589, "grad_norm": 0.27745508706156247, "learning_rate": 3.628284098775207e-05, "loss": 0.5038, "loss_nan_ranks": 0, "loss_rank_avg": 0.1490720808506012, "step": 355, "valid_targets_mean": 8659.3, "valid_targets_min": 3197 }, { "epoch": 1.9816176470588234, "grad_norm": 0.255832293559672, "learning_rate": 3.612213533210356e-05, "loss": 0.5071, "loss_nan_ranks": 0, "loss_rank_avg": 0.1693265736103058, "step": 360, "valid_targets_mean": 10030.6, "valid_targets_min": 2085 }, { "epoch": 2.005514705882353, "grad_norm": 0.33130827465355267, "learning_rate": 3.595840076706411e-05, "loss": 0.5046, "loss_nan_ranks": 0, "loss_rank_avg": 0.15648218989372253, "step": 365, "valid_targets_mean": 9743.0, "valid_targets_min": 1915 }, { "epoch": 2.0330882352941178, "grad_norm": 0.3384282546059246, "learning_rate": 3.579166805389154e-05, "loss": 0.4962, "loss_nan_ranks": 0, "loss_rank_avg": 0.16740265488624573, "step": 370, "valid_targets_mean": 9421.0, "valid_targets_min": 1529 }, { "epoch": 2.0606617647058822, "grad_norm": 0.33622127319274503, "learning_rate": 3.562196851711391e-05, "loss": 0.4878, "loss_nan_ranks": 0, "loss_rank_avg": 0.16401368379592896, "step": 375, "valid_targets_mean": 8955.2, "valid_targets_min": 1538 }, { "epoch": 2.088235294117647, "grad_norm": 0.2894449977451741, "learning_rate": 3.5449334038644515e-05, "loss": 0.5018, "loss_nan_ranks": 0, "loss_rank_avg": 0.16942408680915833, "step": 380, "valid_targets_mean": 9713.6, "valid_targets_min": 1919 }, { "epoch": 2.1158088235294117, "grad_norm": 0.34751761847543067, "learning_rate": 3.5273797051792114e-05, "loss": 0.4948, "loss_nan_ranks": 0, "loss_rank_avg": 0.1818588376045227, "step": 385, "valid_targets_mean": 10809.2, "valid_targets_min": 4108 }, { "epoch": 2.1433823529411766, "grad_norm": 0.2635161588753612, "learning_rate": 3.509539053516759e-05, "loss": 0.4966, "loss_nan_ranks": 0, "loss_rank_avg": 0.16954530775547028, "step": 390, "valid_targets_mean": 10632.2, "valid_targets_min": 1764 }, { "epoch": 2.170955882352941, "grad_norm": 0.24420082473821403, "learning_rate": 3.49141480064882e-05, "loss": 0.4982, "loss_nan_ranks": 0, "loss_rank_avg": 0.17877304553985596, "step": 395, "valid_targets_mean": 11006.1, "valid_targets_min": 3599 }, { "epoch": 2.198529411764706, "grad_norm": 0.27155049413143767, "learning_rate": 3.47301035162805e-05, "loss": 0.4882, "loss_nan_ranks": 0, "loss_rank_avg": 0.16475136578083038, "step": 400, "valid_targets_mean": 9823.3, "valid_targets_min": 2475 }, { "epoch": 2.2261029411764706, "grad_norm": 0.3287310658697828, "learning_rate": 3.454329164148317e-05, "loss": 0.4965, "loss_nan_ranks": 0, "loss_rank_avg": 0.16053670644760132, "step": 405, "valid_targets_mean": 9885.7, "valid_targets_min": 3454 }, { "epoch": 2.2536764705882355, "grad_norm": 0.2784404641293258, "learning_rate": 3.435374747895095e-05, "loss": 0.4873, "loss_nan_ranks": 0, "loss_rank_avg": 0.16837987303733826, "step": 410, "valid_targets_mean": 9935.2, "valid_targets_min": 1837 }, { "epoch": 2.28125, "grad_norm": 0.28151764792692086, "learning_rate": 3.4161506638860903e-05, "loss": 0.4956, "loss_nan_ranks": 0, "loss_rank_avg": 0.15354721248149872, "step": 415, "valid_targets_mean": 9233.6, "valid_targets_min": 1381 }, { "epoch": 2.3088235294117645, "grad_norm": 0.3543003365756421, "learning_rate": 3.396660523802225e-05, "loss": 0.4878, "loss_nan_ranks": 0, "loss_rank_avg": 0.15362058579921722, "step": 420, "valid_targets_mean": 9784.1, "valid_targets_min": 2502 }, { "epoch": 2.3363970588235294, "grad_norm": 0.3459844290219232, "learning_rate": 3.376907989309097e-05, "loss": 0.4898, "loss_nan_ranks": 0, "loss_rank_avg": 0.16421331465244293, "step": 425, "valid_targets_mean": 9645.9, "valid_targets_min": 1466 }, { "epoch": 2.363970588235294, "grad_norm": 0.2867558328265102, "learning_rate": 3.3568967713690574e-05, "loss": 0.4911, "loss_nan_ranks": 0, "loss_rank_avg": 0.16321928799152374, "step": 430, "valid_targets_mean": 9835.3, "valid_targets_min": 2574 }, { "epoch": 2.391544117647059, "grad_norm": 0.26780085668465703, "learning_rate": 3.3366306295440195e-05, "loss": 0.4835, "loss_nan_ranks": 0, "loss_rank_avg": 0.17828045785427094, "step": 435, "valid_targets_mean": 11611.0, "valid_targets_min": 2718 }, { "epoch": 2.4191176470588234, "grad_norm": 0.26518791813195564, "learning_rate": 3.316113371289137e-05, "loss": 0.4973, "loss_nan_ranks": 0, "loss_rank_avg": 0.15982230007648468, "step": 440, "valid_targets_mean": 9063.5, "valid_targets_min": 3199 }, { "epoch": 2.4466911764705883, "grad_norm": 0.28443782068251516, "learning_rate": 3.295348851237494e-05, "loss": 0.4926, "loss_nan_ranks": 0, "loss_rank_avg": 0.14535918831825256, "step": 445, "valid_targets_mean": 9235.6, "valid_targets_min": 617 }, { "epoch": 2.474264705882353, "grad_norm": 0.3188266718733089, "learning_rate": 3.2743409704759175e-05, "loss": 0.495, "loss_nan_ranks": 0, "loss_rank_avg": 0.18030281364917755, "step": 450, "valid_targets_mean": 10848.3, "valid_targets_min": 4332 }, { "epoch": 2.5018382352941178, "grad_norm": 0.2667422692086372, "learning_rate": 3.253093675812073e-05, "loss": 0.488, "loss_nan_ranks": 0, "loss_rank_avg": 0.15864768624305725, "step": 455, "valid_targets_mean": 9596.5, "valid_targets_min": 1920 }, { "epoch": 2.5294117647058822, "grad_norm": 0.2604029768908445, "learning_rate": 3.231610959032968e-05, "loss": 0.4885, "loss_nan_ranks": 0, "loss_rank_avg": 0.1746014654636383, "step": 460, "valid_targets_mean": 10420.3, "valid_targets_min": 1764 }, { "epoch": 2.556985294117647, "grad_norm": 0.2858191680359426, "learning_rate": 3.2098968561550024e-05, "loss": 0.4868, "loss_nan_ranks": 0, "loss_rank_avg": 0.15824320912361145, "step": 465, "valid_targets_mean": 10062.4, "valid_targets_min": 1489 }, { "epoch": 2.5845588235294117, "grad_norm": 0.2597776478065362, "learning_rate": 3.18795544666571e-05, "loss": 0.4875, "loss_nan_ranks": 0, "loss_rank_avg": 0.15228307247161865, "step": 470, "valid_targets_mean": 9501.3, "valid_targets_min": 2570 }, { "epoch": 2.6121323529411766, "grad_norm": 0.2619195905683205, "learning_rate": 3.1657908527573376e-05, "loss": 0.489, "loss_nan_ranks": 0, "loss_rank_avg": 0.14361000061035156, "step": 475, "valid_targets_mean": 8306.0, "valid_targets_min": 521 }, { "epoch": 2.639705882352941, "grad_norm": 0.3061155537403774, "learning_rate": 3.143407238552394e-05, "loss": 0.4835, "loss_nan_ranks": 0, "loss_rank_avg": 0.13861876726150513, "step": 480, "valid_targets_mean": 9225.4, "valid_targets_min": 344 }, { "epoch": 2.6672794117647056, "grad_norm": 0.2602743554835305, "learning_rate": 3.1208088093213276e-05, "loss": 0.4882, "loss_nan_ranks": 0, "loss_rank_avg": 0.15896561741828918, "step": 485, "valid_targets_mean": 9260.0, "valid_targets_min": 1839 }, { "epoch": 2.6948529411764706, "grad_norm": 0.2724505413952303, "learning_rate": 3.097999810692468e-05, "loss": 0.4825, "loss_nan_ranks": 0, "loss_rank_avg": 0.14052462577819824, "step": 490, "valid_targets_mean": 8514.9, "valid_targets_min": 2227 }, { "epoch": 2.7224264705882355, "grad_norm": 0.2662825428558882, "learning_rate": 3.074984527854392e-05, "loss": 0.492, "loss_nan_ranks": 0, "loss_rank_avg": 0.16768789291381836, "step": 495, "valid_targets_mean": 9108.5, "valid_targets_min": 413 }, { "epoch": 2.75, "grad_norm": 0.2975150447539531, "learning_rate": 3.0517672847508517e-05, "loss": 0.4858, "loss_nan_ranks": 0, "loss_rank_avg": 0.15554597973823547, "step": 500, "valid_targets_mean": 9202.9, "valid_targets_min": 967 }, { "epoch": 2.7775735294117645, "grad_norm": 0.2550495273374524, "learning_rate": 3.0283524432684214e-05, "loss": 0.4909, "loss_nan_ranks": 0, "loss_rank_avg": 0.178862527012825, "step": 505, "valid_targets_mean": 10239.5, "valid_targets_min": 4379 }, { "epoch": 2.8051470588235294, "grad_norm": 0.31261731562154393, "learning_rate": 3.0047444024170197e-05, "loss": 0.4781, "loss_nan_ranks": 0, "loss_rank_avg": 0.14991816878318787, "step": 510, "valid_targets_mean": 9316.9, "valid_targets_min": 349 }, { "epoch": 2.8327205882352944, "grad_norm": 0.30993641338002453, "learning_rate": 2.9809475975034586e-05, "loss": 0.4862, "loss_nan_ranks": 0, "loss_rank_avg": 0.16486816108226776, "step": 515, "valid_targets_mean": 10542.5, "valid_targets_min": 4704 }, { "epoch": 2.860294117647059, "grad_norm": 0.2824856754358018, "learning_rate": 2.9569664992981648e-05, "loss": 0.4807, "loss_nan_ranks": 0, "loss_rank_avg": 0.15252447128295898, "step": 520, "valid_targets_mean": 9458.6, "valid_targets_min": 2709 }, { "epoch": 2.8878676470588234, "grad_norm": 0.2930565337599535, "learning_rate": 2.932805613195249e-05, "loss": 0.4843, "loss_nan_ranks": 0, "loss_rank_avg": 0.14687579870224, "step": 525, "valid_targets_mean": 8935.3, "valid_targets_min": 2361 }, { "epoch": 2.9154411764705883, "grad_norm": 0.30339081178623667, "learning_rate": 2.9084694783660615e-05, "loss": 0.4824, "loss_nan_ranks": 0, "loss_rank_avg": 0.19605809450149536, "step": 530, "valid_targets_mean": 11474.3, "valid_targets_min": 3566 }, { "epoch": 2.943014705882353, "grad_norm": 0.35774459464396025, "learning_rate": 2.8839626669064073e-05, "loss": 0.486, "loss_nan_ranks": 0, "loss_rank_avg": 0.15574738383293152, "step": 535, "valid_targets_mean": 9388.9, "valid_targets_min": 1223 }, { "epoch": 2.9705882352941178, "grad_norm": 0.23890534631672108, "learning_rate": 2.8592897829775732e-05, "loss": 0.4887, "loss_nan_ranks": 0, "loss_rank_avg": 0.1441933661699295, "step": 540, "valid_targets_mean": 9105.8, "valid_targets_min": 2471 }, { "epoch": 2.9981617647058822, "grad_norm": 0.2874190440574071, "learning_rate": 2.8344554619413355e-05, "loss": 0.486, "loss_nan_ranks": 0, "loss_rank_avg": 0.16626250743865967, "step": 545, "valid_targets_mean": 9902.8, "valid_targets_min": 1834 }, { "epoch": 3.0220588235294117, "grad_norm": 0.2797379791583693, "learning_rate": 2.8094643694890947e-05, "loss": 0.4762, "loss_nan_ranks": 0, "loss_rank_avg": 0.15490218997001648, "step": 550, "valid_targets_mean": 9643.8, "valid_targets_min": 2579 }, { "epoch": 3.0496323529411766, "grad_norm": 0.2781063666088034, "learning_rate": 2.784321200765326e-05, "loss": 0.4829, "loss_nan_ranks": 0, "loss_rank_avg": 0.1765371710062027, "step": 555, "valid_targets_mean": 10938.4, "valid_targets_min": 3219 }, { "epoch": 3.077205882352941, "grad_norm": 0.2633292268883383, "learning_rate": 2.7590306794854853e-05, "loss": 0.4862, "loss_nan_ranks": 0, "loss_rank_avg": 0.1536048948764801, "step": 560, "valid_targets_mean": 9472.4, "valid_targets_min": 2616 }, { "epoch": 3.104779411764706, "grad_norm": 0.3024333218240714, "learning_rate": 2.7335975570485552e-05, "loss": 0.4809, "loss_nan_ranks": 0, "loss_rank_avg": 0.16254714131355286, "step": 565, "valid_targets_mean": 9964.5, "valid_targets_min": 1829 }, { "epoch": 3.1323529411764706, "grad_norm": 0.296651896630342, "learning_rate": 2.7080266116443855e-05, "loss": 0.4784, "loss_nan_ranks": 0, "loss_rank_avg": 0.1577146351337433, "step": 570, "valid_targets_mean": 8750.8, "valid_targets_min": 698 }, { "epoch": 3.1599264705882355, "grad_norm": 0.2731702178415364, "learning_rate": 2.6823226473559992e-05, "loss": 0.4783, "loss_nan_ranks": 0, "loss_rank_avg": 0.16758793592453003, "step": 575, "valid_targets_mean": 10279.2, "valid_targets_min": 2019 }, { "epoch": 3.1875, "grad_norm": 0.30439545297509896, "learning_rate": 2.656490493257042e-05, "loss": 0.4725, "loss_nan_ranks": 0, "loss_rank_avg": 0.1514369696378708, "step": 580, "valid_targets_mean": 9135.0, "valid_targets_min": 2521 }, { "epoch": 3.2150735294117645, "grad_norm": 0.27128678116813276, "learning_rate": 2.6305350025045257e-05, "loss": 0.478, "loss_nan_ranks": 0, "loss_rank_avg": 0.16004298627376556, "step": 585, "valid_targets_mean": 9188.1, "valid_targets_min": 1834 }, { "epoch": 3.2426470588235294, "grad_norm": 0.2915602972024743, "learning_rate": 2.604461051427054e-05, "loss": 0.4767, "loss_nan_ranks": 0, "loss_rank_avg": 0.1670864373445511, "step": 590, "valid_targets_mean": 9554.1, "valid_targets_min": 2225 }, { "epoch": 3.270220588235294, "grad_norm": 0.2652841967385897, "learning_rate": 2.5782735386086954e-05, "loss": 0.478, "loss_nan_ranks": 0, "loss_rank_avg": 0.18112346529960632, "step": 595, "valid_targets_mean": 10889.3, "valid_targets_min": 2020 }, { "epoch": 3.297794117647059, "grad_norm": 0.29039039949321094, "learning_rate": 2.5519773839686707e-05, "loss": 0.4792, "loss_nan_ranks": 0, "loss_rank_avg": 0.16282935440540314, "step": 600, "valid_targets_mean": 9785.2, "valid_targets_min": 2533 }, { "epoch": 3.3253676470588234, "grad_norm": 0.3075493577268038, "learning_rate": 2.525577527837036e-05, "loss": 0.4755, "loss_nan_ranks": 0, "loss_rank_avg": 0.16678908467292786, "step": 605, "valid_targets_mean": 10208.7, "valid_targets_min": 1585 }, { "epoch": 3.3529411764705883, "grad_norm": 0.28091651740846124, "learning_rate": 2.4990789300265256e-05, "loss": 0.4742, "loss_nan_ranks": 0, "loss_rank_avg": 0.15700221061706543, "step": 610, "valid_targets_mean": 9146.5, "valid_targets_min": 2410 }, { "epoch": 3.380514705882353, "grad_norm": 0.28102447688530885, "learning_rate": 2.472486568900745e-05, "loss": 0.4695, "loss_nan_ranks": 0, "loss_rank_avg": 0.15741194784641266, "step": 615, "valid_targets_mean": 9390.8, "valid_targets_min": 1976 }, { "epoch": 3.4080882352941178, "grad_norm": 0.33795109998469414, "learning_rate": 2.445805440438866e-05, "loss": 0.4796, "loss_nan_ranks": 0, "loss_rank_avg": 0.1518850028514862, "step": 620, "valid_targets_mean": 9751.7, "valid_targets_min": 1398 }, { "epoch": 3.4356617647058822, "grad_norm": 0.2899570887713248, "learning_rate": 2.419040557297024e-05, "loss": 0.4784, "loss_nan_ranks": 0, "loss_rank_avg": 0.15818291902542114, "step": 625, "valid_targets_mean": 9005.8, "valid_targets_min": 1875 }, { "epoch": 3.463235294117647, "grad_norm": 0.27113682924469873, "learning_rate": 2.3921969478665702e-05, "loss": 0.4742, "loss_nan_ranks": 0, "loss_rank_avg": 0.14796185493469238, "step": 630, "valid_targets_mean": 8848.4, "valid_targets_min": 1331 }, { "epoch": 3.4908088235294117, "grad_norm": 0.30195562694475775, "learning_rate": 2.3652796553293794e-05, "loss": 0.4707, "loss_nan_ranks": 0, "loss_rank_avg": 0.1724786013364792, "step": 635, "valid_targets_mean": 10914.3, "valid_targets_min": 2947 }, { "epoch": 3.5183823529411766, "grad_norm": 0.27090220964326883, "learning_rate": 2.338293736710373e-05, "loss": 0.4748, "loss_nan_ranks": 0, "loss_rank_avg": 0.1456066370010376, "step": 640, "valid_targets_mean": 8925.1, "valid_targets_min": 617 }, { "epoch": 3.545955882352941, "grad_norm": 0.2644791412793208, "learning_rate": 2.3112442619274408e-05, "loss": 0.4756, "loss_nan_ranks": 0, "loss_rank_avg": 0.1511625200510025, "step": 645, "valid_targets_mean": 9793.2, "valid_targets_min": 1724 }, { "epoch": 3.5735294117647056, "grad_norm": 0.25813430075433286, "learning_rate": 2.2841363128389388e-05, "loss": 0.4839, "loss_nan_ranks": 0, "loss_rank_avg": 0.16180767118930817, "step": 650, "valid_targets_mean": 10196.3, "valid_targets_min": 2718 }, { "epoch": 3.6011029411764706, "grad_norm": 0.27785983668454983, "learning_rate": 2.2569749822889526e-05, "loss": 0.4758, "loss_nan_ranks": 0, "loss_rank_avg": 0.1579504758119583, "step": 655, "valid_targets_mean": 8632.2, "valid_targets_min": 1381 }, { "epoch": 3.6286764705882355, "grad_norm": 0.29111353881691315, "learning_rate": 2.229765373150489e-05, "loss": 0.4719, "loss_nan_ranks": 0, "loss_rank_avg": 0.15923120081424713, "step": 660, "valid_targets_mean": 9744.8, "valid_targets_min": 1369 }, { "epoch": 3.65625, "grad_norm": 0.25730565829379, "learning_rate": 2.2025125973667817e-05, "loss": 0.4679, "loss_nan_ranks": 0, "loss_rank_avg": 0.13490340113639832, "step": 665, "valid_targets_mean": 8098.9, "valid_targets_min": 1119 }, { "epoch": 3.6838235294117645, "grad_norm": 0.25083718631280677, "learning_rate": 2.1752217749908997e-05, "loss": 0.4714, "loss_nan_ranks": 0, "loss_rank_avg": 0.16616028547286987, "step": 670, "valid_targets_mean": 10727.1, "valid_targets_min": 1686 }, { "epoch": 3.7113970588235294, "grad_norm": 0.29118617817633485, "learning_rate": 2.147898033223831e-05, "loss": 0.4709, "loss_nan_ranks": 0, "loss_rank_avg": 0.1437336653470993, "step": 675, "valid_targets_mean": 8196.6, "valid_targets_min": 643 }, { "epoch": 3.7389705882352944, "grad_norm": 0.2868072475645195, "learning_rate": 2.120546505451218e-05, "loss": 0.4716, "loss_nan_ranks": 0, "loss_rank_avg": 0.1592090129852295, "step": 680, "valid_targets_mean": 9257.1, "valid_targets_min": 2425 }, { "epoch": 3.766544117647059, "grad_norm": 0.2646065431597373, "learning_rate": 2.0931723302789346e-05, "loss": 0.4739, "loss_nan_ranks": 0, "loss_rank_avg": 0.15621066093444824, "step": 685, "valid_targets_mean": 9754.2, "valid_targets_min": 3945 }, { "epoch": 3.7941176470588234, "grad_norm": 0.27437618997704516, "learning_rate": 2.065780650567683e-05, "loss": 0.474, "loss_nan_ranks": 0, "loss_rank_avg": 0.14808349311351776, "step": 690, "valid_targets_mean": 9839.2, "valid_targets_min": 4592 }, { "epoch": 3.8216911764705883, "grad_norm": 0.24821319026154287, "learning_rate": 2.038376612466793e-05, "loss": 0.4742, "loss_nan_ranks": 0, "loss_rank_avg": 0.16178636252880096, "step": 695, "valid_targets_mean": 11111.1, "valid_targets_min": 1996 }, { "epoch": 3.849264705882353, "grad_norm": 0.2465619978535496, "learning_rate": 2.0109653644473966e-05, "loss": 0.471, "loss_nan_ranks": 0, "loss_rank_avg": 0.1422886848449707, "step": 700, "valid_targets_mean": 10060.5, "valid_targets_min": 313 }, { "epoch": 3.8768382352941178, "grad_norm": 0.3119293770555057, "learning_rate": 1.9835520563351735e-05, "loss": 0.468, "loss_nan_ranks": 0, "loss_rank_avg": 0.1610221415758133, "step": 705, "valid_targets_mean": 10060.2, "valid_targets_min": 1469 }, { "epoch": 3.9044117647058822, "grad_norm": 0.27494281301179807, "learning_rate": 1.9561418383428374e-05, "loss": 0.4752, "loss_nan_ranks": 0, "loss_rank_avg": 0.162495955824852, "step": 710, "valid_targets_mean": 9654.4, "valid_targets_min": 4185 }, { "epoch": 3.931985294117647, "grad_norm": 0.26008772339215747, "learning_rate": 1.9287398601025562e-05, "loss": 0.4705, "loss_nan_ranks": 0, "loss_rank_avg": 0.15456292033195496, "step": 715, "valid_targets_mean": 9493.3, "valid_targets_min": 2208 }, { "epoch": 3.9595588235294117, "grad_norm": 0.271085036968312, "learning_rate": 1.9013512696984696e-05, "loss": 0.4689, "loss_nan_ranks": 0, "loss_rank_avg": 0.1571996957063675, "step": 720, "valid_targets_mean": 9132.5, "valid_targets_min": 1567 }, { "epoch": 3.9871323529411766, "grad_norm": 0.2411363224005689, "learning_rate": 1.8739812126995093e-05, "loss": 0.4722, "loss_nan_ranks": 0, "loss_rank_avg": 0.14264698326587677, "step": 725, "valid_targets_mean": 9456.3, "valid_targets_min": 1832 }, { "epoch": 4.011029411764706, "grad_norm": 0.2359597961140309, "learning_rate": 1.8466348311926863e-05, "loss": 0.466, "loss_nan_ranks": 0, "loss_rank_avg": 0.15933065116405487, "step": 730, "valid_targets_mean": 10693.0, "valid_targets_min": 5400 }, { "epoch": 4.038602941176471, "grad_norm": 0.26309525718508847, "learning_rate": 1.8193172628170324e-05, "loss": 0.4682, "loss_nan_ranks": 0, "loss_rank_avg": 0.15065504610538483, "step": 735, "valid_targets_mean": 10060.2, "valid_targets_min": 505 }, { "epoch": 4.0661764705882355, "grad_norm": 0.2473581577370302, "learning_rate": 1.792033639798377e-05, "loss": 0.4703, "loss_nan_ranks": 0, "loss_rank_avg": 0.1632881462574005, "step": 740, "valid_targets_mean": 9911.9, "valid_targets_min": 1738 }, { "epoch": 4.09375, "grad_norm": 0.2565338212535585, "learning_rate": 1.764789087985145e-05, "loss": 0.465, "loss_nan_ranks": 0, "loss_rank_avg": 0.13955152034759521, "step": 745, "valid_targets_mean": 8719.0, "valid_targets_min": 835 }, { "epoch": 4.1213235294117645, "grad_norm": 0.29464540307392384, "learning_rate": 1.737588725885345e-05, "loss": 0.4625, "loss_nan_ranks": 0, "loss_rank_avg": 0.15189291536808014, "step": 750, "valid_targets_mean": 9459.8, "valid_targets_min": 2465 }, { "epoch": 4.148897058823529, "grad_norm": 0.29502353988540564, "learning_rate": 1.7104376637049474e-05, "loss": 0.4647, "loss_nan_ranks": 0, "loss_rank_avg": 0.13400202989578247, "step": 755, "valid_targets_mean": 8704.5, "valid_targets_min": 1765 }, { "epoch": 4.176470588235294, "grad_norm": 0.2954768301391897, "learning_rate": 1.6833410023878104e-05, "loss": 0.4711, "loss_nan_ranks": 0, "loss_rank_avg": 0.15495823323726654, "step": 760, "valid_targets_mean": 10454.3, "valid_targets_min": 1863 }, { "epoch": 4.204044117647059, "grad_norm": 0.2420606644014351, "learning_rate": 1.6563038326573544e-05, "loss": 0.4664, "loss_nan_ranks": 0, "loss_rank_avg": 0.14864429831504822, "step": 765, "valid_targets_mean": 9574.8, "valid_targets_min": 2267 }, { "epoch": 4.231617647058823, "grad_norm": 0.22980463517846378, "learning_rate": 1.6293312340601545e-05, "loss": 0.4675, "loss_nan_ranks": 0, "loss_rank_avg": 0.168225958943367, "step": 770, "valid_targets_mean": 11102.2, "valid_targets_min": 3249 }, { "epoch": 4.259191176470588, "grad_norm": 0.2360042458287228, "learning_rate": 1.60242827401163e-05, "loss": 0.4629, "loss_nan_ranks": 0, "loss_rank_avg": 0.1540679782629013, "step": 775, "valid_targets_mean": 9416.9, "valid_targets_min": 455 }, { "epoch": 4.286764705882353, "grad_norm": 0.24859722691681074, "learning_rate": 1.5756000068440184e-05, "loss": 0.4595, "loss_nan_ranks": 0, "loss_rank_avg": 0.14932796359062195, "step": 780, "valid_targets_mean": 9871.3, "valid_targets_min": 1989 }, { "epoch": 4.314338235294118, "grad_norm": 0.24874113072111292, "learning_rate": 1.548851472856802e-05, "loss": 0.4659, "loss_nan_ranks": 0, "loss_rank_avg": 0.12739743292331696, "step": 785, "valid_targets_mean": 8745.9, "valid_targets_min": 1265 }, { "epoch": 4.341911764705882, "grad_norm": 0.2616979627725329, "learning_rate": 1.5221876973697729e-05, "loss": 0.4675, "loss_nan_ranks": 0, "loss_rank_avg": 0.16212889552116394, "step": 790, "valid_targets_mean": 9819.2, "valid_targets_min": 1922 }, { "epoch": 4.369485294117647, "grad_norm": 0.2605902813842398, "learning_rate": 1.4956136897789155e-05, "loss": 0.4682, "loss_nan_ranks": 0, "loss_rank_avg": 0.1644110381603241, "step": 795, "valid_targets_mean": 10543.6, "valid_targets_min": 2238 }, { "epoch": 4.397058823529412, "grad_norm": 0.27655005722080517, "learning_rate": 1.4691344426152733e-05, "loss": 0.4684, "loss_nan_ranks": 0, "loss_rank_avg": 0.13570016622543335, "step": 800, "valid_targets_mean": 7597.6, "valid_targets_min": 349 }, { "epoch": 4.424632352941177, "grad_norm": 0.2681286272432588, "learning_rate": 1.4427549306069915e-05, "loss": 0.468, "loss_nan_ranks": 0, "loss_rank_avg": 0.17025525867938995, "step": 805, "valid_targets_mean": 10451.5, "valid_targets_min": 2785 }, { "epoch": 4.452205882352941, "grad_norm": 0.25262683253593055, "learning_rate": 1.416480109744701e-05, "loss": 0.4658, "loss_nan_ranks": 0, "loss_rank_avg": 0.14067339897155762, "step": 810, "valid_targets_mean": 9118.7, "valid_targets_min": 375 }, { "epoch": 4.479779411764706, "grad_norm": 0.2833620837271674, "learning_rate": 1.3903149163504221e-05, "loss": 0.4733, "loss_nan_ranks": 0, "loss_rank_avg": 0.17299406230449677, "step": 815, "valid_targets_mean": 9922.2, "valid_targets_min": 2029 }, { "epoch": 4.507352941176471, "grad_norm": 0.28237169634387094, "learning_rate": 1.3642642661501641e-05, "loss": 0.4637, "loss_nan_ranks": 0, "loss_rank_avg": 0.1708087921142578, "step": 820, "valid_targets_mean": 10745.8, "valid_targets_min": 4792 }, { "epoch": 4.5349264705882355, "grad_norm": 0.24345906344118387, "learning_rate": 1.3383330533503971e-05, "loss": 0.4705, "loss_nan_ranks": 0, "loss_rank_avg": 0.15734925866127014, "step": 825, "valid_targets_mean": 9852.8, "valid_targets_min": 2881 }, { "epoch": 4.5625, "grad_norm": 0.24266003465888375, "learning_rate": 1.3125261497185588e-05, "loss": 0.4646, "loss_nan_ranks": 0, "loss_rank_avg": 0.14917130768299103, "step": 830, "valid_targets_mean": 10095.4, "valid_targets_min": 1468 }, { "epoch": 4.5900735294117645, "grad_norm": 0.24693814616775356, "learning_rate": 1.2868484036677896e-05, "loss": 0.4676, "loss_nan_ranks": 0, "loss_rank_avg": 0.14834946393966675, "step": 835, "valid_targets_mean": 9061.1, "valid_targets_min": 1531 }, { "epoch": 4.617647058823529, "grad_norm": 0.25767151072786965, "learning_rate": 1.2613046393460411e-05, "loss": 0.4694, "loss_nan_ranks": 0, "loss_rank_avg": 0.1626255214214325, "step": 840, "valid_targets_mean": 9341.8, "valid_targets_min": 2334 }, { "epoch": 4.645220588235294, "grad_norm": 0.2523502952262432, "learning_rate": 1.2358996557297532e-05, "loss": 0.4685, "loss_nan_ranks": 0, "loss_rank_avg": 0.14751945436000824, "step": 845, "valid_targets_mean": 9205.8, "valid_targets_min": 2278 }, { "epoch": 4.672794117647059, "grad_norm": 0.2671934827994343, "learning_rate": 1.2106382257222595e-05, "loss": 0.4657, "loss_nan_ranks": 0, "loss_rank_avg": 0.1465722620487213, "step": 850, "valid_targets_mean": 9662.6, "valid_targets_min": 818 }, { "epoch": 4.700367647058823, "grad_norm": 0.2678177528324946, "learning_rate": 1.1855250952570852e-05, "loss": 0.4632, "loss_nan_ranks": 0, "loss_rank_avg": 0.18250469863414764, "step": 855, "valid_targets_mean": 11265.1, "valid_targets_min": 3015 }, { "epoch": 4.727941176470588, "grad_norm": 0.26786812942141464, "learning_rate": 1.1605649824063176e-05, "loss": 0.4704, "loss_nan_ranks": 0, "loss_rank_avg": 0.1485501229763031, "step": 860, "valid_targets_mean": 9213.1, "valid_targets_min": 1661 }, { "epoch": 4.755514705882353, "grad_norm": 0.23312897885245942, "learning_rate": 1.1357625764942095e-05, "loss": 0.4646, "loss_nan_ranks": 0, "loss_rank_avg": 0.16314777731895447, "step": 865, "valid_targets_mean": 9994.2, "valid_targets_min": 1692 }, { "epoch": 4.783088235294118, "grad_norm": 0.24908363289906799, "learning_rate": 1.1111225372161818e-05, "loss": 0.463, "loss_nan_ranks": 0, "loss_rank_avg": 0.15313473343849182, "step": 870, "valid_targets_mean": 9034.0, "valid_targets_min": 1750 }, { "epoch": 4.810661764705882, "grad_norm": 0.23939462069601722, "learning_rate": 1.0866494937633953e-05, "loss": 0.4616, "loss_nan_ranks": 0, "loss_rank_avg": 0.14009788632392883, "step": 875, "valid_targets_mean": 8579.7, "valid_targets_min": 1839 }, { "epoch": 4.838235294117647, "grad_norm": 0.23955295963992793, "learning_rate": 1.0623480439530493e-05, "loss": 0.468, "loss_nan_ranks": 0, "loss_rank_avg": 0.18267551064491272, "step": 880, "valid_targets_mean": 10546.3, "valid_targets_min": 3024 }, { "epoch": 4.865808823529412, "grad_norm": 0.2616170432420212, "learning_rate": 1.038222753364581e-05, "loss": 0.4693, "loss_nan_ranks": 0, "loss_rank_avg": 0.1501752734184265, "step": 885, "valid_targets_mean": 9260.8, "valid_targets_min": 1575 }, { "epoch": 4.893382352941177, "grad_norm": 0.22421807483190181, "learning_rate": 1.0142781544819158e-05, "loss": 0.4669, "loss_nan_ranks": 0, "loss_rank_avg": 0.15006357431411743, "step": 890, "valid_targets_mean": 10454.8, "valid_targets_min": 5073 }, { "epoch": 4.920955882352941, "grad_norm": 0.22432441115476565, "learning_rate": 9.905187458419343e-06, "loss": 0.4628, "loss_nan_ranks": 0, "loss_rank_avg": 0.1583147495985031, "step": 895, "valid_targets_mean": 9921.4, "valid_targets_min": 2207 }, { "epoch": 4.948529411764706, "grad_norm": 0.25000893710548316, "learning_rate": 9.669489911893261e-06, "loss": 0.4679, "loss_nan_ranks": 0, "loss_rank_avg": 0.16411854326725006, "step": 900, "valid_targets_mean": 9388.2, "valid_targets_min": 3775 }, { "epoch": 4.976102941176471, "grad_norm": 0.2608534142049316, "learning_rate": 9.435733186379694e-06, "loss": 0.4591, "loss_nan_ranks": 0, "loss_rank_avg": 0.14242586493492126, "step": 905, "valid_targets_mean": 9715.4, "valid_targets_min": 2367 }, { "epoch": 5.0, "grad_norm": 0.528080421416379, "learning_rate": 9.2039611983901e-06, "loss": 0.4665, "loss_nan_ranks": 0, "loss_rank_avg": 0.45787763595581055, "step": 910, "valid_targets_mean": 9017.6, "valid_targets_min": 1527 }, { "epoch": 5.0275735294117645, "grad_norm": 0.25364778323379056, "learning_rate": 8.974217491557916e-06, "loss": 0.4582, "loss_nan_ranks": 0, "loss_rank_avg": 0.15120220184326172, "step": 915, "valid_targets_mean": 9724.2, "valid_targets_min": 4256 }, { "epoch": 5.055147058823529, "grad_norm": 0.2720756490768129, "learning_rate": 8.746545228457864e-06, "loss": 0.4627, "loss_nan_ranks": 0, "loss_rank_avg": 0.18436521291732788, "step": 920, "valid_targets_mean": 10848.2, "valid_targets_min": 2906 }, { "epoch": 5.082720588235294, "grad_norm": 0.2476109752744865, "learning_rate": 8.520987182496916e-06, "loss": 0.4615, "loss_nan_ranks": 0, "loss_rank_avg": 0.1586351841688156, "step": 925, "valid_targets_mean": 9801.3, "valid_targets_min": 2511 }, { "epoch": 5.110294117647059, "grad_norm": 0.24561606722052834, "learning_rate": 8.297585729878328e-06, "loss": 0.4605, "loss_nan_ranks": 0, "loss_rank_avg": 0.15383873879909515, "step": 930, "valid_targets_mean": 10230.4, "valid_targets_min": 1542 }, { "epoch": 5.137867647058823, "grad_norm": 0.23155125931202536, "learning_rate": 8.076382841640278e-06, "loss": 0.4595, "loss_nan_ranks": 0, "loss_rank_avg": 0.15065628290176392, "step": 935, "valid_targets_mean": 9486.9, "valid_targets_min": 2504 }, { "epoch": 5.165441176470588, "grad_norm": 0.2386411883344939, "learning_rate": 7.8574200757707e-06, "loss": 0.4691, "loss_nan_ranks": 0, "loss_rank_avg": 0.16948550939559937, "step": 940, "valid_targets_mean": 10931.1, "valid_targets_min": 1488 }, { "epoch": 5.193014705882353, "grad_norm": 0.24623043409190853, "learning_rate": 7.640738569399645e-06, "loss": 0.4627, "loss_nan_ranks": 0, "loss_rank_avg": 0.17989715933799744, "step": 945, "valid_targets_mean": 11805.3, "valid_targets_min": 5070 }, { "epoch": 5.220588235294118, "grad_norm": 0.25645332420887573, "learning_rate": 7.426379031070736e-06, "loss": 0.4653, "loss_nan_ranks": 0, "loss_rank_avg": 0.14318367838859558, "step": 950, "valid_targets_mean": 9511.3, "valid_targets_min": 2171 }, { "epoch": 5.248161764705882, "grad_norm": 0.2509019424889472, "learning_rate": 7.214381733093156e-06, "loss": 0.4623, "loss_nan_ranks": 0, "loss_rank_avg": 0.14922644197940826, "step": 955, "valid_targets_mean": 9984.3, "valid_targets_min": 3056 }, { "epoch": 5.275735294117647, "grad_norm": 0.22957419096439602, "learning_rate": 7.004786503975552e-06, "loss": 0.464, "loss_nan_ranks": 0, "loss_rank_avg": 0.15641915798187256, "step": 960, "valid_targets_mean": 10315.0, "valid_targets_min": 4133 }, { "epoch": 5.303308823529412, "grad_norm": 0.28021168498690796, "learning_rate": 6.7976327209433855e-06, "loss": 0.4604, "loss_nan_ranks": 0, "loss_rank_avg": 0.1481720507144928, "step": 965, "valid_targets_mean": 9206.2, "valid_targets_min": 1886 }, { "epoch": 5.330882352941177, "grad_norm": 0.2426926206109474, "learning_rate": 6.592959302541004e-06, "loss": 0.4589, "loss_nan_ranks": 0, "loss_rank_avg": 0.173628568649292, "step": 970, "valid_targets_mean": 10906.5, "valid_targets_min": 4457 }, { "epoch": 5.358455882352941, "grad_norm": 0.23348669978255307, "learning_rate": 6.39080470131989e-06, "loss": 0.4609, "loss_nan_ranks": 0, "loss_rank_avg": 0.16939929127693176, "step": 975, "valid_targets_mean": 10147.1, "valid_targets_min": 1450 }, { "epoch": 5.386029411764706, "grad_norm": 0.2413777466766637, "learning_rate": 6.1912068966145145e-06, "loss": 0.4565, "loss_nan_ranks": 0, "loss_rank_avg": 0.15642428398132324, "step": 980, "valid_targets_mean": 9358.1, "valid_targets_min": 535 }, { "epoch": 5.413602941176471, "grad_norm": 0.2388089637438226, "learning_rate": 5.994203387407036e-06, "loss": 0.4629, "loss_nan_ranks": 0, "loss_rank_avg": 0.1432351917028427, "step": 985, "valid_targets_mean": 9491.7, "valid_targets_min": 1802 }, { "epoch": 5.4411764705882355, "grad_norm": 0.2190404554471656, "learning_rate": 5.7998311852822406e-06, "loss": 0.4633, "loss_nan_ranks": 0, "loss_rank_avg": 0.16985687613487244, "step": 990, "valid_targets_mean": 10872.9, "valid_targets_min": 2969 }, { "epoch": 5.46875, "grad_norm": 0.2238371695459484, "learning_rate": 5.608126807474145e-06, "loss": 0.4622, "loss_nan_ranks": 0, "loss_rank_avg": 0.15325827896595, "step": 995, "valid_targets_mean": 10413.0, "valid_targets_min": 2987 }, { "epoch": 5.4963235294117645, "grad_norm": 0.23551352199294445, "learning_rate": 5.419126270005317e-06, "loss": 0.4626, "loss_nan_ranks": 0, "loss_rank_avg": 0.14396341145038605, "step": 1000, "valid_targets_mean": 9435.3, "valid_targets_min": 2314 }, { "epoch": 5.523897058823529, "grad_norm": 0.24552735754578195, "learning_rate": 5.23286508092051e-06, "loss": 0.4557, "loss_nan_ranks": 0, "loss_rank_avg": 0.15775898098945618, "step": 1005, "valid_targets_mean": 9846.8, "valid_targets_min": 2897 }, { "epoch": 5.551470588235294, "grad_norm": 0.23811014424649815, "learning_rate": 5.049378233615652e-06, "loss": 0.4656, "loss_nan_ranks": 0, "loss_rank_avg": 0.15497168898582458, "step": 1010, "valid_targets_mean": 9349.0, "valid_targets_min": 4354 }, { "epoch": 5.579044117647059, "grad_norm": 0.22012023372507486, "learning_rate": 4.868700200263521e-06, "loss": 0.4624, "loss_nan_ranks": 0, "loss_rank_avg": 0.1530693769454956, "step": 1015, "valid_targets_mean": 10066.7, "valid_targets_min": 3738 }, { "epoch": 5.606617647058823, "grad_norm": 0.2384744886144643, "learning_rate": 4.690864925337404e-06, "loss": 0.459, "loss_nan_ranks": 0, "loss_rank_avg": 0.14344365894794464, "step": 1020, "valid_targets_mean": 9216.1, "valid_targets_min": 2135 }, { "epoch": 5.634191176470588, "grad_norm": 0.22620664113381828, "learning_rate": 4.515905819233828e-06, "loss": 0.4587, "loss_nan_ranks": 0, "loss_rank_avg": 0.15465840697288513, "step": 1025, "valid_targets_mean": 9175.2, "valid_targets_min": 2058 }, { "epoch": 5.661764705882353, "grad_norm": 0.2308131065264548, "learning_rate": 4.343855751995645e-06, "loss": 0.463, "loss_nan_ranks": 0, "loss_rank_avg": 0.14798535406589508, "step": 1030, "valid_targets_mean": 9069.3, "valid_targets_min": 1487 }, { "epoch": 5.689338235294118, "grad_norm": 0.20210750386819545, "learning_rate": 4.174747047136707e-06, "loss": 0.4629, "loss_nan_ranks": 0, "loss_rank_avg": 0.13314756751060486, "step": 1035, "valid_targets_mean": 8539.2, "valid_targets_min": 2094 }, { "epoch": 5.716911764705882, "grad_norm": 0.2182748594468666, "learning_rate": 4.008611475569082e-06, "loss": 0.4623, "loss_nan_ranks": 0, "loss_rank_avg": 0.1582449972629547, "step": 1040, "valid_targets_mean": 10496.3, "valid_targets_min": 1687 }, { "epoch": 5.744485294117647, "grad_norm": 0.22255571373558947, "learning_rate": 3.845480249634226e-06, "loss": 0.4664, "loss_nan_ranks": 0, "loss_rank_avg": 0.15279759466648102, "step": 1045, "valid_targets_mean": 9399.6, "valid_targets_min": 2004 }, { "epoch": 5.772058823529412, "grad_norm": 0.23780299660682128, "learning_rate": 3.685384017239013e-06, "loss": 0.4563, "loss_nan_ranks": 0, "loss_rank_avg": 0.1619456708431244, "step": 1050, "valid_targets_mean": 10361.6, "valid_targets_min": 2616 }, { "epoch": 5.799632352941177, "grad_norm": 0.2149753999385376, "learning_rate": 3.5283528560978163e-06, "loss": 0.4606, "loss_nan_ranks": 0, "loss_rank_avg": 0.15598419308662415, "step": 1055, "valid_targets_mean": 10178.3, "valid_targets_min": 1567 }, { "epoch": 5.827205882352941, "grad_norm": 0.23604543135606404, "learning_rate": 3.3744162680817526e-06, "loss": 0.463, "loss_nan_ranks": 0, "loss_rank_avg": 0.15586526691913605, "step": 1060, "valid_targets_mean": 10143.1, "valid_targets_min": 2785 }, { "epoch": 5.854779411764706, "grad_norm": 0.21782882942010112, "learning_rate": 3.2236031736760775e-06, "loss": 0.4628, "loss_nan_ranks": 0, "loss_rank_avg": 0.1613750457763672, "step": 1065, "valid_targets_mean": 10578.9, "valid_targets_min": 3416 }, { "epoch": 5.882352941176471, "grad_norm": 0.24360429896673724, "learning_rate": 3.075941906546789e-06, "loss": 0.4643, "loss_nan_ranks": 0, "loss_rank_avg": 0.17128227651119232, "step": 1070, "valid_targets_mean": 10810.2, "valid_targets_min": 4489 }, { "epoch": 5.9099264705882355, "grad_norm": 0.21853281708343872, "learning_rate": 2.9314602082175624e-06, "loss": 0.4634, "loss_nan_ranks": 0, "loss_rank_avg": 0.1510457843542099, "step": 1075, "valid_targets_mean": 9562.4, "valid_targets_min": 2854 }, { "epoch": 5.9375, "grad_norm": 0.21718471329556974, "learning_rate": 2.790185222857804e-06, "loss": 0.4581, "loss_nan_ranks": 0, "loss_rank_avg": 0.14728333055973053, "step": 1080, "valid_targets_mean": 9295.6, "valid_targets_min": 1896 }, { "epoch": 5.9650735294117645, "grad_norm": 0.21759757406424912, "learning_rate": 2.6521434921830593e-06, "loss": 0.4602, "loss_nan_ranks": 0, "loss_rank_avg": 0.16067388653755188, "step": 1085, "valid_targets_mean": 9997.4, "valid_targets_min": 1567 }, { "epoch": 5.992647058823529, "grad_norm": 0.2372799465315037, "learning_rate": 2.517360950468519e-06, "loss": 0.4564, "loss_nan_ranks": 0, "loss_rank_avg": 0.14969328045845032, "step": 1090, "valid_targets_mean": 9974.2, "valid_targets_min": 2365 }, { "epoch": 6.016544117647059, "grad_norm": 0.22070664266477452, "learning_rate": 2.3858629196766846e-06, "loss": 0.4622, "loss_nan_ranks": 0, "loss_rank_avg": 0.14457306265830994, "step": 1095, "valid_targets_mean": 8829.4, "valid_targets_min": 2041 }, { "epoch": 6.044117647058823, "grad_norm": 0.23770514220168587, "learning_rate": 2.2576741047000605e-06, "loss": 0.4634, "loss_nan_ranks": 0, "loss_rank_avg": 0.1596033126115799, "step": 1100, "valid_targets_mean": 10972.9, "valid_targets_min": 5689 }, { "epoch": 6.071691176470588, "grad_norm": 0.2380064578876329, "learning_rate": 2.1328185887197872e-06, "loss": 0.4583, "loss_nan_ranks": 0, "loss_rank_avg": 0.16930876672267914, "step": 1105, "valid_targets_mean": 9747.8, "valid_targets_min": 2320 }, { "epoch": 6.099264705882353, "grad_norm": 0.22187661591130384, "learning_rate": 2.011319828681049e-06, "loss": 0.4545, "loss_nan_ranks": 0, "loss_rank_avg": 0.1560242772102356, "step": 1110, "valid_targets_mean": 9941.7, "valid_targets_min": 1490 }, { "epoch": 6.126838235294118, "grad_norm": 0.22871735612785934, "learning_rate": 1.8932006508861866e-06, "loss": 0.4582, "loss_nan_ranks": 0, "loss_rank_avg": 0.13863195478916168, "step": 1115, "valid_targets_mean": 8947.5, "valid_targets_min": 1633 }, { "epoch": 6.154411764705882, "grad_norm": 0.220041969177945, "learning_rate": 1.7784832467062129e-06, "loss": 0.4631, "loss_nan_ranks": 0, "loss_rank_avg": 0.16414231061935425, "step": 1120, "valid_targets_mean": 10703.4, "valid_targets_min": 1974 }, { "epoch": 6.181985294117647, "grad_norm": 0.22496390430322033, "learning_rate": 1.6671891684117048e-06, "loss": 0.4559, "loss_nan_ranks": 0, "loss_rank_avg": 0.16701489686965942, "step": 1125, "valid_targets_mean": 11036.8, "valid_targets_min": 1223 }, { "epoch": 6.209558823529412, "grad_norm": 0.23390335972144527, "learning_rate": 1.55933932512369e-06, "loss": 0.4602, "loss_nan_ranks": 0, "loss_rank_avg": 0.15574294328689575, "step": 1130, "valid_targets_mean": 9363.7, "valid_targets_min": 2356 }, { "epoch": 6.237132352941177, "grad_norm": 0.21186034476089416, "learning_rate": 1.4549539788853984e-06, "loss": 0.4616, "loss_nan_ranks": 0, "loss_rank_avg": 0.167169451713562, "step": 1135, "valid_targets_mean": 9769.6, "valid_targets_min": 1886 }, { "epoch": 6.264705882352941, "grad_norm": 0.24246484888375872, "learning_rate": 1.3540527408555915e-06, "loss": 0.4573, "loss_nan_ranks": 0, "loss_rank_avg": 0.13978439569473267, "step": 1140, "valid_targets_mean": 9072.4, "valid_targets_min": 1851 }, { "epoch": 6.292279411764706, "grad_norm": 0.20852258029588774, "learning_rate": 1.2566545676241494e-06, "loss": 0.4636, "loss_nan_ranks": 0, "loss_rank_avg": 0.16217908263206482, "step": 1145, "valid_targets_mean": 10326.9, "valid_targets_min": 2180 }, { "epoch": 6.319852941176471, "grad_norm": 0.259220830437003, "learning_rate": 1.1627777576506306e-06, "loss": 0.4647, "loss_nan_ranks": 0, "loss_rank_avg": 0.13484236598014832, "step": 1150, "valid_targets_mean": 8036.9, "valid_targets_min": 2414 }, { "epoch": 6.3474264705882355, "grad_norm": 0.23165660017295853, "learning_rate": 1.0724399478265312e-06, "loss": 0.4619, "loss_nan_ranks": 0, "loss_rank_avg": 0.15904231369495392, "step": 1155, "valid_targets_mean": 10136.1, "valid_targets_min": 3152 }, { "epoch": 6.375, "grad_norm": 0.21620185766223732, "learning_rate": 9.85658110161747e-07, "loss": 0.463, "loss_nan_ranks": 0, "loss_rank_avg": 0.15874940156936646, "step": 1160, "valid_targets_mean": 10696.2, "valid_targets_min": 1469 }, { "epoch": 6.4025735294117645, "grad_norm": 0.2266371932340415, "learning_rate": 9.02448548596031e-07, "loss": 0.4628, "loss_nan_ranks": 0, "loss_rank_avg": 0.15737676620483398, "step": 1165, "valid_targets_mean": 10155.7, "valid_targets_min": 1265 }, { "epoch": 6.430147058823529, "grad_norm": 0.22665087403909726, "learning_rate": 8.228268959359086e-07, "loss": 0.4626, "loss_nan_ranks": 0, "loss_rank_avg": 0.15128636360168457, "step": 1170, "valid_targets_mean": 9104.2, "valid_targets_min": 1738 }, { "epoch": 6.457720588235294, "grad_norm": 0.279337484358945, "learning_rate": 7.468081109177028e-07, "loss": 0.4574, "loss_nan_ranks": 0, "loss_rank_avg": 0.13439278304576874, "step": 1175, "valid_targets_mean": 8199.8, "valid_targets_min": 1505 }, { "epoch": 6.485294117647059, "grad_norm": 0.22106817456760308, "learning_rate": 6.744064753972068e-07, "loss": 0.4646, "loss_nan_ranks": 0, "loss_rank_avg": 0.15889695286750793, "step": 1180, "valid_targets_mean": 9808.5, "valid_targets_min": 2024 }, { "epoch": 6.512867647058823, "grad_norm": 0.20952794028596322, "learning_rate": 6.056355916665024e-07, "loss": 0.4559, "loss_nan_ranks": 0, "loss_rank_avg": 0.16096243262290955, "step": 1185, "valid_targets_mean": 10794.8, "valid_targets_min": 3436 }, { "epoch": 6.540441176470588, "grad_norm": 0.25549266747636473, "learning_rate": 5.405083798984567e-07, "loss": 0.4541, "loss_nan_ranks": 0, "loss_rank_avg": 0.1415504813194275, "step": 1190, "valid_targets_mean": 8488.8, "valid_targets_min": 1557 }, { "epoch": 6.568014705882353, "grad_norm": 0.21576881708453866, "learning_rate": 4.790370757193907e-07, "loss": 0.4566, "loss_nan_ranks": 0, "loss_rank_avg": 0.14048513770103455, "step": 1195, "valid_targets_mean": 8573.9, "valid_targets_min": 1413 }, { "epoch": 6.595588235294118, "grad_norm": 0.21385907663265988, "learning_rate": 4.212332279103204e-07, "loss": 0.4538, "loss_nan_ranks": 0, "loss_rank_avg": 0.14507344365119934, "step": 1200, "valid_targets_mean": 8611.7, "valid_targets_min": 1369 }, { "epoch": 6.623161764705882, "grad_norm": 0.27787320041061075, "learning_rate": 3.671076962372655e-07, "loss": 0.4598, "loss_nan_ranks": 0, "loss_rank_avg": 0.17089204490184784, "step": 1205, "valid_targets_mean": 10808.4, "valid_targets_min": 2900 }, { "epoch": 6.650735294117647, "grad_norm": 0.21747531396757017, "learning_rate": 3.1667064941099724e-07, "loss": 0.4568, "loss_nan_ranks": 0, "loss_rank_avg": 0.14115549623966217, "step": 1210, "valid_targets_mean": 8620.9, "valid_targets_min": 2139 }, { "epoch": 6.678308823529412, "grad_norm": 0.23882168189925346, "learning_rate": 2.699315631766064e-07, "loss": 0.4632, "loss_nan_ranks": 0, "loss_rank_avg": 0.1577366292476654, "step": 1215, "valid_targets_mean": 10010.7, "valid_targets_min": 2525 }, { "epoch": 6.705882352941177, "grad_norm": 0.21158133975405694, "learning_rate": 2.26899218533283e-07, "loss": 0.46, "loss_nan_ranks": 0, "loss_rank_avg": 0.15490460395812988, "step": 1220, "valid_targets_mean": 10206.4, "valid_targets_min": 1948 }, { "epoch": 6.733455882352941, "grad_norm": 0.2068070148831523, "learning_rate": 1.8758170008459142e-07, "loss": 0.4624, "loss_nan_ranks": 0, "loss_rank_avg": 0.17629992961883545, "step": 1225, "valid_targets_mean": 11098.0, "valid_targets_min": 2855 }, { "epoch": 6.761029411764706, "grad_norm": 0.276570316072206, "learning_rate": 1.5198639451960095e-07, "loss": 0.4548, "loss_nan_ranks": 0, "loss_rank_avg": 0.1544700562953949, "step": 1230, "valid_targets_mean": 10218.4, "valid_targets_min": 1690 }, { "epoch": 6.788602941176471, "grad_norm": 0.24887539900389968, "learning_rate": 1.201199892251337e-07, "loss": 0.4555, "loss_nan_ranks": 0, "loss_rank_avg": 0.16659650206565857, "step": 1235, "valid_targets_mean": 11563.8, "valid_targets_min": 1522 }, { "epoch": 6.8161764705882355, "grad_norm": 0.2186914252195666, "learning_rate": 9.198847102937614e-08, "loss": 0.4552, "loss_nan_ranks": 0, "loss_rank_avg": 0.15176278352737427, "step": 1240, "valid_targets_mean": 9547.0, "valid_targets_min": 2825 }, { "epoch": 6.84375, "grad_norm": 0.20820326483482113, "learning_rate": 6.759712507711902e-08, "loss": 0.4622, "loss_nan_ranks": 0, "loss_rank_avg": 0.15682631731033325, "step": 1245, "valid_targets_mean": 9983.1, "valid_targets_min": 2230 }, { "epoch": 6.8713235294117645, "grad_norm": 0.22180946034862845, "learning_rate": 4.695053383683812e-08, "loss": 0.457, "loss_nan_ranks": 0, "loss_rank_avg": 0.14503879845142365, "step": 1250, "valid_targets_mean": 8904.7, "valid_targets_min": 1080 }, { "epoch": 6.898897058823529, "grad_norm": 0.2383751709023781, "learning_rate": 3.0052576239749666e-08, "loss": 0.4554, "loss_nan_ranks": 0, "loss_rank_avg": 0.1317831426858902, "step": 1255, "valid_targets_mean": 8981.2, "valid_targets_min": 1992 }, { "epoch": 6.926470588235294, "grad_norm": 0.2107262711334655, "learning_rate": 1.6906426951086573e-08, "loss": 0.4581, "loss_nan_ranks": 0, "loss_rank_avg": 0.14984363317489624, "step": 1260, "valid_targets_mean": 10053.0, "valid_targets_min": 3060 }, { "epoch": 6.954044117647059, "grad_norm": 0.2623126377291314, "learning_rate": 7.514555773648901e-09, "loss": 0.4614, "loss_nan_ranks": 0, "loss_rank_avg": 0.16520237922668457, "step": 1265, "valid_targets_mean": 10900.3, "valid_targets_min": 3734 }, { "epoch": 6.981617647058823, "grad_norm": 0.2030352524829392, "learning_rate": 1.8787271838083263e-09, "loss": 0.4614, "loss_nan_ranks": 0, "loss_rank_avg": 0.15681001543998718, "step": 1270, "valid_targets_mean": 10194.2, "valid_targets_min": 4309 }, { "epoch": 7.0, "step": 1274, "total_flos": 5.319956987025293e+18, "train_loss": 0.0, "train_runtime": 1.2382, "train_samples_per_second": 98264.932, "train_steps_per_second": 1028.893 } ], "logging_steps": 5, "max_steps": 1274, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.319956987025293e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }