{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 1015, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.024630541871921183, "grad_norm": 10.743034651431167, "learning_rate": 1.5686274509803923e-06, "loss": 0.8575, "loss_nan_ranks": 0, "loss_rank_avg": 0.29485759139060974, "step": 5, "valid_targets_mean": 9236.3, "valid_targets_min": 1969 }, { "epoch": 0.04926108374384237, "grad_norm": 4.965625847050405, "learning_rate": 3.529411764705883e-06, "loss": 0.8189, "loss_nan_ranks": 0, "loss_rank_avg": 0.27123022079467773, "step": 10, "valid_targets_mean": 9325.8, "valid_targets_min": 845 }, { "epoch": 0.07389162561576355, "grad_norm": 1.5709764932057944, "learning_rate": 5.4901960784313735e-06, "loss": 0.7166, "loss_nan_ranks": 0, "loss_rank_avg": 0.22228774428367615, "step": 15, "valid_targets_mean": 8391.1, "valid_targets_min": 1557 }, { "epoch": 0.09852216748768473, "grad_norm": 1.1070034152098873, "learning_rate": 7.450980392156863e-06, "loss": 0.6554, "loss_nan_ranks": 0, "loss_rank_avg": 0.19853773713111877, "step": 20, "valid_targets_mean": 8116.2, "valid_targets_min": 1655 }, { "epoch": 0.12315270935960591, "grad_norm": 0.7952086999098457, "learning_rate": 9.411764705882354e-06, "loss": 0.6269, "loss_nan_ranks": 0, "loss_rank_avg": 0.2077217698097229, "step": 25, "valid_targets_mean": 9368.0, "valid_targets_min": 2823 }, { "epoch": 0.1477832512315271, "grad_norm": 0.649366154808441, "learning_rate": 1.1372549019607844e-05, "loss": 0.5914, "loss_nan_ranks": 0, "loss_rank_avg": 0.18282532691955566, "step": 30, "valid_targets_mean": 8624.1, "valid_targets_min": 3036 }, { "epoch": 0.1724137931034483, "grad_norm": 0.4388941481235093, "learning_rate": 1.3333333333333333e-05, "loss": 0.5478, "loss_nan_ranks": 0, "loss_rank_avg": 0.1840602457523346, "step": 35, "valid_targets_mean": 9535.8, "valid_targets_min": 3381 }, { "epoch": 0.19704433497536947, "grad_norm": 0.3731243643842238, "learning_rate": 1.5294117647058822e-05, "loss": 0.5168, "loss_nan_ranks": 0, "loss_rank_avg": 0.15646323561668396, "step": 40, "valid_targets_mean": 7694.9, "valid_targets_min": 1593 }, { "epoch": 0.22167487684729065, "grad_norm": 0.314999922874189, "learning_rate": 1.7254901960784314e-05, "loss": 0.5032, "loss_nan_ranks": 0, "loss_rank_avg": 0.15275967121124268, "step": 45, "valid_targets_mean": 8356.0, "valid_targets_min": 2848 }, { "epoch": 0.24630541871921183, "grad_norm": 0.27241302838236864, "learning_rate": 1.9215686274509807e-05, "loss": 0.482, "loss_nan_ranks": 0, "loss_rank_avg": 0.1699313223361969, "step": 50, "valid_targets_mean": 8695.2, "valid_targets_min": 1878 }, { "epoch": 0.270935960591133, "grad_norm": 0.24171316383457545, "learning_rate": 2.1176470588235296e-05, "loss": 0.4697, "loss_nan_ranks": 0, "loss_rank_avg": 0.15137627720832825, "step": 55, "valid_targets_mean": 8684.1, "valid_targets_min": 2323 }, { "epoch": 0.2955665024630542, "grad_norm": 0.2752759358588667, "learning_rate": 2.3137254901960788e-05, "loss": 0.4524, "loss_nan_ranks": 0, "loss_rank_avg": 0.15112951397895813, "step": 60, "valid_targets_mean": 8717.0, "valid_targets_min": 352 }, { "epoch": 0.32019704433497537, "grad_norm": 0.3256829290721947, "learning_rate": 2.5098039215686277e-05, "loss": 0.4471, "loss_nan_ranks": 0, "loss_rank_avg": 0.15601575374603271, "step": 65, "valid_targets_mean": 8975.6, "valid_targets_min": 777 }, { "epoch": 0.3448275862068966, "grad_norm": 0.26016520519307323, "learning_rate": 2.705882352941177e-05, "loss": 0.4394, "loss_nan_ranks": 0, "loss_rank_avg": 0.14817699790000916, "step": 70, "valid_targets_mean": 8358.4, "valid_targets_min": 4473 }, { "epoch": 0.3694581280788177, "grad_norm": 0.27210923180659075, "learning_rate": 2.9019607843137258e-05, "loss": 0.4316, "loss_nan_ranks": 0, "loss_rank_avg": 0.13734804093837738, "step": 75, "valid_targets_mean": 8500.8, "valid_targets_min": 1844 }, { "epoch": 0.39408866995073893, "grad_norm": 0.2762491285053443, "learning_rate": 3.098039215686275e-05, "loss": 0.4175, "loss_nan_ranks": 0, "loss_rank_avg": 0.12119661271572113, "step": 80, "valid_targets_mean": 8555.7, "valid_targets_min": 2666 }, { "epoch": 0.4187192118226601, "grad_norm": 0.27953653823930036, "learning_rate": 3.294117647058824e-05, "loss": 0.4151, "loss_nan_ranks": 0, "loss_rank_avg": 0.13656310737133026, "step": 85, "valid_targets_mean": 9335.4, "valid_targets_min": 2375 }, { "epoch": 0.4433497536945813, "grad_norm": 0.27762179667403303, "learning_rate": 3.490196078431373e-05, "loss": 0.4137, "loss_nan_ranks": 0, "loss_rank_avg": 0.14938178658485413, "step": 90, "valid_targets_mean": 9836.8, "valid_targets_min": 4990 }, { "epoch": 0.46798029556650245, "grad_norm": 0.30533872508264365, "learning_rate": 3.686274509803922e-05, "loss": 0.4065, "loss_nan_ranks": 0, "loss_rank_avg": 0.12681949138641357, "step": 95, "valid_targets_mean": 8551.3, "valid_targets_min": 2982 }, { "epoch": 0.49261083743842365, "grad_norm": 0.3178616151302271, "learning_rate": 3.882352941176471e-05, "loss": 0.4092, "loss_nan_ranks": 0, "loss_rank_avg": 0.13543638586997986, "step": 100, "valid_targets_mean": 8156.8, "valid_targets_min": 378 }, { "epoch": 0.5172413793103449, "grad_norm": 0.3480550602168805, "learning_rate": 3.999952639479403e-05, "loss": 0.4025, "loss_nan_ranks": 0, "loss_rank_avg": 0.1413741409778595, "step": 105, "valid_targets_mean": 8588.5, "valid_targets_min": 2292 }, { "epoch": 0.541871921182266, "grad_norm": 0.2886232799853676, "learning_rate": 3.999419859382013e-05, "loss": 0.4066, "loss_nan_ranks": 0, "loss_rank_avg": 0.12243609875440598, "step": 110, "valid_targets_mean": 8215.7, "valid_targets_min": 996 }, { "epoch": 0.5665024630541872, "grad_norm": 0.30402912482935435, "learning_rate": 3.99829525676357e-05, "loss": 0.3992, "loss_nan_ranks": 0, "loss_rank_avg": 0.13463672995567322, "step": 115, "valid_targets_mean": 9568.1, "valid_targets_min": 1704 }, { "epoch": 0.5911330049261084, "grad_norm": 0.31173572113829323, "learning_rate": 3.996579164503212e-05, "loss": 0.4006, "loss_nan_ranks": 0, "loss_rank_avg": 0.13769567012786865, "step": 120, "valid_targets_mean": 8704.1, "valid_targets_min": 1267 }, { "epoch": 0.6157635467980296, "grad_norm": 0.2681375523972611, "learning_rate": 3.9942720905593045e-05, "loss": 0.4021, "loss_nan_ranks": 0, "loss_rank_avg": 0.1356489658355713, "step": 125, "valid_targets_mean": 9078.8, "valid_targets_min": 1116 }, { "epoch": 0.6403940886699507, "grad_norm": 0.3358135332937248, "learning_rate": 3.991374717819092e-05, "loss": 0.3953, "loss_nan_ranks": 0, "loss_rank_avg": 0.14018836617469788, "step": 130, "valid_targets_mean": 9290.2, "valid_targets_min": 3098 }, { "epoch": 0.6650246305418719, "grad_norm": 0.3112144751609365, "learning_rate": 3.987887903896564e-05, "loss": 0.3917, "loss_nan_ranks": 0, "loss_rank_avg": 0.13718704879283905, "step": 135, "valid_targets_mean": 8901.3, "valid_targets_min": 2172 }, { "epoch": 0.6896551724137931, "grad_norm": 0.37646691210233985, "learning_rate": 3.9838126808786006e-05, "loss": 0.392, "loss_nan_ranks": 0, "loss_rank_avg": 0.1294289231300354, "step": 140, "valid_targets_mean": 9038.9, "valid_targets_min": 2299 }, { "epoch": 0.7142857142857143, "grad_norm": 0.2878512375569559, "learning_rate": 3.9791502550194803e-05, "loss": 0.3869, "loss_nan_ranks": 0, "loss_rank_avg": 0.1291024088859558, "step": 145, "valid_targets_mean": 9420.4, "valid_targets_min": 1430 }, { "epoch": 0.7389162561576355, "grad_norm": 0.24042444621370282, "learning_rate": 3.973902006383831e-05, "loss": 0.388, "loss_nan_ranks": 0, "loss_rank_avg": 0.13769212365150452, "step": 150, "valid_targets_mean": 8992.5, "valid_targets_min": 897 }, { "epoch": 0.7635467980295566, "grad_norm": 0.310912607819178, "learning_rate": 3.968069488438139e-05, "loss": 0.3824, "loss_nan_ranks": 0, "loss_rank_avg": 0.12700515985488892, "step": 155, "valid_targets_mean": 8964.4, "valid_targets_min": 349 }, { "epoch": 0.7881773399014779, "grad_norm": 0.2655434672665567, "learning_rate": 3.9616544275909195e-05, "loss": 0.3812, "loss_nan_ranks": 0, "loss_rank_avg": 0.11202602088451385, "step": 160, "valid_targets_mean": 8055.7, "valid_targets_min": 434 }, { "epoch": 0.812807881773399, "grad_norm": 0.26868643875179116, "learning_rate": 3.954658722681712e-05, "loss": 0.3816, "loss_nan_ranks": 0, "loss_rank_avg": 0.12862944602966309, "step": 165, "valid_targets_mean": 9318.7, "valid_targets_min": 4356 }, { "epoch": 0.8374384236453202, "grad_norm": 0.28733672498228385, "learning_rate": 3.9470844444190246e-05, "loss": 0.3849, "loss_nan_ranks": 0, "loss_rank_avg": 0.13202814757823944, "step": 170, "valid_targets_mean": 9219.0, "valid_targets_min": 2164 }, { "epoch": 0.8620689655172413, "grad_norm": 0.28313069036684496, "learning_rate": 3.938933834767414e-05, "loss": 0.3823, "loss_nan_ranks": 0, "loss_rank_avg": 0.1158134713768959, "step": 175, "valid_targets_mean": 8608.2, "valid_targets_min": 467 }, { "epoch": 0.8866995073891626, "grad_norm": 0.26052104379161, "learning_rate": 3.930209306283867e-05, "loss": 0.3743, "loss_nan_ranks": 0, "loss_rank_avg": 0.11961864680051804, "step": 180, "valid_targets_mean": 7993.2, "valid_targets_min": 3574 }, { "epoch": 0.9113300492610837, "grad_norm": 0.258686222921349, "learning_rate": 3.9209134414036925e-05, "loss": 0.3729, "loss_nan_ranks": 0, "loss_rank_avg": 0.10488448292016983, "step": 185, "valid_targets_mean": 8279.7, "valid_targets_min": 1136 }, { "epoch": 0.9359605911330049, "grad_norm": 0.26136503207196515, "learning_rate": 3.9110489916761276e-05, "loss": 0.3801, "loss_nan_ranks": 0, "loss_rank_avg": 0.13918861746788025, "step": 190, "valid_targets_mean": 9786.9, "valid_targets_min": 3188 }, { "epoch": 0.9605911330049262, "grad_norm": 0.2743644632084587, "learning_rate": 3.9006188769498865e-05, "loss": 0.3741, "loss_nan_ranks": 0, "loss_rank_avg": 0.13338595628738403, "step": 195, "valid_targets_mean": 10200.8, "valid_targets_min": 5539 }, { "epoch": 0.9852216748768473, "grad_norm": 0.24805804478278223, "learning_rate": 3.8896261845088955e-05, "loss": 0.3741, "loss_nan_ranks": 0, "loss_rank_avg": 0.12868089973926544, "step": 200, "valid_targets_mean": 8725.2, "valid_targets_min": 2242 }, { "epoch": 1.0098522167487685, "grad_norm": 0.27564411587309895, "learning_rate": 3.8780741681584636e-05, "loss": 0.3612, "loss_nan_ranks": 0, "loss_rank_avg": 0.10980004072189331, "step": 205, "valid_targets_mean": 8534.5, "valid_targets_min": 3406 }, { "epoch": 1.0344827586206897, "grad_norm": 0.2788578201037295, "learning_rate": 3.865966247262166e-05, "loss": 0.3593, "loss_nan_ranks": 0, "loss_rank_avg": 0.11370569467544556, "step": 210, "valid_targets_mean": 8309.0, "valid_targets_min": 3283 }, { "epoch": 1.0591133004926108, "grad_norm": 0.248625884092612, "learning_rate": 3.8533060057297235e-05, "loss": 0.3561, "loss_nan_ranks": 0, "loss_rank_avg": 0.12278437614440918, "step": 215, "valid_targets_mean": 8779.9, "valid_targets_min": 2576 }, { "epoch": 1.083743842364532, "grad_norm": 0.2684186476300992, "learning_rate": 3.840097190956175e-05, "loss": 0.3581, "loss_nan_ranks": 0, "loss_rank_avg": 0.1202234998345375, "step": 220, "valid_targets_mean": 9176.9, "valid_targets_min": 4137 }, { "epoch": 1.1083743842364533, "grad_norm": 0.25632340589308605, "learning_rate": 3.826343712712658e-05, "loss": 0.3503, "loss_nan_ranks": 0, "loss_rank_avg": 0.10630079358816147, "step": 225, "valid_targets_mean": 8646.0, "valid_targets_min": 1169 }, { "epoch": 1.1330049261083743, "grad_norm": 0.27498707289566154, "learning_rate": 3.81204964198913e-05, "loss": 0.3595, "loss_nan_ranks": 0, "loss_rank_avg": 0.1310245394706726, "step": 230, "valid_targets_mean": 9294.8, "valid_targets_min": 2075 }, { "epoch": 1.1576354679802956, "grad_norm": 0.2642154827340513, "learning_rate": 3.797219209789365e-05, "loss": 0.3497, "loss_nan_ranks": 0, "loss_rank_avg": 0.1117246225476265, "step": 235, "valid_targets_mean": 8429.1, "valid_targets_min": 1930 }, { "epoch": 1.1822660098522166, "grad_norm": 0.2836720432865943, "learning_rate": 3.7818568058785906e-05, "loss": 0.3585, "loss_nan_ranks": 0, "loss_rank_avg": 0.11454908549785614, "step": 240, "valid_targets_mean": 8133.4, "valid_targets_min": 390 }, { "epoch": 1.206896551724138, "grad_norm": 0.3365645717447432, "learning_rate": 3.7659669774841274e-05, "loss": 0.3591, "loss_nan_ranks": 0, "loss_rank_avg": 0.11681550741195679, "step": 245, "valid_targets_mean": 8933.8, "valid_targets_min": 612 }, { "epoch": 1.2315270935960592, "grad_norm": 0.31327662356339114, "learning_rate": 3.749554427949426e-05, "loss": 0.351, "loss_nan_ranks": 0, "loss_rank_avg": 0.1127677857875824, "step": 250, "valid_targets_mean": 8292.3, "valid_targets_min": 2632 }, { "epoch": 1.2561576354679804, "grad_norm": 0.23261922775556002, "learning_rate": 3.7326240153418895e-05, "loss": 0.3501, "loss_nan_ranks": 0, "loss_rank_avg": 0.1259966492652893, "step": 255, "valid_targets_mean": 9653.4, "valid_targets_min": 3518 }, { "epoch": 1.2807881773399015, "grad_norm": 0.2525335827059363, "learning_rate": 3.7151807510148975e-05, "loss": 0.3582, "loss_nan_ranks": 0, "loss_rank_avg": 0.11980243027210236, "step": 260, "valid_targets_mean": 9146.5, "valid_targets_min": 1456 }, { "epoch": 1.3054187192118227, "grad_norm": 0.3310408716814809, "learning_rate": 3.697229798124464e-05, "loss": 0.3577, "loss_nan_ranks": 0, "loss_rank_avg": 0.11194953322410583, "step": 265, "valid_targets_mean": 9172.4, "valid_targets_min": 3619 }, { "epoch": 1.3300492610837438, "grad_norm": 0.2648103991246787, "learning_rate": 3.678776470100954e-05, "loss": 0.3545, "loss_nan_ranks": 0, "loss_rank_avg": 0.1303391456604004, "step": 270, "valid_targets_mean": 8879.3, "valid_targets_min": 4536 }, { "epoch": 1.354679802955665, "grad_norm": 0.25449167975817794, "learning_rate": 3.659826229076326e-05, "loss": 0.3534, "loss_nan_ranks": 0, "loss_rank_avg": 0.1403665840625763, "step": 275, "valid_targets_mean": 10481.7, "valid_targets_min": 4554 }, { "epoch": 1.3793103448275863, "grad_norm": 0.2818830814758012, "learning_rate": 3.640384684267357e-05, "loss": 0.3469, "loss_nan_ranks": 0, "loss_rank_avg": 0.1068924218416214, "step": 280, "valid_targets_mean": 8961.1, "valid_targets_min": 3230 }, { "epoch": 1.4039408866995073, "grad_norm": 0.2693512065658683, "learning_rate": 3.6204575903153285e-05, "loss": 0.3532, "loss_nan_ranks": 0, "loss_rank_avg": 0.11001914739608765, "step": 285, "valid_targets_mean": 8176.8, "valid_targets_min": 3715 }, { "epoch": 1.4285714285714286, "grad_norm": 0.25311056093985246, "learning_rate": 3.600050845582669e-05, "loss": 0.3474, "loss_nan_ranks": 0, "loss_rank_avg": 0.12169365584850311, "step": 290, "valid_targets_mean": 9581.9, "valid_targets_min": 4873 }, { "epoch": 1.4532019704433496, "grad_norm": 0.2633607633838574, "learning_rate": 3.57917049040706e-05, "loss": 0.3484, "loss_nan_ranks": 0, "loss_rank_avg": 0.1089414581656456, "step": 295, "valid_targets_mean": 8416.3, "valid_targets_min": 425 }, { "epoch": 1.477832512315271, "grad_norm": 0.244398881708659, "learning_rate": 3.557822705313507e-05, "loss": 0.3518, "loss_nan_ranks": 0, "loss_rank_avg": 0.11258911341428757, "step": 300, "valid_targets_mean": 8594.2, "valid_targets_min": 2520 }, { "epoch": 1.5024630541871922, "grad_norm": 0.24997620578865998, "learning_rate": 3.5360138091849276e-05, "loss": 0.3469, "loss_nan_ranks": 0, "loss_rank_avg": 0.11095688492059708, "step": 305, "valid_targets_mean": 8580.7, "valid_targets_min": 3098 }, { "epoch": 1.5270935960591134, "grad_norm": 0.2245714689080682, "learning_rate": 3.513750257391778e-05, "loss": 0.3508, "loss_nan_ranks": 0, "loss_rank_avg": 0.1221964880824089, "step": 310, "valid_targets_mean": 9254.5, "valid_targets_min": 3804 }, { "epoch": 1.5517241379310345, "grad_norm": 0.25608384850505034, "learning_rate": 3.4910386398812784e-05, "loss": 0.3477, "loss_nan_ranks": 0, "loss_rank_avg": 0.1219601258635521, "step": 315, "valid_targets_mean": 8776.5, "valid_targets_min": 3199 }, { "epoch": 1.5763546798029555, "grad_norm": 0.3647096406549712, "learning_rate": 3.467885679226817e-05, "loss": 0.3485, "loss_nan_ranks": 0, "loss_rank_avg": 0.1203581690788269, "step": 320, "valid_targets_mean": 9392.0, "valid_targets_min": 3986 }, { "epoch": 1.6009852216748768, "grad_norm": 0.24346253896666947, "learning_rate": 3.444298228638077e-05, "loss": 0.3532, "loss_nan_ranks": 0, "loss_rank_avg": 0.12136732041835785, "step": 325, "valid_targets_mean": 9895.2, "valid_targets_min": 4423 }, { "epoch": 1.625615763546798, "grad_norm": 0.2428346066186375, "learning_rate": 3.420283269932514e-05, "loss": 0.345, "loss_nan_ranks": 0, "loss_rank_avg": 0.11618741601705551, "step": 330, "valid_targets_mean": 8762.7, "valid_targets_min": 3014 }, { "epoch": 1.6502463054187193, "grad_norm": 0.24297086102914583, "learning_rate": 3.3958479114687515e-05, "loss": 0.3458, "loss_nan_ranks": 0, "loss_rank_avg": 0.11359558999538422, "step": 335, "valid_targets_mean": 9019.1, "valid_targets_min": 2745 }, { "epoch": 1.6748768472906403, "grad_norm": 0.23444365454203103, "learning_rate": 3.3709993860425346e-05, "loss": 0.3473, "loss_nan_ranks": 0, "loss_rank_avg": 0.1222086101770401, "step": 340, "valid_targets_mean": 9328.7, "valid_targets_min": 2345 }, { "epoch": 1.6995073891625616, "grad_norm": 0.22889649248851593, "learning_rate": 3.345745048745838e-05, "loss": 0.3516, "loss_nan_ranks": 0, "loss_rank_avg": 0.11141127347946167, "step": 345, "valid_targets_mean": 8866.8, "valid_targets_min": 366 }, { "epoch": 1.7241379310344827, "grad_norm": 0.2343831962801274, "learning_rate": 3.320092374789782e-05, "loss": 0.3546, "loss_nan_ranks": 0, "loss_rank_avg": 0.11277924478054047, "step": 350, "valid_targets_mean": 8265.9, "valid_targets_min": 375 }, { "epoch": 1.748768472906404, "grad_norm": 0.2467687483921903, "learning_rate": 3.2940489572919917e-05, "loss": 0.3472, "loss_nan_ranks": 0, "loss_rank_avg": 0.11757723987102509, "step": 355, "valid_targets_mean": 9222.7, "valid_targets_min": 3977 }, { "epoch": 1.7733990147783252, "grad_norm": 0.2576031897620249, "learning_rate": 3.267622505029053e-05, "loss": 0.3448, "loss_nan_ranks": 0, "loss_rank_avg": 0.1083230972290039, "step": 360, "valid_targets_mean": 8165.4, "valid_targets_min": 2847 }, { "epoch": 1.7980295566502464, "grad_norm": 0.2847455800013209, "learning_rate": 3.24082084015474e-05, "loss": 0.3492, "loss_nan_ranks": 0, "loss_rank_avg": 0.10450009256601334, "step": 365, "valid_targets_mean": 7683.3, "valid_targets_min": 384 }, { "epoch": 1.8226600985221675, "grad_norm": 0.24889533758877141, "learning_rate": 3.213651895884683e-05, "loss": 0.3508, "loss_nan_ranks": 0, "loss_rank_avg": 0.12324145436286926, "step": 370, "valid_targets_mean": 9273.4, "valid_targets_min": 339 }, { "epoch": 1.8472906403940885, "grad_norm": 0.2509726775951525, "learning_rate": 3.1861237141481506e-05, "loss": 0.3463, "loss_nan_ranks": 0, "loss_rank_avg": 0.10529517382383347, "step": 375, "valid_targets_mean": 7376.7, "valid_targets_min": 2690 }, { "epoch": 1.8719211822660098, "grad_norm": 0.30513053329486167, "learning_rate": 3.158244443207671e-05, "loss": 0.3535, "loss_nan_ranks": 0, "loss_rank_avg": 0.12324077636003494, "step": 380, "valid_targets_mean": 8858.1, "valid_targets_min": 2118 }, { "epoch": 1.896551724137931, "grad_norm": 0.257201744023466, "learning_rate": 3.130022335247163e-05, "loss": 0.3417, "loss_nan_ranks": 0, "loss_rank_avg": 0.12891273200511932, "step": 385, "valid_targets_mean": 9345.1, "valid_targets_min": 5281 }, { "epoch": 1.9211822660098523, "grad_norm": 0.24998558537607088, "learning_rate": 3.101465743929318e-05, "loss": 0.345, "loss_nan_ranks": 0, "loss_rank_avg": 0.11640559881925583, "step": 390, "valid_targets_mean": 9220.7, "valid_targets_min": 2002 }, { "epoch": 1.9458128078817734, "grad_norm": 0.22830611933571576, "learning_rate": 3.072583121922939e-05, "loss": 0.3455, "loss_nan_ranks": 0, "loss_rank_avg": 0.11128903925418854, "step": 395, "valid_targets_mean": 8883.6, "valid_targets_min": 2488 }, { "epoch": 1.9704433497536946, "grad_norm": 0.22396942549404023, "learning_rate": 3.0433830184009694e-05, "loss": 0.3453, "loss_nan_ranks": 0, "loss_rank_avg": 0.11893004179000854, "step": 400, "valid_targets_mean": 8981.7, "valid_targets_min": 2763 }, { "epoch": 1.9950738916256157, "grad_norm": 0.24644092979609167, "learning_rate": 3.0138740765099724e-05, "loss": 0.3489, "loss_nan_ranks": 0, "loss_rank_avg": 0.1190701425075531, "step": 405, "valid_targets_mean": 8123.2, "valid_targets_min": 415 }, { "epoch": 2.019704433497537, "grad_norm": 0.2650309894835959, "learning_rate": 2.984065030811776e-05, "loss": 0.3243, "loss_nan_ranks": 0, "loss_rank_avg": 0.11054711788892746, "step": 410, "valid_targets_mean": 8905.1, "valid_targets_min": 859 }, { "epoch": 2.044334975369458, "grad_norm": 0.22732032307524816, "learning_rate": 2.9539647046980716e-05, "loss": 0.3208, "loss_nan_ranks": 0, "loss_rank_avg": 0.10819808393716812, "step": 415, "valid_targets_mean": 8779.8, "valid_targets_min": 2486 }, { "epoch": 2.0689655172413794, "grad_norm": 0.25644205605632525, "learning_rate": 2.923582007778716e-05, "loss": 0.3251, "loss_nan_ranks": 0, "loss_rank_avg": 0.10985960066318512, "step": 420, "valid_targets_mean": 9446.7, "valid_targets_min": 3496 }, { "epoch": 2.0935960591133007, "grad_norm": 0.24088638698690903, "learning_rate": 2.8929259332445096e-05, "loss": 0.3271, "loss_nan_ranks": 0, "loss_rank_avg": 0.10128775238990784, "step": 425, "valid_targets_mean": 9092.7, "valid_targets_min": 983 }, { "epoch": 2.1182266009852215, "grad_norm": 0.24541067677362202, "learning_rate": 2.8620055552052403e-05, "loss": 0.3258, "loss_nan_ranks": 0, "loss_rank_avg": 0.11216947436332703, "step": 430, "valid_targets_mean": 8788.4, "valid_targets_min": 3614 }, { "epoch": 2.142857142857143, "grad_norm": 0.2482079004721641, "learning_rate": 2.8308300260037734e-05, "loss": 0.3224, "loss_nan_ranks": 0, "loss_rank_avg": 0.1115947961807251, "step": 435, "valid_targets_mean": 9745.7, "valid_targets_min": 3675 }, { "epoch": 2.167487684729064, "grad_norm": 0.22913552233839607, "learning_rate": 2.7994085735069814e-05, "loss": 0.325, "loss_nan_ranks": 0, "loss_rank_avg": 0.10517291724681854, "step": 440, "valid_targets_mean": 8710.5, "valid_targets_min": 3662 }, { "epoch": 2.1921182266009853, "grad_norm": 0.24146267991687, "learning_rate": 2.767750498374327e-05, "loss": 0.3197, "loss_nan_ranks": 0, "loss_rank_avg": 0.12296651303768158, "step": 445, "valid_targets_mean": 8849.4, "valid_targets_min": 535 }, { "epoch": 2.2167487684729066, "grad_norm": 0.21827048442446823, "learning_rate": 2.735865171304889e-05, "loss": 0.3188, "loss_nan_ranks": 0, "loss_rank_avg": 0.10590243339538574, "step": 450, "valid_targets_mean": 8600.2, "valid_targets_min": 3249 }, { "epoch": 2.2413793103448274, "grad_norm": 0.23028098738931826, "learning_rate": 2.703762030263666e-05, "loss": 0.3243, "loss_nan_ranks": 0, "loss_rank_avg": 0.09583649784326553, "step": 455, "valid_targets_mean": 8295.3, "valid_targets_min": 3305 }, { "epoch": 2.2660098522167487, "grad_norm": 0.22275379600509554, "learning_rate": 2.6714505776879666e-05, "loss": 0.3212, "loss_nan_ranks": 0, "loss_rank_avg": 0.1080237478017807, "step": 460, "valid_targets_mean": 9274.6, "valid_targets_min": 2483 }, { "epoch": 2.29064039408867, "grad_norm": 0.2207031504793314, "learning_rate": 2.6389403776747116e-05, "loss": 0.3182, "loss_nan_ranks": 0, "loss_rank_avg": 0.11194107681512833, "step": 465, "valid_targets_mean": 9042.2, "valid_targets_min": 4617 }, { "epoch": 2.315270935960591, "grad_norm": 0.2241399700657853, "learning_rate": 2.606241053149492e-05, "loss": 0.3244, "loss_nan_ranks": 0, "loss_rank_avg": 0.10940317064523697, "step": 470, "valid_targets_mean": 9518.9, "valid_targets_min": 1468 }, { "epoch": 2.3399014778325125, "grad_norm": 0.24058313899023676, "learning_rate": 2.5733622830182095e-05, "loss": 0.327, "loss_nan_ranks": 0, "loss_rank_avg": 0.09529858827590942, "step": 475, "valid_targets_mean": 8239.7, "valid_targets_min": 251 }, { "epoch": 2.3645320197044333, "grad_norm": 0.23562354398146523, "learning_rate": 2.5403137993021483e-05, "loss": 0.3234, "loss_nan_ranks": 0, "loss_rank_avg": 0.09677626192569733, "step": 480, "valid_targets_mean": 8106.5, "valid_targets_min": 639 }, { "epoch": 2.3891625615763545, "grad_norm": 0.2530852630178121, "learning_rate": 2.5071053842573264e-05, "loss": 0.3223, "loss_nan_ranks": 0, "loss_rank_avg": 0.10545364022254944, "step": 485, "valid_targets_mean": 9255.8, "valid_targets_min": 1844 }, { "epoch": 2.413793103448276, "grad_norm": 0.21881629429554156, "learning_rate": 2.473746867478973e-05, "loss": 0.3208, "loss_nan_ranks": 0, "loss_rank_avg": 0.11455640196800232, "step": 490, "valid_targets_mean": 9913.8, "valid_targets_min": 3181 }, { "epoch": 2.438423645320197, "grad_norm": 0.2524273469977475, "learning_rate": 2.4402481229919982e-05, "loss": 0.325, "loss_nan_ranks": 0, "loss_rank_avg": 0.10793764144182205, "step": 495, "valid_targets_mean": 9409.1, "valid_targets_min": 386 }, { "epoch": 2.4630541871921183, "grad_norm": 0.21182409471855082, "learning_rate": 2.406619066328311e-05, "loss": 0.321, "loss_nan_ranks": 0, "loss_rank_avg": 0.10518929362297058, "step": 500, "valid_targets_mean": 9363.0, "valid_targets_min": 4145 }, { "epoch": 2.4876847290640396, "grad_norm": 0.2191040471839855, "learning_rate": 2.3728696515918496e-05, "loss": 0.3213, "loss_nan_ranks": 0, "loss_rank_avg": 0.10714466869831085, "step": 505, "valid_targets_mean": 8711.5, "valid_targets_min": 667 }, { "epoch": 2.512315270935961, "grad_norm": 0.2448105118684568, "learning_rate": 2.3390098685121938e-05, "loss": 0.322, "loss_nan_ranks": 0, "loss_rank_avg": 0.09540620446205139, "step": 510, "valid_targets_mean": 8153.1, "valid_targets_min": 3180 }, { "epoch": 2.5369458128078817, "grad_norm": 0.220518003206731, "learning_rate": 2.3050497394876363e-05, "loss": 0.3222, "loss_nan_ranks": 0, "loss_rank_avg": 0.10619120299816132, "step": 515, "valid_targets_mean": 8894.3, "valid_targets_min": 2359 }, { "epoch": 2.561576354679803, "grad_norm": 0.23717022293975634, "learning_rate": 2.2709993166185803e-05, "loss": 0.3341, "loss_nan_ranks": 0, "loss_rank_avg": 0.10815994441509247, "step": 520, "valid_targets_mean": 8602.2, "valid_targets_min": 4126 }, { "epoch": 2.586206896551724, "grad_norm": 0.2312939895442153, "learning_rate": 2.2368686787321475e-05, "loss": 0.3242, "loss_nan_ranks": 0, "loss_rank_avg": 0.10106731951236725, "step": 525, "valid_targets_mean": 7950.1, "valid_targets_min": 410 }, { "epoch": 2.6108374384236455, "grad_norm": 0.22544526165656265, "learning_rate": 2.2026679283988727e-05, "loss": 0.3196, "loss_nan_ranks": 0, "loss_rank_avg": 0.10532703995704651, "step": 530, "valid_targets_mean": 9430.9, "valid_targets_min": 4000 }, { "epoch": 2.6354679802955667, "grad_norm": 0.20231326994814502, "learning_rate": 2.168407188942373e-05, "loss": 0.3221, "loss_nan_ranks": 0, "loss_rank_avg": 0.11423169821500778, "step": 535, "valid_targets_mean": 9679.7, "valid_targets_min": 2166 }, { "epoch": 2.6600985221674875, "grad_norm": 0.20631608444099137, "learning_rate": 2.1340966014428744e-05, "loss": 0.322, "loss_nan_ranks": 0, "loss_rank_avg": 0.09936245530843735, "step": 540, "valid_targets_mean": 8590.2, "valid_targets_min": 366 }, { "epoch": 2.684729064039409, "grad_norm": 0.2264625162075009, "learning_rate": 2.0997463217354803e-05, "loss": 0.3207, "loss_nan_ranks": 0, "loss_rank_avg": 0.11137133836746216, "step": 545, "valid_targets_mean": 8606.8, "valid_targets_min": 2707 }, { "epoch": 2.70935960591133, "grad_norm": 0.19869783835319854, "learning_rate": 2.065366517404071e-05, "loss": 0.3208, "loss_nan_ranks": 0, "loss_rank_avg": 0.11436265707015991, "step": 550, "valid_targets_mean": 9982.7, "valid_targets_min": 5207 }, { "epoch": 2.7339901477832513, "grad_norm": 0.21361086043176336, "learning_rate": 2.030967364771733e-05, "loss": 0.317, "loss_nan_ranks": 0, "loss_rank_avg": 0.10924769937992096, "step": 555, "valid_targets_mean": 8423.0, "valid_targets_min": 3589 }, { "epoch": 2.7586206896551726, "grad_norm": 0.2159779190682014, "learning_rate": 1.996559045888593e-05, "loss": 0.3193, "loss_nan_ranks": 0, "loss_rank_avg": 0.11431802809238434, "step": 560, "valid_targets_mean": 9245.8, "valid_targets_min": 796 }, { "epoch": 2.7832512315270934, "grad_norm": 0.19784699425306007, "learning_rate": 1.9621517455179627e-05, "loss": 0.323, "loss_nan_ranks": 0, "loss_rank_avg": 0.11673343181610107, "step": 565, "valid_targets_mean": 9697.0, "valid_targets_min": 4763 }, { "epoch": 2.8078817733990147, "grad_norm": 0.21421367114423392, "learning_rate": 1.9277556481216737e-05, "loss": 0.3192, "loss_nan_ranks": 0, "loss_rank_avg": 0.10330238938331604, "step": 570, "valid_targets_mean": 8288.4, "valid_targets_min": 424 }, { "epoch": 2.832512315270936, "grad_norm": 0.19836582938289005, "learning_rate": 1.893380934845514e-05, "loss": 0.3226, "loss_nan_ranks": 0, "loss_rank_avg": 0.12032057344913483, "step": 575, "valid_targets_mean": 8848.4, "valid_targets_min": 2745 }, { "epoch": 2.857142857142857, "grad_norm": 0.21631083193557546, "learning_rate": 1.8590377805056306e-05, "loss": 0.3192, "loss_nan_ranks": 0, "loss_rank_avg": 0.10158735513687134, "step": 580, "valid_targets_mean": 8897.4, "valid_targets_min": 2826 }, { "epoch": 2.8817733990147785, "grad_norm": 0.20981357344340126, "learning_rate": 1.8247363505768177e-05, "loss": 0.3153, "loss_nan_ranks": 0, "loss_rank_avg": 0.09952034801244736, "step": 585, "valid_targets_mean": 8201.6, "valid_targets_min": 1306 }, { "epoch": 2.9064039408866993, "grad_norm": 0.2271419153781403, "learning_rate": 1.7904867981835617e-05, "loss": 0.3229, "loss_nan_ranks": 0, "loss_rank_avg": 0.11091038584709167, "step": 590, "valid_targets_mean": 9144.8, "valid_targets_min": 1621 }, { "epoch": 2.9310344827586206, "grad_norm": 0.20711350475630588, "learning_rate": 1.7562992610947517e-05, "loss": 0.3165, "loss_nan_ranks": 0, "loss_rank_avg": 0.0879313200712204, "step": 595, "valid_targets_mean": 6960.7, "valid_targets_min": 341 }, { "epoch": 2.955665024630542, "grad_norm": 0.20228134899502237, "learning_rate": 1.7221838587229215e-05, "loss": 0.3234, "loss_nan_ranks": 0, "loss_rank_avg": 0.12365391105413437, "step": 600, "valid_targets_mean": 9605.3, "valid_targets_min": 417 }, { "epoch": 2.980295566502463, "grad_norm": 0.2081591697388915, "learning_rate": 1.6881506891289386e-05, "loss": 0.3164, "loss_nan_ranks": 0, "loss_rank_avg": 0.10903681814670563, "step": 605, "valid_targets_mean": 8928.9, "valid_targets_min": 2255 }, { "epoch": 3.0049261083743843, "grad_norm": 0.23994026035218605, "learning_rate": 1.654209826033004e-05, "loss": 0.3148, "loss_nan_ranks": 0, "loss_rank_avg": 0.10147841274738312, "step": 610, "valid_targets_mean": 8829.4, "valid_targets_min": 4278 }, { "epoch": 3.0295566502463056, "grad_norm": 0.22674405029866287, "learning_rate": 1.6203713158328626e-05, "loss": 0.3046, "loss_nan_ranks": 0, "loss_rank_avg": 0.10697513818740845, "step": 615, "valid_targets_mean": 9735.6, "valid_targets_min": 755 }, { "epoch": 3.0541871921182264, "grad_norm": 0.23064581576560553, "learning_rate": 1.586645174630094e-05, "loss": 0.2992, "loss_nan_ranks": 0, "loss_rank_avg": 0.0928591638803482, "step": 620, "valid_targets_mean": 8492.0, "valid_targets_min": 3198 }, { "epoch": 3.0788177339901477, "grad_norm": 0.23288072128103748, "learning_rate": 1.5530413852653816e-05, "loss": 0.3028, "loss_nan_ranks": 0, "loss_rank_avg": 0.10058435797691345, "step": 625, "valid_targets_mean": 8571.2, "valid_targets_min": 228 }, { "epoch": 3.103448275862069, "grad_norm": 0.21989355607355687, "learning_rate": 1.5195698943636135e-05, "loss": 0.2978, "loss_nan_ranks": 0, "loss_rank_avg": 0.09678147733211517, "step": 630, "valid_targets_mean": 8105.6, "valid_targets_min": 1260 }, { "epoch": 3.12807881773399, "grad_norm": 0.19891006352522667, "learning_rate": 1.4862406093897175e-05, "loss": 0.2999, "loss_nan_ranks": 0, "loss_rank_avg": 0.09648677706718445, "step": 635, "valid_targets_mean": 8988.8, "valid_targets_min": 363 }, { "epoch": 3.1527093596059115, "grad_norm": 0.24655653698082503, "learning_rate": 1.4530633957160733e-05, "loss": 0.3012, "loss_nan_ranks": 0, "loss_rank_avg": 0.08152036368846893, "step": 640, "valid_targets_mean": 7848.4, "valid_targets_min": 2993 }, { "epoch": 3.1773399014778327, "grad_norm": 0.22762238372136157, "learning_rate": 1.4200480737023943e-05, "loss": 0.3004, "loss_nan_ranks": 0, "loss_rank_avg": 0.10375897586345673, "step": 645, "valid_targets_mean": 9312.4, "valid_targets_min": 2486 }, { "epoch": 3.2019704433497536, "grad_norm": 0.22534789337999453, "learning_rate": 1.3872044157889297e-05, "loss": 0.3019, "loss_nan_ranks": 0, "loss_rank_avg": 0.09716019779443741, "step": 650, "valid_targets_mean": 8545.7, "valid_targets_min": 1935 }, { "epoch": 3.226600985221675, "grad_norm": 0.20514988270828538, "learning_rate": 1.3545421436038477e-05, "loss": 0.3094, "loss_nan_ranks": 0, "loss_rank_avg": 0.10193373262882233, "step": 655, "valid_targets_mean": 8863.9, "valid_targets_min": 2907 }, { "epoch": 3.251231527093596, "grad_norm": 0.1978171411281838, "learning_rate": 1.3220709250856656e-05, "loss": 0.3058, "loss_nan_ranks": 0, "loss_rank_avg": 0.0844145119190216, "step": 660, "valid_targets_mean": 8168.6, "valid_targets_min": 2005 }, { "epoch": 3.2758620689655173, "grad_norm": 0.20349161652396405, "learning_rate": 1.2898003716215626e-05, "loss": 0.2999, "loss_nan_ranks": 0, "loss_rank_avg": 0.10471087694168091, "step": 665, "valid_targets_mean": 9535.9, "valid_targets_min": 3279 }, { "epoch": 3.3004926108374386, "grad_norm": 0.19864565640995638, "learning_rate": 1.2577400352024426e-05, "loss": 0.3011, "loss_nan_ranks": 0, "loss_rank_avg": 0.09200199693441391, "step": 670, "valid_targets_mean": 8635.4, "valid_targets_min": 2501 }, { "epoch": 3.3251231527093594, "grad_norm": 0.2096280531918653, "learning_rate": 1.2258994055955658e-05, "loss": 0.2993, "loss_nan_ranks": 0, "loss_rank_avg": 0.09174078702926636, "step": 675, "valid_targets_mean": 8636.1, "valid_targets_min": 2381 }, { "epoch": 3.3497536945812807, "grad_norm": 0.21102625245478873, "learning_rate": 1.1942879075356135e-05, "loss": 0.3, "loss_nan_ranks": 0, "loss_rank_avg": 0.10436161607503891, "step": 680, "valid_targets_mean": 9196.2, "valid_targets_min": 2880 }, { "epoch": 3.374384236453202, "grad_norm": 0.20074354219907545, "learning_rate": 1.1629148979349836e-05, "loss": 0.2964, "loss_nan_ranks": 0, "loss_rank_avg": 0.09593068063259125, "step": 685, "valid_targets_mean": 9236.0, "valid_targets_min": 3690 }, { "epoch": 3.399014778325123, "grad_norm": 0.21770217204418577, "learning_rate": 1.1317896631141814e-05, "loss": 0.3052, "loss_nan_ranks": 0, "loss_rank_avg": 0.10593973100185394, "step": 690, "valid_targets_mean": 8596.7, "valid_targets_min": 467 }, { "epoch": 3.4236453201970445, "grad_norm": 0.202033129923407, "learning_rate": 1.1009214160530875e-05, "loss": 0.3036, "loss_nan_ranks": 0, "loss_rank_avg": 0.08792443573474884, "step": 695, "valid_targets_mean": 7999.6, "valid_targets_min": 3034 }, { "epoch": 3.4482758620689653, "grad_norm": 0.19377905050918098, "learning_rate": 1.0703192936639481e-05, "loss": 0.3022, "loss_nan_ranks": 0, "loss_rank_avg": 0.08651701360940933, "step": 700, "valid_targets_mean": 8115.2, "valid_targets_min": 2167 }, { "epoch": 3.4729064039408866, "grad_norm": 0.18949986820075895, "learning_rate": 1.0399923540868712e-05, "loss": 0.3059, "loss_nan_ranks": 0, "loss_rank_avg": 0.10459959506988525, "step": 705, "valid_targets_mean": 9690.8, "valid_targets_min": 3185 }, { "epoch": 3.497536945812808, "grad_norm": 0.2011804523127738, "learning_rate": 1.0099495740086454e-05, "loss": 0.3009, "loss_nan_ranks": 0, "loss_rank_avg": 0.10490408539772034, "step": 710, "valid_targets_mean": 8907.2, "valid_targets_min": 2179 }, { "epoch": 3.522167487684729, "grad_norm": 0.20548088230585662, "learning_rate": 9.801998460056643e-06, "loss": 0.3038, "loss_nan_ranks": 0, "loss_rank_avg": 0.10383732616901398, "step": 715, "valid_targets_mean": 8324.9, "valid_targets_min": 983 }, { "epoch": 3.5467980295566504, "grad_norm": 0.19683329981111766, "learning_rate": 9.507519759117546e-06, "loss": 0.2991, "loss_nan_ranks": 0, "loss_rank_avg": 0.09702172875404358, "step": 720, "valid_targets_mean": 8744.2, "valid_targets_min": 3967 }, { "epoch": 3.571428571428571, "grad_norm": 0.19625336438708468, "learning_rate": 9.216146802116676e-06, "loss": 0.3053, "loss_nan_ranks": 0, "loss_rank_avg": 0.09151807427406311, "step": 725, "valid_targets_mean": 8401.1, "valid_targets_min": 1020 }, { "epoch": 3.596059113300493, "grad_norm": 0.18708247392345115, "learning_rate": 8.92796583461031e-06, "loss": 0.3064, "loss_nan_ranks": 0, "loss_rank_avg": 0.09643843024969101, "step": 730, "valid_targets_mean": 8835.6, "valid_targets_min": 796 }, { "epoch": 3.6206896551724137, "grad_norm": 0.19360040135948756, "learning_rate": 8.643062157335e-06, "loss": 0.2994, "loss_nan_ranks": 0, "loss_rank_avg": 0.08726485073566437, "step": 735, "valid_targets_mean": 8767.6, "valid_targets_min": 1621 }, { "epoch": 3.645320197044335, "grad_norm": 0.18440066609355732, "learning_rate": 8.361520100958856e-06, "loss": 0.2982, "loss_nan_ranks": 0, "loss_rank_avg": 0.08847818523645401, "step": 740, "valid_targets_mean": 9271.4, "valid_targets_min": 4099 }, { "epoch": 3.6699507389162562, "grad_norm": 0.18646457682094164, "learning_rate": 8.083423001119855e-06, "loss": 0.3, "loss_nan_ranks": 0, "loss_rank_avg": 0.09122344851493835, "step": 745, "valid_targets_mean": 8637.3, "valid_targets_min": 2856 }, { "epoch": 3.6945812807881775, "grad_norm": 0.20175487117378538, "learning_rate": 7.80885317375877e-06, "loss": 0.2991, "loss_nan_ranks": 0, "loss_rank_avg": 0.10354309529066086, "step": 750, "valid_targets_mean": 8972.3, "valid_targets_min": 390 }, { "epoch": 3.7192118226600988, "grad_norm": 0.18258436718922763, "learning_rate": 7.537891890753879e-06, "loss": 0.2975, "loss_nan_ranks": 0, "loss_rank_avg": 0.10517409443855286, "step": 755, "valid_targets_mean": 9500.0, "valid_targets_min": 3694 }, { "epoch": 3.7438423645320196, "grad_norm": 0.1888454473684914, "learning_rate": 7.27061935586471e-06, "loss": 0.3027, "loss_nan_ranks": 0, "loss_rank_avg": 0.10511292517185211, "step": 760, "valid_targets_mean": 9314.8, "valid_targets_min": 3498 }, { "epoch": 3.768472906403941, "grad_norm": 0.1906383407925554, "learning_rate": 7.007114680991995e-06, "loss": 0.3049, "loss_nan_ranks": 0, "loss_rank_avg": 0.11089085787534714, "step": 765, "valid_targets_mean": 9594.2, "valid_targets_min": 3836 }, { "epoch": 3.793103448275862, "grad_norm": 0.18789808493594767, "learning_rate": 6.747455862760723e-06, "loss": 0.3018, "loss_nan_ranks": 0, "loss_rank_avg": 0.0920998752117157, "step": 770, "valid_targets_mean": 7893.3, "valid_targets_min": 3364 }, { "epoch": 3.8177339901477834, "grad_norm": 0.1946470476381285, "learning_rate": 6.491719759433414e-06, "loss": 0.3021, "loss_nan_ranks": 0, "loss_rank_avg": 0.09941431879997253, "step": 775, "valid_targets_mean": 8573.0, "valid_targets_min": 2428 }, { "epoch": 3.8423645320197046, "grad_norm": 0.18335741825763685, "learning_rate": 6.239982068160251e-06, "loss": 0.2989, "loss_nan_ranks": 0, "loss_rank_avg": 0.10005414485931396, "step": 780, "valid_targets_mean": 8930.7, "valid_targets_min": 3592 }, { "epoch": 3.8669950738916254, "grad_norm": 0.19394167335220494, "learning_rate": 5.9923173025729895e-06, "loss": 0.3018, "loss_nan_ranks": 0, "loss_rank_avg": 0.0917779877781868, "step": 785, "valid_targets_mean": 8102.1, "valid_targets_min": 2504 }, { "epoch": 3.8916256157635467, "grad_norm": 0.18120225767769269, "learning_rate": 5.748798770729071e-06, "loss": 0.2968, "loss_nan_ranks": 0, "loss_rank_avg": 0.09662172943353653, "step": 790, "valid_targets_mean": 9064.0, "valid_targets_min": 2340 }, { "epoch": 3.916256157635468, "grad_norm": 0.19190077238373823, "learning_rate": 5.509498553412727e-06, "loss": 0.3041, "loss_nan_ranks": 0, "loss_rank_avg": 0.10005126893520355, "step": 795, "valid_targets_mean": 8898.5, "valid_targets_min": 794 }, { "epoch": 3.9408866995073892, "grad_norm": 0.17974352936982052, "learning_rate": 5.274487482799206e-06, "loss": 0.2993, "loss_nan_ranks": 0, "loss_rank_avg": 0.09953851997852325, "step": 800, "valid_targets_mean": 8959.3, "valid_targets_min": 322 }, { "epoch": 3.9655172413793105, "grad_norm": 0.18698159849014778, "learning_rate": 5.04383512148871e-06, "loss": 0.307, "loss_nan_ranks": 0, "loss_rank_avg": 0.10734543204307556, "step": 805, "valid_targets_mean": 9183.6, "valid_targets_min": 3696 }, { "epoch": 3.9901477832512313, "grad_norm": 0.18442090586080964, "learning_rate": 4.817609741916009e-06, "loss": 0.3056, "loss_nan_ranks": 0, "loss_rank_avg": 0.0857645720243454, "step": 810, "valid_targets_mean": 7837.0, "valid_targets_min": 2597 }, { "epoch": 4.014778325123153, "grad_norm": 0.18552650025158898, "learning_rate": 4.595878306142059e-06, "loss": 0.2883, "loss_nan_ranks": 0, "loss_rank_avg": 0.09557130932807922, "step": 815, "valid_targets_mean": 8530.3, "valid_targets_min": 1990 }, { "epoch": 4.039408866995074, "grad_norm": 0.19107540271966575, "learning_rate": 4.37870644603336e-06, "loss": 0.2915, "loss_nan_ranks": 0, "loss_rank_avg": 0.08814047276973724, "step": 820, "valid_targets_mean": 8476.8, "valid_targets_min": 2709 }, { "epoch": 4.064039408866995, "grad_norm": 0.21852387313861962, "learning_rate": 4.1661584438351645e-06, "loss": 0.2911, "loss_nan_ranks": 0, "loss_rank_avg": 0.08739292621612549, "step": 825, "valid_targets_mean": 8461.2, "valid_targets_min": 352 }, { "epoch": 4.088669950738916, "grad_norm": 0.1839668348467172, "learning_rate": 3.958297213144084e-06, "loss": 0.2928, "loss_nan_ranks": 0, "loss_rank_avg": 0.07950074970722198, "step": 830, "valid_targets_mean": 7643.1, "valid_targets_min": 1946 }, { "epoch": 4.113300492610837, "grad_norm": 0.18845310147717448, "learning_rate": 3.7551842802858772e-06, "loss": 0.2892, "loss_nan_ranks": 0, "loss_rank_avg": 0.10723338276147842, "step": 835, "valid_targets_mean": 9729.5, "valid_targets_min": 4630 }, { "epoch": 4.137931034482759, "grad_norm": 0.1940491672102659, "learning_rate": 3.5568797661038004e-06, "loss": 0.2913, "loss_nan_ranks": 0, "loss_rank_avg": 0.09230605512857437, "step": 840, "valid_targets_mean": 8704.1, "valid_targets_min": 3915 }, { "epoch": 4.16256157635468, "grad_norm": 0.18469213419035746, "learning_rate": 3.3634423681630392e-06, "loss": 0.2926, "loss_nan_ranks": 0, "loss_rank_avg": 0.10094676911830902, "step": 845, "valid_targets_mean": 9719.5, "valid_targets_min": 4368 }, { "epoch": 4.187192118226601, "grad_norm": 0.18737436904852822, "learning_rate": 3.174929343376374e-06, "loss": 0.2942, "loss_nan_ranks": 0, "loss_rank_avg": 0.09700489789247513, "step": 850, "valid_targets_mean": 9506.8, "valid_targets_min": 859 }, { "epoch": 4.211822660098522, "grad_norm": 0.18423499331496113, "learning_rate": 2.991396491056331e-06, "loss": 0.2835, "loss_nan_ranks": 0, "loss_rank_avg": 0.10383116453886032, "step": 855, "valid_targets_mean": 9004.8, "valid_targets_min": 2993 }, { "epoch": 4.236453201970443, "grad_norm": 0.19530539384844203, "learning_rate": 2.812898136398705e-06, "loss": 0.2938, "loss_nan_ranks": 0, "loss_rank_avg": 0.10272261500358582, "step": 860, "valid_targets_mean": 10417.8, "valid_targets_min": 2835 }, { "epoch": 4.261083743842365, "grad_norm": 0.1768936566418224, "learning_rate": 2.6394871144024926e-06, "loss": 0.2879, "loss_nan_ranks": 0, "loss_rank_avg": 0.08824898302555084, "step": 865, "valid_targets_mean": 8173.8, "valid_targets_min": 761 }, { "epoch": 4.285714285714286, "grad_norm": 0.18803866498160385, "learning_rate": 2.471214754230866e-06, "loss": 0.2894, "loss_nan_ranks": 0, "loss_rank_avg": 0.09874047338962555, "step": 870, "valid_targets_mean": 8255.1, "valid_targets_min": 3232 }, { "epoch": 4.310344827586207, "grad_norm": 0.18305855723051326, "learning_rate": 2.3081308640178945e-06, "loss": 0.2893, "loss_nan_ranks": 0, "loss_rank_avg": 0.09291456639766693, "step": 875, "valid_targets_mean": 8635.8, "valid_targets_min": 3871 }, { "epoch": 4.334975369458128, "grad_norm": 0.17966738406863703, "learning_rate": 2.1502837161254873e-06, "loss": 0.2916, "loss_nan_ranks": 0, "loss_rank_avg": 0.10657632350921631, "step": 880, "valid_targets_mean": 9381.9, "valid_targets_min": 3764 }, { "epoch": 4.359605911330049, "grad_norm": 0.16961839703667175, "learning_rate": 1.9977200328548953e-06, "loss": 0.2853, "loss_nan_ranks": 0, "loss_rank_avg": 0.09088948369026184, "step": 885, "valid_targets_mean": 8735.8, "valid_targets_min": 2093 }, { "epoch": 4.384236453201971, "grad_norm": 0.17218142432918168, "learning_rate": 1.8504849726170637e-06, "loss": 0.2913, "loss_nan_ranks": 0, "loss_rank_avg": 0.09188289195299149, "step": 890, "valid_targets_mean": 8830.7, "valid_targets_min": 2083 }, { "epoch": 4.4088669950738915, "grad_norm": 0.1813446374116515, "learning_rate": 1.7086221165658544e-06, "loss": 0.2897, "loss_nan_ranks": 0, "loss_rank_avg": 0.09599706530570984, "step": 895, "valid_targets_mean": 8826.4, "valid_targets_min": 3172 }, { "epoch": 4.433497536945813, "grad_norm": 0.18498320583010483, "learning_rate": 1.5721734556981761e-06, "loss": 0.2918, "loss_nan_ranks": 0, "loss_rank_avg": 0.09900371730327606, "step": 900, "valid_targets_mean": 8690.2, "valid_targets_min": 2278 }, { "epoch": 4.458128078817734, "grad_norm": 0.20510849376299953, "learning_rate": 1.4411793784247263e-06, "loss": 0.2921, "loss_nan_ranks": 0, "loss_rank_avg": 0.10054295510053635, "step": 905, "valid_targets_mean": 8741.5, "valid_targets_min": 467 }, { "epoch": 4.482758620689655, "grad_norm": 0.1922020499715368, "learning_rate": 1.3156786586151916e-06, "loss": 0.294, "loss_nan_ranks": 0, "loss_rank_avg": 0.10521981865167618, "step": 910, "valid_targets_mean": 9091.2, "valid_targets_min": 3243 }, { "epoch": 4.5073891625615765, "grad_norm": 0.17566941817494466, "learning_rate": 1.195708444121253e-06, "loss": 0.2912, "loss_nan_ranks": 0, "loss_rank_avg": 0.0981191098690033, "step": 915, "valid_targets_mean": 8294.6, "valid_targets_min": 3384 }, { "epoch": 4.532019704433497, "grad_norm": 0.18568976979390986, "learning_rate": 1.0813042457809497e-06, "loss": 0.2891, "loss_nan_ranks": 0, "loss_rank_avg": 0.08985172212123871, "step": 920, "valid_targets_mean": 8663.1, "valid_targets_min": 667 }, { "epoch": 4.556650246305419, "grad_norm": 0.1809563651428104, "learning_rate": 9.724999269075598e-07, "loss": 0.2919, "loss_nan_ranks": 0, "loss_rank_avg": 0.09896715730428696, "step": 925, "valid_targets_mean": 8589.9, "valid_targets_min": 399 }, { "epoch": 4.58128078817734, "grad_norm": 0.1826346698165506, "learning_rate": 8.693276932661732e-07, "loss": 0.2918, "loss_nan_ranks": 0, "loss_rank_avg": 0.09368403255939484, "step": 930, "valid_targets_mean": 8382.1, "valid_targets_min": 2243 }, { "epoch": 4.605911330049262, "grad_norm": 0.17397784390869644, "learning_rate": 7.718180835408584e-07, "loss": 0.2899, "loss_nan_ranks": 0, "loss_rank_avg": 0.09835617244243622, "step": 935, "valid_targets_mean": 8955.2, "valid_targets_min": 378 }, { "epoch": 4.630541871921182, "grad_norm": 0.17704960582110957, "learning_rate": 6.799999602953189e-07, "loss": 0.2903, "loss_nan_ranks": 0, "loss_rank_avg": 0.09321548044681549, "step": 940, "valid_targets_mean": 8988.6, "valid_targets_min": 2533 }, { "epoch": 4.655172413793103, "grad_norm": 0.1741304537218507, "learning_rate": 5.939005014296428e-07, "loss": 0.2904, "loss_nan_ranks": 0, "loss_rank_avg": 0.09430979192256927, "step": 945, "valid_targets_mean": 8396.6, "valid_targets_min": 337 }, { "epoch": 4.679802955665025, "grad_norm": 0.16592953591918666, "learning_rate": 5.135451921357337e-07, "loss": 0.2939, "loss_nan_ranks": 0, "loss_rank_avg": 0.10379470884799957, "step": 950, "valid_targets_mean": 9645.2, "valid_targets_min": 2567 }, { "epoch": 4.704433497536946, "grad_norm": 0.17443346928975734, "learning_rate": 4.3895781735375566e-07, "loss": 0.2896, "loss_nan_ranks": 0, "loss_rank_avg": 0.09814979881048203, "step": 955, "valid_targets_mean": 8773.4, "valid_targets_min": 406 }, { "epoch": 4.7290640394088665, "grad_norm": 0.17648712594362892, "learning_rate": 3.70160454731876e-07, "loss": 0.2935, "loss_nan_ranks": 0, "loss_rank_avg": 0.09249699860811234, "step": 960, "valid_targets_mean": 8434.4, "valid_targets_min": 410 }, { "epoch": 4.753694581280788, "grad_norm": 0.16739065144158322, "learning_rate": 3.0717346809132407e-07, "loss": 0.2902, "loss_nan_ranks": 0, "loss_rank_avg": 0.10244876146316528, "step": 965, "valid_targets_mean": 9382.5, "valid_targets_min": 2089 }, { "epoch": 4.778325123152709, "grad_norm": 0.1667695775270446, "learning_rate": 2.5001550139877707e-07, "loss": 0.2858, "loss_nan_ranks": 0, "loss_rank_avg": 0.09757320582866669, "step": 970, "valid_targets_mean": 9711.1, "valid_targets_min": 5200 }, { "epoch": 4.802955665024631, "grad_norm": 0.17184028910965868, "learning_rate": 1.987034732477877e-07, "loss": 0.2892, "loss_nan_ranks": 0, "loss_rank_avg": 0.09788213670253754, "step": 975, "valid_targets_mean": 8935.1, "valid_targets_min": 3319 }, { "epoch": 4.827586206896552, "grad_norm": 0.16740666268964893, "learning_rate": 1.5325257185093923e-07, "loss": 0.2851, "loss_nan_ranks": 0, "loss_rank_avg": 0.09289947897195816, "step": 980, "valid_targets_mean": 8989.3, "valid_targets_min": 504 }, { "epoch": 4.852216748768473, "grad_norm": 0.17517346033665723, "learning_rate": 1.1367625054416575e-07, "loss": 0.2884, "loss_nan_ranks": 0, "loss_rank_avg": 0.09196974337100983, "step": 985, "valid_targets_mean": 8455.2, "valid_targets_min": 845 }, { "epoch": 4.876847290640394, "grad_norm": 0.17165728117501497, "learning_rate": 7.998622380461563e-08, "loss": 0.2921, "loss_nan_ranks": 0, "loss_rank_avg": 0.10432468354701996, "step": 990, "valid_targets_mean": 8684.4, "valid_targets_min": 387 }, { "epoch": 4.901477832512315, "grad_norm": 0.16816491897161798, "learning_rate": 5.219246378319387e-08, "loss": 0.2887, "loss_nan_ranks": 0, "loss_rank_avg": 0.0968252569437027, "step": 995, "valid_targets_mean": 8864.2, "valid_targets_min": 4695 }, { "epoch": 4.926108374384237, "grad_norm": 0.18359960757267443, "learning_rate": 3.030319735283449e-08, "loss": 0.3004, "loss_nan_ranks": 0, "loss_rank_avg": 0.09898233413696289, "step": 1000, "valid_targets_mean": 8488.2, "valid_targets_min": 3522 }, { "epoch": 4.9507389162561575, "grad_norm": 0.1729827087715371, "learning_rate": 1.4324903673370583e-08, "loss": 0.2872, "loss_nan_ranks": 0, "loss_rank_avg": 0.10422653704881668, "step": 1005, "valid_targets_mean": 9734.5, "valid_targets_min": 5199 }, { "epoch": 4.975369458128079, "grad_norm": 0.1665182843059149, "learning_rate": 4.262312273721758e-09, "loss": 0.2894, "loss_nan_ranks": 0, "loss_rank_avg": 0.08595976233482361, "step": 1010, "valid_targets_mean": 7786.4, "valid_targets_min": 2848 }, { "epoch": 5.0, "grad_norm": 0.17102014635406348, "learning_rate": 1.184016519673037e-10, "loss": 0.2931, "loss_nan_ranks": 0, "loss_rank_avg": 0.0942406952381134, "step": 1015, "valid_targets_mean": 8724.4, "valid_targets_min": 2268 }, { "epoch": 5.0, "step": 1015, "total_flos": 3.756182037619278e+18, "train_loss": 0.0, "train_runtime": 2.2907, "train_samples_per_second": 42523.793, "train_steps_per_second": 443.093 } ], "logging_steps": 5, "max_steps": 1015, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.756182037619278e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }