{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "eval_steps": 500, "global_step": 700, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.050335570469798654, "grad_norm": 14.067043242829744, "learning_rate": 2.285714285714286e-06, "loss": 0.9272, "loss_nan_ranks": 0, "loss_rank_avg": 0.26700228452682495, "step": 5, "valid_targets_mean": 6933.7, "valid_targets_min": 2457 }, { "epoch": 0.10067114093959731, "grad_norm": 6.914800443529387, "learning_rate": 5.142857142857142e-06, "loss": 0.8868, "loss_nan_ranks": 0, "loss_rank_avg": 0.29330819845199585, "step": 10, "valid_targets_mean": 8950.2, "valid_targets_min": 3280 }, { "epoch": 0.15100671140939598, "grad_norm": 2.313179660547222, "learning_rate": 8.000000000000001e-06, "loss": 0.7848, "loss_nan_ranks": 0, "loss_rank_avg": 0.2518497109413147, "step": 15, "valid_targets_mean": 8844.1, "valid_targets_min": 2299 }, { "epoch": 0.20134228187919462, "grad_norm": 1.5075390661307202, "learning_rate": 1.0857142857142858e-05, "loss": 0.7382, "loss_nan_ranks": 0, "loss_rank_avg": 0.22578322887420654, "step": 20, "valid_targets_mean": 8364.3, "valid_targets_min": 3708 }, { "epoch": 0.2516778523489933, "grad_norm": 1.1501359371585838, "learning_rate": 1.3714285714285716e-05, "loss": 0.7031, "loss_nan_ranks": 0, "loss_rank_avg": 0.2374565750360489, "step": 25, "valid_targets_mean": 9124.7, "valid_targets_min": 3015 }, { "epoch": 0.30201342281879195, "grad_norm": 0.8105710331742373, "learning_rate": 1.6571428571428574e-05, "loss": 0.6708, "loss_nan_ranks": 0, "loss_rank_avg": 0.2113051414489746, "step": 30, "valid_targets_mean": 8698.6, "valid_targets_min": 1259 }, { "epoch": 0.3523489932885906, "grad_norm": 0.6231544559752229, "learning_rate": 1.942857142857143e-05, "loss": 0.6344, "loss_nan_ranks": 0, "loss_rank_avg": 0.19961217045783997, "step": 35, "valid_targets_mean": 8497.4, "valid_targets_min": 3348 }, { "epoch": 0.40268456375838924, "grad_norm": 0.4897983592186633, "learning_rate": 2.2285714285714287e-05, "loss": 0.6074, "loss_nan_ranks": 0, "loss_rank_avg": 0.20259523391723633, "step": 40, "valid_targets_mean": 8546.9, "valid_targets_min": 2063 }, { "epoch": 0.45302013422818793, "grad_norm": 0.43138833987068276, "learning_rate": 2.5142857142857143e-05, "loss": 0.5747, "loss_nan_ranks": 0, "loss_rank_avg": 0.2051178216934204, "step": 45, "valid_targets_mean": 8575.3, "valid_targets_min": 2840 }, { "epoch": 0.5033557046979866, "grad_norm": 0.3841996600572438, "learning_rate": 2.8e-05, "loss": 0.5489, "loss_nan_ranks": 0, "loss_rank_avg": 0.19739781320095062, "step": 50, "valid_targets_mean": 8994.1, "valid_targets_min": 3748 }, { "epoch": 0.5536912751677853, "grad_norm": 0.3384894884883755, "learning_rate": 3.085714285714286e-05, "loss": 0.5397, "loss_nan_ranks": 0, "loss_rank_avg": 0.17745928466320038, "step": 55, "valid_targets_mean": 8368.2, "valid_targets_min": 4038 }, { "epoch": 0.6040268456375839, "grad_norm": 0.30063971006378454, "learning_rate": 3.3714285714285716e-05, "loss": 0.5154, "loss_nan_ranks": 0, "loss_rank_avg": 0.17042505741119385, "step": 60, "valid_targets_mean": 8646.4, "valid_targets_min": 3884 }, { "epoch": 0.6543624161073825, "grad_norm": 0.27761325302634815, "learning_rate": 3.6571428571428576e-05, "loss": 0.5001, "loss_nan_ranks": 0, "loss_rank_avg": 0.1616188883781433, "step": 65, "valid_targets_mean": 8152.5, "valid_targets_min": 2735 }, { "epoch": 0.7046979865771812, "grad_norm": 0.26215808993297834, "learning_rate": 3.9428571428571435e-05, "loss": 0.4901, "loss_nan_ranks": 0, "loss_rank_avg": 0.14332228899002075, "step": 70, "valid_targets_mean": 7592.4, "valid_targets_min": 2951 }, { "epoch": 0.7550335570469798, "grad_norm": 0.28480265821738415, "learning_rate": 3.9996021455410475e-05, "loss": 0.4879, "loss_nan_ranks": 0, "loss_rank_avg": 0.1745082437992096, "step": 75, "valid_targets_mean": 8807.3, "valid_targets_min": 4207 }, { "epoch": 0.8053691275167785, "grad_norm": 0.28063407810285146, "learning_rate": 3.9979861330826295e-05, "loss": 0.4644, "loss_nan_ranks": 0, "loss_rank_avg": 0.14077091217041016, "step": 80, "valid_targets_mean": 7697.2, "valid_targets_min": 3204 }, { "epoch": 0.8557046979865772, "grad_norm": 0.2685419255892034, "learning_rate": 3.9951281005196486e-05, "loss": 0.4603, "loss_nan_ranks": 0, "loss_rank_avg": 0.15987437963485718, "step": 85, "valid_targets_mean": 9192.3, "valid_targets_min": 3074 }, { "epoch": 0.9060402684563759, "grad_norm": 0.25518469304078806, "learning_rate": 3.99102982450803e-05, "loss": 0.4573, "loss_nan_ranks": 0, "loss_rank_avg": 0.16007889807224274, "step": 90, "valid_targets_mean": 9071.2, "valid_targets_min": 4077 }, { "epoch": 0.9563758389261745, "grad_norm": 0.292125810481402, "learning_rate": 3.985693852683675e-05, "loss": 0.4411, "loss_nan_ranks": 0, "loss_rank_avg": 0.15639987587928772, "step": 95, "valid_targets_mean": 9211.0, "valid_targets_min": 3703 }, { "epoch": 1.0, "grad_norm": 0.3544831597811695, "learning_rate": 3.9791235020787546e-05, "loss": 0.4358, "loss_nan_ranks": 0, "loss_rank_avg": 0.41546738147735596, "step": 100, "valid_targets_mean": 8125.5, "valid_targets_min": 3221 }, { "epoch": 1.0503355704697988, "grad_norm": 0.3048951044180603, "learning_rate": 3.971322857059726e-05, "loss": 0.4463, "loss_nan_ranks": 0, "loss_rank_avg": 0.13855554163455963, "step": 105, "valid_targets_mean": 8141.4, "valid_targets_min": 3502 }, { "epoch": 1.1006711409395973, "grad_norm": 0.27845721026650505, "learning_rate": 3.962296766788345e-05, "loss": 0.4345, "loss_nan_ranks": 0, "loss_rank_avg": 0.14278040826320648, "step": 110, "valid_targets_mean": 8561.2, "valid_targets_min": 3965 }, { "epoch": 1.151006711409396, "grad_norm": 0.3061482398268534, "learning_rate": 3.952050842207249e-05, "loss": 0.4304, "loss_nan_ranks": 0, "loss_rank_avg": 0.14196383953094482, "step": 115, "valid_targets_mean": 8437.8, "valid_targets_min": 2414 }, { "epoch": 1.2013422818791946, "grad_norm": 0.2626030786077286, "learning_rate": 3.940591452551993e-05, "loss": 0.4331, "loss_nan_ranks": 0, "loss_rank_avg": 0.13812202215194702, "step": 120, "valid_targets_mean": 8286.2, "valid_targets_min": 2490 }, { "epoch": 1.2516778523489933, "grad_norm": 0.26798574842262085, "learning_rate": 3.927925721391707e-05, "loss": 0.4195, "loss_nan_ranks": 0, "loss_rank_avg": 0.13096113502979279, "step": 125, "valid_targets_mean": 8221.9, "valid_targets_min": 3465 }, { "epoch": 1.302013422818792, "grad_norm": 0.305138300632288, "learning_rate": 3.914061522200825e-05, "loss": 0.4256, "loss_nan_ranks": 0, "loss_rank_avg": 0.11967723816633224, "step": 130, "valid_targets_mean": 7786.5, "valid_targets_min": 2421 }, { "epoch": 1.3523489932885906, "grad_norm": 0.2862842034727045, "learning_rate": 3.899007473464653e-05, "loss": 0.4201, "loss_nan_ranks": 0, "loss_rank_avg": 0.1479918360710144, "step": 135, "valid_targets_mean": 9349.9, "valid_targets_min": 2832 }, { "epoch": 1.4026845637583891, "grad_norm": 0.2637388574350784, "learning_rate": 3.882772933321807e-05, "loss": 0.4214, "loss_nan_ranks": 0, "loss_rank_avg": 0.14767968654632568, "step": 140, "valid_targets_mean": 9506.8, "valid_targets_min": 4115 }, { "epoch": 1.4530201342281879, "grad_norm": 0.3015172646055234, "learning_rate": 3.8653679937468556e-05, "loss": 0.4147, "loss_nan_ranks": 0, "loss_rank_avg": 0.13590680062770844, "step": 145, "valid_targets_mean": 8029.0, "valid_targets_min": 2691 }, { "epoch": 1.5033557046979866, "grad_norm": 0.2570832110359043, "learning_rate": 3.846803474276789e-05, "loss": 0.4086, "loss_nan_ranks": 0, "loss_rank_avg": 0.14070050418376923, "step": 150, "valid_targets_mean": 9124.1, "valid_targets_min": 3411 }, { "epoch": 1.5536912751677852, "grad_norm": 0.2798040054064293, "learning_rate": 3.827090915285202e-05, "loss": 0.4146, "loss_nan_ranks": 0, "loss_rank_avg": 0.1273345947265625, "step": 155, "valid_targets_mean": 7573.3, "valid_targets_min": 2830 }, { "epoch": 1.604026845637584, "grad_norm": 0.2685619362726408, "learning_rate": 3.806242570808384e-05, "loss": 0.4118, "loss_nan_ranks": 0, "loss_rank_avg": 0.12897896766662598, "step": 160, "valid_targets_mean": 8312.7, "valid_targets_min": 2632 }, { "epoch": 1.6543624161073827, "grad_norm": 0.2657429865288553, "learning_rate": 3.7842714009277675e-05, "loss": 0.4131, "loss_nan_ranks": 0, "loss_rank_avg": 0.12389227747917175, "step": 165, "valid_targets_mean": 8151.8, "valid_targets_min": 2946 }, { "epoch": 1.7046979865771812, "grad_norm": 0.28840825658933456, "learning_rate": 3.761191063713476e-05, "loss": 0.4071, "loss_nan_ranks": 0, "loss_rank_avg": 0.136656254529953, "step": 170, "valid_targets_mean": 8608.5, "valid_targets_min": 4102 }, { "epoch": 1.7550335570469797, "grad_norm": 0.2596821078603819, "learning_rate": 3.737015906733978e-05, "loss": 0.4106, "loss_nan_ranks": 0, "loss_rank_avg": 0.11933685839176178, "step": 175, "valid_targets_mean": 8410.7, "valid_targets_min": 3206 }, { "epoch": 1.8053691275167785, "grad_norm": 0.26139104771576044, "learning_rate": 3.711760958137118e-05, "loss": 0.4056, "loss_nan_ranks": 0, "loss_rank_avg": 0.1329861581325531, "step": 180, "valid_targets_mean": 8316.8, "valid_targets_min": 2862 }, { "epoch": 1.8557046979865772, "grad_norm": 0.305203527481438, "learning_rate": 3.6854419173080784e-05, "loss": 0.4115, "loss_nan_ranks": 0, "loss_rank_avg": 0.15162314474582672, "step": 185, "valid_targets_mean": 8494.5, "valid_targets_min": 3703 }, { "epoch": 1.9060402684563758, "grad_norm": 0.2924940281192017, "learning_rate": 3.658075145110083e-05, "loss": 0.4071, "loss_nan_ranks": 0, "loss_rank_avg": 0.12577557563781738, "step": 190, "valid_targets_mean": 8278.9, "valid_targets_min": 3343 }, { "epoch": 1.9563758389261745, "grad_norm": 0.26987694233570014, "learning_rate": 3.6296776537138905e-05, "loss": 0.4078, "loss_nan_ranks": 0, "loss_rank_avg": 0.140755757689476, "step": 195, "valid_targets_mean": 8258.5, "valid_targets_min": 2520 }, { "epoch": 2.0, "grad_norm": 0.37739893150325504, "learning_rate": 3.600267096022413e-05, "loss": 0.4128, "loss_nan_ranks": 0, "loss_rank_avg": 0.4272458553314209, "step": 200, "valid_targets_mean": 9129.7, "valid_targets_min": 2470 }, { "epoch": 2.0503355704697985, "grad_norm": 0.3049953483433764, "learning_rate": 3.569861754697045e-05, "loss": 0.3993, "loss_nan_ranks": 0, "loss_rank_avg": 0.11460161954164505, "step": 205, "valid_targets_mean": 7970.5, "valid_targets_min": 3494 }, { "epoch": 2.1006711409395975, "grad_norm": 0.2999304719531947, "learning_rate": 3.538480530792498e-05, "loss": 0.3982, "loss_nan_ranks": 0, "loss_rank_avg": 0.12947125732898712, "step": 210, "valid_targets_mean": 8025.0, "valid_targets_min": 2579 }, { "epoch": 2.151006711409396, "grad_norm": 0.3353112866997524, "learning_rate": 3.5061429320072225e-05, "loss": 0.3894, "loss_nan_ranks": 0, "loss_rank_avg": 0.12318155169487, "step": 215, "valid_targets_mean": 8213.8, "valid_targets_min": 3285 }, { "epoch": 2.2013422818791946, "grad_norm": 0.30258943318520176, "learning_rate": 3.472869060556724e-05, "loss": 0.3953, "loss_nan_ranks": 0, "loss_rank_avg": 0.13107575476169586, "step": 220, "valid_targets_mean": 9016.2, "valid_targets_min": 3927 }, { "epoch": 2.251677852348993, "grad_norm": 0.2766581007748473, "learning_rate": 3.438679600677303e-05, "loss": 0.3862, "loss_nan_ranks": 0, "loss_rank_avg": 0.12715698778629303, "step": 225, "valid_targets_mean": 8472.8, "valid_targets_min": 3374 }, { "epoch": 2.302013422818792, "grad_norm": 0.27363285439160917, "learning_rate": 3.4035958057679836e-05, "loss": 0.3993, "loss_nan_ranks": 0, "loss_rank_avg": 0.13462711870670319, "step": 230, "valid_targets_mean": 8907.7, "valid_targets_min": 3054 }, { "epoch": 2.3523489932885906, "grad_norm": 0.3031249283095144, "learning_rate": 3.36763948517864e-05, "loss": 0.3919, "loss_nan_ranks": 0, "loss_rank_avg": 0.1310027539730072, "step": 235, "valid_targets_mean": 8485.2, "valid_targets_min": 2790 }, { "epoch": 2.402684563758389, "grad_norm": 0.29023372434273265, "learning_rate": 3.330832990652523e-05, "loss": 0.3958, "loss_nan_ranks": 0, "loss_rank_avg": 0.14061188697814941, "step": 240, "valid_targets_mean": 8742.7, "valid_targets_min": 2578 }, { "epoch": 2.453020134228188, "grad_norm": 0.23540024236787796, "learning_rate": 3.293199202431599e-05, "loss": 0.3947, "loss_nan_ranks": 0, "loss_rank_avg": 0.12830784916877747, "step": 245, "valid_targets_mean": 8962.8, "valid_targets_min": 2643 }, { "epoch": 2.5033557046979866, "grad_norm": 0.25906215046606, "learning_rate": 3.2547615150333855e-05, "loss": 0.3863, "loss_nan_ranks": 0, "loss_rank_avg": 0.12897969782352448, "step": 250, "valid_targets_mean": 7864.5, "valid_targets_min": 3231 }, { "epoch": 2.553691275167785, "grad_norm": 0.2641585608461065, "learning_rate": 3.2155438227080607e-05, "loss": 0.3934, "loss_nan_ranks": 0, "loss_rank_avg": 0.12830249965190887, "step": 255, "valid_targets_mean": 9147.8, "valid_targets_min": 2996 }, { "epoch": 2.604026845637584, "grad_norm": 0.28192077058039483, "learning_rate": 3.1755705045849465e-05, "loss": 0.3912, "loss_nan_ranks": 0, "loss_rank_avg": 0.14721933007240295, "step": 260, "valid_targets_mean": 8236.8, "valid_targets_min": 2500 }, { "epoch": 2.6543624161073827, "grad_norm": 0.2630346553683364, "learning_rate": 3.134866409517564e-05, "loss": 0.3956, "loss_nan_ranks": 0, "loss_rank_avg": 0.1302284449338913, "step": 265, "valid_targets_mean": 8351.7, "valid_targets_min": 2983 }, { "epoch": 2.704697986577181, "grad_norm": 0.27865692462524355, "learning_rate": 3.0934568406366875e-05, "loss": 0.3917, "loss_nan_ranks": 0, "loss_rank_avg": 0.12824495136737823, "step": 270, "valid_targets_mean": 7805.7, "valid_targets_min": 3209 }, { "epoch": 2.7550335570469797, "grad_norm": 0.2577331217894111, "learning_rate": 3.0513675396210094e-05, "loss": 0.3851, "loss_nan_ranks": 0, "loss_rank_avg": 0.13080325722694397, "step": 275, "valid_targets_mean": 8240.6, "valid_targets_min": 3069 }, { "epoch": 2.8053691275167782, "grad_norm": 0.2635426750558006, "learning_rate": 3.0086246706951888e-05, "loss": 0.3859, "loss_nan_ranks": 0, "loss_rank_avg": 0.12472230941057205, "step": 280, "valid_targets_mean": 8878.5, "valid_targets_min": 2369 }, { "epoch": 2.8557046979865772, "grad_norm": 0.2789257907218796, "learning_rate": 2.965254804365222e-05, "loss": 0.3828, "loss_nan_ranks": 0, "loss_rank_avg": 0.1399698108434677, "step": 285, "valid_targets_mean": 8741.5, "valid_targets_min": 4390 }, { "epoch": 2.9060402684563758, "grad_norm": 0.28293835669184936, "learning_rate": 2.921284900901265e-05, "loss": 0.3876, "loss_nan_ranks": 0, "loss_rank_avg": 0.12492035329341888, "step": 290, "valid_targets_mean": 7754.4, "valid_targets_min": 1560 }, { "epoch": 2.9563758389261743, "grad_norm": 0.2481634552274392, "learning_rate": 2.876742293578155e-05, "loss": 0.3867, "loss_nan_ranks": 0, "loss_rank_avg": 0.12382625788450241, "step": 295, "valid_targets_mean": 8065.1, "valid_targets_min": 2619 }, { "epoch": 3.0, "grad_norm": 0.38393308540389254, "learning_rate": 2.831654671684066e-05, "loss": 0.3839, "loss_nan_ranks": 0, "loss_rank_avg": 0.3663029372692108, "step": 300, "valid_targets_mean": 8224.7, "valid_targets_min": 4535 }, { "epoch": 3.0503355704697985, "grad_norm": 0.30636912119481896, "learning_rate": 2.7860500633078475e-05, "loss": 0.3796, "loss_nan_ranks": 0, "loss_rank_avg": 0.13847512006759644, "step": 305, "valid_targets_mean": 9187.4, "valid_targets_min": 1685 }, { "epoch": 3.1006711409395975, "grad_norm": 0.2600938627349429, "learning_rate": 2.7399568179157582e-05, "loss": 0.3766, "loss_nan_ranks": 0, "loss_rank_avg": 0.1181330680847168, "step": 310, "valid_targets_mean": 7997.2, "valid_targets_min": 3698 }, { "epoch": 3.151006711409396, "grad_norm": 0.2584179619503242, "learning_rate": 2.693403588728415e-05, "loss": 0.3801, "loss_nan_ranks": 0, "loss_rank_avg": 0.12773603200912476, "step": 315, "valid_targets_mean": 8189.1, "valid_targets_min": 3971 }, { "epoch": 3.2013422818791946, "grad_norm": 0.2774796872269688, "learning_rate": 2.6464193149089204e-05, "loss": 0.3812, "loss_nan_ranks": 0, "loss_rank_avg": 0.10868334770202637, "step": 320, "valid_targets_mean": 7379.7, "valid_targets_min": 3032 }, { "epoch": 3.251677852348993, "grad_norm": 0.3133065386988344, "learning_rate": 2.5990332035732388e-05, "loss": 0.3745, "loss_nan_ranks": 0, "loss_rank_avg": 0.11322431266307831, "step": 325, "valid_targets_mean": 7262.5, "valid_targets_min": 3063 }, { "epoch": 3.302013422818792, "grad_norm": 0.2758530310070607, "learning_rate": 2.5512747116339985e-05, "loss": 0.3774, "loss_nan_ranks": 0, "loss_rank_avg": 0.12534014880657196, "step": 330, "valid_targets_mean": 8078.2, "valid_targets_min": 2959 }, { "epoch": 3.3523489932885906, "grad_norm": 0.2609966069352176, "learning_rate": 2.5031735274890176e-05, "loss": 0.3814, "loss_nan_ranks": 0, "loss_rank_avg": 0.12462468445301056, "step": 335, "valid_targets_mean": 8127.2, "valid_targets_min": 3820 }, { "epoch": 3.402684563758389, "grad_norm": 0.254083690107489, "learning_rate": 2.454759552565923e-05, "loss": 0.376, "loss_nan_ranks": 0, "loss_rank_avg": 0.11217983812093735, "step": 340, "valid_targets_mean": 7862.0, "valid_targets_min": 2752 }, { "epoch": 3.453020134228188, "grad_norm": 0.3236658923875938, "learning_rate": 2.4060628827343525e-05, "loss": 0.3756, "loss_nan_ranks": 0, "loss_rank_avg": 0.13022278249263763, "step": 345, "valid_targets_mean": 8530.5, "valid_targets_min": 3280 }, { "epoch": 3.5033557046979866, "grad_norm": 0.239075753126642, "learning_rate": 2.3571137895972735e-05, "loss": 0.3786, "loss_nan_ranks": 0, "loss_rank_avg": 0.13119235634803772, "step": 350, "valid_targets_mean": 9104.2, "valid_targets_min": 3026 }, { "epoch": 3.553691275167785, "grad_norm": 0.2376608883417584, "learning_rate": 2.307942701673067e-05, "loss": 0.3791, "loss_nan_ranks": 0, "loss_rank_avg": 0.11539403349161148, "step": 355, "valid_targets_mean": 7391.9, "valid_targets_min": 3058 }, { "epoch": 3.604026845637584, "grad_norm": 0.23963182948426243, "learning_rate": 2.258580185480067e-05, "loss": 0.3864, "loss_nan_ranks": 0, "loss_rank_avg": 0.12229941040277481, "step": 360, "valid_targets_mean": 8436.8, "valid_targets_min": 4365 }, { "epoch": 3.6543624161073827, "grad_norm": 0.25828526074715563, "learning_rate": 2.209056926535307e-05, "loss": 0.3783, "loss_nan_ranks": 0, "loss_rank_avg": 0.12994864583015442, "step": 365, "valid_targets_mean": 9589.9, "valid_targets_min": 2527 }, { "epoch": 3.704697986577181, "grad_norm": 0.2600007439024421, "learning_rate": 2.1594037102793054e-05, "loss": 0.3763, "loss_nan_ranks": 0, "loss_rank_avg": 0.13144755363464355, "step": 370, "valid_targets_mean": 8440.9, "valid_targets_min": 3689 }, { "epoch": 3.7550335570469797, "grad_norm": 0.2826423814550457, "learning_rate": 2.1096514029387204e-05, "loss": 0.3747, "loss_nan_ranks": 0, "loss_rank_avg": 0.11268685758113861, "step": 375, "valid_targets_mean": 8373.9, "valid_targets_min": 2523 }, { "epoch": 3.8053691275167782, "grad_norm": 0.23106532956293616, "learning_rate": 2.0598309323387974e-05, "loss": 0.3776, "loss_nan_ranks": 0, "loss_rank_avg": 0.11632819473743439, "step": 380, "valid_targets_mean": 7967.5, "valid_targets_min": 2448 }, { "epoch": 3.8557046979865772, "grad_norm": 0.2793229698264241, "learning_rate": 2.0099732686775165e-05, "loss": 0.3745, "loss_nan_ranks": 0, "loss_rank_avg": 0.12431603670120239, "step": 385, "valid_targets_mean": 8111.6, "valid_targets_min": 3320 }, { "epoch": 3.9060402684563758, "grad_norm": 0.25004553466508994, "learning_rate": 1.9601094052734043e-05, "loss": 0.3767, "loss_nan_ranks": 0, "loss_rank_avg": 0.11581622064113617, "step": 390, "valid_targets_mean": 7781.6, "valid_targets_min": 2793 }, { "epoch": 3.9563758389261743, "grad_norm": 0.23606564352865256, "learning_rate": 1.910270339298971e-05, "loss": 0.374, "loss_nan_ranks": 0, "loss_rank_avg": 0.12360651046037674, "step": 395, "valid_targets_mean": 9079.7, "valid_targets_min": 2256 }, { "epoch": 4.0, "grad_norm": 0.35482595437396325, "learning_rate": 1.8604870525117496e-05, "loss": 0.3722, "loss_nan_ranks": 0, "loss_rank_avg": 0.34915870428085327, "step": 400, "valid_targets_mean": 8397.6, "valid_targets_min": 2217 }, { "epoch": 4.050335570469799, "grad_norm": 0.2342122173238615, "learning_rate": 1.810790491994926e-05, "loss": 0.3755, "loss_nan_ranks": 0, "loss_rank_avg": 0.12274056673049927, "step": 405, "valid_targets_mean": 7699.5, "valid_targets_min": 3326 }, { "epoch": 4.100671140939597, "grad_norm": 0.23963948392191442, "learning_rate": 1.7612115509195118e-05, "loss": 0.3774, "loss_nan_ranks": 0, "loss_rank_avg": 0.1295328140258789, "step": 410, "valid_targets_mean": 8832.6, "valid_targets_min": 3074 }, { "epoch": 4.151006711409396, "grad_norm": 0.243907924996217, "learning_rate": 1.7117810493400403e-05, "loss": 0.3698, "loss_nan_ranks": 0, "loss_rank_avg": 0.13174119591712952, "step": 415, "valid_targets_mean": 8698.4, "valid_targets_min": 2273 }, { "epoch": 4.201342281879195, "grad_norm": 0.26480175820703833, "learning_rate": 1.6625297150357103e-05, "loss": 0.3661, "loss_nan_ranks": 0, "loss_rank_avg": 0.1256350874900818, "step": 420, "valid_targets_mean": 8510.8, "valid_targets_min": 2007 }, { "epoch": 4.251677852348993, "grad_norm": 0.23403758629417248, "learning_rate": 1.613488164408894e-05, "loss": 0.3726, "loss_nan_ranks": 0, "loss_rank_avg": 0.13531510531902313, "step": 425, "valid_targets_mean": 9202.9, "valid_targets_min": 4057 }, { "epoch": 4.302013422818792, "grad_norm": 0.24821740250948118, "learning_rate": 1.5646868834528756e-05, "loss": 0.3754, "loss_nan_ranks": 0, "loss_rank_avg": 0.12739360332489014, "step": 430, "valid_targets_mean": 8427.7, "valid_targets_min": 2689 }, { "epoch": 4.35234899328859, "grad_norm": 0.26401888784599165, "learning_rate": 1.5161562088006649e-05, "loss": 0.3683, "loss_nan_ranks": 0, "loss_rank_avg": 0.12882062792778015, "step": 435, "valid_targets_mean": 8841.7, "valid_targets_min": 3116 }, { "epoch": 4.402684563758389, "grad_norm": 0.24650023655502046, "learning_rate": 1.46792630886665e-05, "loss": 0.3727, "loss_nan_ranks": 0, "loss_rank_avg": 0.11792377382516861, "step": 440, "valid_targets_mean": 8282.2, "valid_targets_min": 3285 }, { "epoch": 4.453020134228188, "grad_norm": 0.21989371193578802, "learning_rate": 1.4200271650928277e-05, "loss": 0.3691, "loss_nan_ranks": 0, "loss_rank_avg": 0.12472942471504211, "step": 445, "valid_targets_mean": 8738.6, "valid_targets_min": 3032 }, { "epoch": 4.503355704697986, "grad_norm": 0.25636623550713955, "learning_rate": 1.3724885533112595e-05, "loss": 0.3713, "loss_nan_ranks": 0, "loss_rank_avg": 0.12675310671329498, "step": 450, "valid_targets_mean": 9018.7, "valid_targets_min": 3726 }, { "epoch": 4.553691275167785, "grad_norm": 0.21737284709139357, "learning_rate": 1.3253400252343403e-05, "loss": 0.3666, "loss_nan_ranks": 0, "loss_rank_avg": 0.13098780810832977, "step": 455, "valid_targets_mean": 8928.5, "valid_targets_min": 1627 }, { "epoch": 4.604026845637584, "grad_norm": 0.28414631511633537, "learning_rate": 1.2786108900843927e-05, "loss": 0.3675, "loss_nan_ranks": 0, "loss_rank_avg": 0.1366838663816452, "step": 460, "valid_targets_mean": 8725.3, "valid_targets_min": 2298 }, { "epoch": 4.654362416107382, "grad_norm": 0.23351941571371157, "learning_rate": 1.2323301963739995e-05, "loss": 0.3664, "loss_nan_ranks": 0, "loss_rank_avg": 0.10459558665752411, "step": 465, "valid_targets_mean": 7133.8, "valid_targets_min": 2295 }, { "epoch": 4.704697986577181, "grad_norm": 0.24312028409352227, "learning_rate": 1.1865267138484e-05, "loss": 0.367, "loss_nan_ranks": 0, "loss_rank_avg": 0.12919902801513672, "step": 470, "valid_targets_mean": 8653.2, "valid_targets_min": 3442 }, { "epoch": 4.75503355704698, "grad_norm": 0.2413133969520935, "learning_rate": 1.1412289156011816e-05, "loss": 0.3686, "loss_nan_ranks": 0, "loss_rank_avg": 0.12121167778968811, "step": 475, "valid_targets_mean": 9176.2, "valid_targets_min": 3055 }, { "epoch": 4.805369127516778, "grad_norm": 0.24338773318065152, "learning_rate": 1.0964649603743837e-05, "loss": 0.3681, "loss_nan_ranks": 0, "loss_rank_avg": 0.1103016585111618, "step": 480, "valid_targets_mean": 7677.8, "valid_targets_min": 913 }, { "epoch": 4.855704697986577, "grad_norm": 0.2090170817247366, "learning_rate": 1.0522626750540029e-05, "loss": 0.3656, "loss_nan_ranks": 0, "loss_rank_avg": 0.11717471480369568, "step": 485, "valid_targets_mean": 9180.3, "valid_targets_min": 3708 }, { "epoch": 4.906040268456376, "grad_norm": 0.22269853809647422, "learning_rate": 1.0086495373718048e-05, "loss": 0.3677, "loss_nan_ranks": 0, "loss_rank_avg": 0.13055413961410522, "step": 490, "valid_targets_mean": 8114.8, "valid_targets_min": 3226 }, { "epoch": 4.956375838926174, "grad_norm": 0.22882550873144628, "learning_rate": 9.656526588241745e-06, "loss": 0.3679, "loss_nan_ranks": 0, "loss_rank_avg": 0.11334412544965744, "step": 495, "valid_targets_mean": 8118.7, "valid_targets_min": 2793 }, { "epoch": 5.0, "grad_norm": 0.3733228007434408, "learning_rate": 9.232987678186357e-06, "loss": 0.3708, "loss_nan_ranks": 0, "loss_rank_avg": 0.3762795329093933, "step": 500, "valid_targets_mean": 7505.5, "valid_targets_min": 3065 }, { "epoch": 5.050335570469799, "grad_norm": 0.23829621547620775, "learning_rate": 8.816141930585067e-06, "loss": 0.3629, "loss_nan_ranks": 0, "loss_rank_avg": 0.1350947916507721, "step": 505, "valid_targets_mean": 9287.1, "valid_targets_min": 2062 }, { "epoch": 5.100671140939597, "grad_norm": 0.22395514111013695, "learning_rate": 8.406248471760357e-06, "loss": 0.3678, "loss_nan_ranks": 0, "loss_rank_avg": 0.12289828807115555, "step": 510, "valid_targets_mean": 8912.8, "valid_targets_min": 3025 }, { "epoch": 5.151006711409396, "grad_norm": 0.2122241497882817, "learning_rate": 8.003562106241727e-06, "loss": 0.3623, "loss_nan_ranks": 0, "loss_rank_avg": 0.12498777359724045, "step": 515, "valid_targets_mean": 8863.8, "valid_targets_min": 2840 }, { "epoch": 5.201342281879195, "grad_norm": 0.28891708884786693, "learning_rate": 7.608333158370036e-06, "loss": 0.3611, "loss_nan_ranks": 0, "loss_rank_avg": 0.11199884861707687, "step": 520, "valid_targets_mean": 7870.0, "valid_targets_min": 2620 }, { "epoch": 5.251677852348993, "grad_norm": 0.23394607717295618, "learning_rate": 7.220807316686886e-06, "loss": 0.3691, "loss_nan_ranks": 0, "loss_rank_avg": 0.12199863791465759, "step": 525, "valid_targets_mean": 8564.5, "valid_targets_min": 4298 }, { "epoch": 5.302013422818792, "grad_norm": 0.21294918655504008, "learning_rate": 6.841225481205749e-06, "loss": 0.3635, "loss_nan_ranks": 0, "loss_rank_avg": 0.12100854516029358, "step": 530, "valid_targets_mean": 9039.7, "valid_targets_min": 1560 }, { "epoch": 5.35234899328859, "grad_norm": 0.21494480970028404, "learning_rate": 6.469823613659896e-06, "loss": 0.3651, "loss_nan_ranks": 0, "loss_rank_avg": 0.11410963535308838, "step": 535, "valid_targets_mean": 8951.2, "valid_targets_min": 3544 }, { "epoch": 5.402684563758389, "grad_norm": 0.21580063331767288, "learning_rate": 6.106832590820053e-06, "loss": 0.365, "loss_nan_ranks": 0, "loss_rank_avg": 0.11225862056016922, "step": 540, "valid_targets_mean": 7322.1, "valid_targets_min": 3287 }, { "epoch": 5.453020134228188, "grad_norm": 0.22123701317383823, "learning_rate": 5.752478060973108e-06, "loss": 0.3669, "loss_nan_ranks": 0, "loss_rank_avg": 0.12023737281560898, "step": 545, "valid_targets_mean": 8372.3, "valid_targets_min": 3655 }, { "epoch": 5.503355704697986, "grad_norm": 0.2143304441931162, "learning_rate": 5.406980303650984e-06, "loss": 0.3632, "loss_nan_ranks": 0, "loss_rank_avg": 0.1282443404197693, "step": 550, "valid_targets_mean": 9076.7, "valid_targets_min": 3224 }, { "epoch": 5.553691275167785, "grad_norm": 0.2059409325448744, "learning_rate": 5.070554092696997e-06, "loss": 0.3658, "loss_nan_ranks": 0, "loss_rank_avg": 0.1095615029335022, "step": 555, "valid_targets_mean": 7624.6, "valid_targets_min": 3724 }, { "epoch": 5.604026845637584, "grad_norm": 0.20837192180926864, "learning_rate": 4.74340856275467e-06, "loss": 0.3649, "loss_nan_ranks": 0, "loss_rank_avg": 0.11278112977743149, "step": 560, "valid_targets_mean": 8129.7, "valid_targets_min": 2778 }, { "epoch": 5.654362416107382, "grad_norm": 0.21402988791875252, "learning_rate": 4.425747079262121e-06, "loss": 0.3674, "loss_nan_ranks": 0, "loss_rank_avg": 0.12279454618692398, "step": 565, "valid_targets_mean": 8821.7, "valid_targets_min": 2820 }, { "epoch": 5.704697986577181, "grad_norm": 0.2074510845148661, "learning_rate": 4.11776711203278e-06, "loss": 0.3626, "loss_nan_ranks": 0, "loss_rank_avg": 0.1227295845746994, "step": 570, "valid_targets_mean": 8974.7, "valid_targets_min": 2500 }, { "epoch": 5.75503355704698, "grad_norm": 0.2306440244667225, "learning_rate": 3.819660112501053e-06, "loss": 0.3604, "loss_nan_ranks": 0, "loss_rank_avg": 0.11513447761535645, "step": 575, "valid_targets_mean": 7983.1, "valid_targets_min": 3126 }, { "epoch": 5.805369127516778, "grad_norm": 0.2172176590730982, "learning_rate": 3.531611394709216e-06, "loss": 0.3684, "loss_nan_ranks": 0, "loss_rank_avg": 0.12855035066604614, "step": 580, "valid_targets_mean": 8633.9, "valid_targets_min": 2741 }, { "epoch": 5.855704697986577, "grad_norm": 0.21562611800655393, "learning_rate": 3.2538000201095363e-06, "loss": 0.3663, "loss_nan_ranks": 0, "loss_rank_avg": 0.12427804619073868, "step": 585, "valid_targets_mean": 9145.5, "valid_targets_min": 3920 }, { "epoch": 5.906040268456376, "grad_norm": 0.2190237582438193, "learning_rate": 2.986398686253211e-06, "loss": 0.3701, "loss_nan_ranks": 0, "loss_rank_avg": 0.13051660358905792, "step": 590, "valid_targets_mean": 8620.1, "valid_targets_min": 3357 }, { "epoch": 5.956375838926174, "grad_norm": 0.2080744597422396, "learning_rate": 2.729573619435384e-06, "loss": 0.363, "loss_nan_ranks": 0, "loss_rank_avg": 0.1293225884437561, "step": 595, "valid_targets_mean": 8719.9, "valid_targets_min": 3853 }, { "epoch": 6.0, "grad_norm": 0.3674873001472298, "learning_rate": 2.483484471362869e-06, "loss": 0.362, "loss_nan_ranks": 0, "loss_rank_avg": 0.3444654941558838, "step": 600, "valid_targets_mean": 7666.7, "valid_targets_min": 2472 }, { "epoch": 6.050335570469799, "grad_norm": 0.21298796074381432, "learning_rate": 2.248284219908918e-06, "loss": 0.3644, "loss_nan_ranks": 0, "loss_rank_avg": 0.09744159877300262, "step": 605, "valid_targets_mean": 7061.6, "valid_targets_min": 913 }, { "epoch": 6.100671140939597, "grad_norm": 0.20603793728596564, "learning_rate": 2.024119074016664e-06, "loss": 0.3661, "loss_nan_ranks": 0, "loss_rank_avg": 0.12634268403053284, "step": 610, "valid_targets_mean": 8669.8, "valid_targets_min": 4100 }, { "epoch": 6.151006711409396, "grad_norm": 0.19281788398410155, "learning_rate": 1.8111283828103566e-06, "loss": 0.3666, "loss_nan_ranks": 0, "loss_rank_avg": 0.12144550681114197, "step": 615, "valid_targets_mean": 8999.8, "valid_targets_min": 2519 }, { "epoch": 6.201342281879195, "grad_norm": 0.22141968540557966, "learning_rate": 1.6094445489709886e-06, "loss": 0.3646, "loss_nan_ranks": 0, "loss_rank_avg": 0.12076833099126816, "step": 620, "valid_targets_mean": 9312.1, "valid_targets_min": 2778 }, { "epoch": 6.251677852348993, "grad_norm": 0.1984445594470365, "learning_rate": 1.4191929464299481e-06, "loss": 0.3607, "loss_nan_ranks": 0, "loss_rank_avg": 0.12098690867424011, "step": 625, "valid_targets_mean": 9203.8, "valid_targets_min": 3448 }, { "epoch": 6.302013422818792, "grad_norm": 0.19771555014644152, "learning_rate": 1.2404918424321277e-06, "loss": 0.3607, "loss_nan_ranks": 0, "loss_rank_avg": 0.10897114872932434, "step": 630, "valid_targets_mean": 7500.3, "valid_targets_min": 991 }, { "epoch": 6.35234899328859, "grad_norm": 0.20058217433838568, "learning_rate": 1.073452324016715e-06, "loss": 0.3614, "loss_nan_ranks": 0, "loss_rank_avg": 0.1343023031949997, "step": 635, "valid_targets_mean": 9441.8, "valid_targets_min": 3744 }, { "epoch": 6.402684563758389, "grad_norm": 0.2162603197139562, "learning_rate": 9.181782289615149e-07, "loss": 0.3569, "loss_nan_ranks": 0, "loss_rank_avg": 0.11951828002929688, "step": 640, "valid_targets_mean": 8311.2, "valid_targets_min": 3340 }, { "epoch": 6.453020134228188, "grad_norm": 0.21759350387739748, "learning_rate": 7.747660812336221e-07, "loss": 0.3612, "loss_nan_ranks": 0, "loss_rank_avg": 0.12520036101341248, "step": 645, "valid_targets_mean": 8280.2, "valid_targets_min": 1940 }, { "epoch": 6.503355704697986, "grad_norm": 0.21007505903652565, "learning_rate": 6.433050309866717e-07, "loss": 0.3636, "loss_nan_ranks": 0, "loss_rank_avg": 0.1255347728729248, "step": 650, "valid_targets_mean": 7931.5, "valid_targets_min": 3055 }, { "epoch": 6.553691275167785, "grad_norm": 0.222415922560622, "learning_rate": 5.238767991418737e-07, "loss": 0.3608, "loss_nan_ranks": 0, "loss_rank_avg": 0.10943958163261414, "step": 655, "valid_targets_mean": 7532.7, "valid_targets_min": 2637 }, { "epoch": 6.604026845637584, "grad_norm": 0.20158116982395177, "learning_rate": 4.165556265873716e-07, "loss": 0.3589, "loss_nan_ranks": 0, "loss_rank_avg": 0.12891879677772522, "step": 660, "valid_targets_mean": 8930.4, "valid_targets_min": 3999 }, { "epoch": 6.654362416107382, "grad_norm": 0.20822765851254327, "learning_rate": 3.214082280274067e-07, "loss": 0.3652, "loss_nan_ranks": 0, "loss_rank_avg": 0.12707442045211792, "step": 665, "valid_targets_mean": 8714.0, "valid_targets_min": 2643 }, { "epoch": 6.704697986577181, "grad_norm": 0.20171448776496836, "learning_rate": 2.384937505100804e-07, "loss": 0.368, "loss_nan_ranks": 0, "loss_rank_avg": 0.12111932784318924, "step": 670, "valid_targets_mean": 8530.3, "valid_targets_min": 2996 }, { "epoch": 6.75503355704698, "grad_norm": 0.2091122427985211, "learning_rate": 1.6786373665939492e-07, "loss": 0.3692, "loss_nan_ranks": 0, "loss_rank_avg": 0.1105751022696495, "step": 675, "valid_targets_mean": 7289.7, "valid_targets_min": 2951 }, { "epoch": 6.805369127516778, "grad_norm": 0.19123999178770715, "learning_rate": 1.0956209263453421e-07, "loss": 0.3633, "loss_nan_ranks": 0, "loss_rank_avg": 0.11396750807762146, "step": 680, "valid_targets_mean": 8709.4, "valid_targets_min": 481 }, { "epoch": 6.855704697986577, "grad_norm": 0.20611680687469927, "learning_rate": 6.362506083618103e-08, "loss": 0.3628, "loss_nan_ranks": 0, "loss_rank_avg": 0.12498224526643753, "step": 685, "valid_targets_mean": 8953.2, "valid_targets_min": 3802 }, { "epoch": 6.906040268456376, "grad_norm": 0.1966589947991026, "learning_rate": 3.0081197376965465e-08, "loss": 0.3574, "loss_nan_ranks": 0, "loss_rank_avg": 0.12874117493629456, "step": 690, "valid_targets_mean": 8914.9, "valid_targets_min": 3898 }, { "epoch": 6.956375838926174, "grad_norm": 0.212158333151148, "learning_rate": 8.951354329933548e-09, "loss": 0.3671, "loss_nan_ranks": 0, "loss_rank_avg": 0.12312234938144684, "step": 695, "valid_targets_mean": 8675.7, "valid_targets_min": 2308 }, { "epoch": 7.0, "grad_norm": 0.34474378658754257, "learning_rate": 2.486667661627529e-10, "loss": 0.3647, "loss_nan_ranks": 0, "loss_rank_avg": 0.3694714307785034, "step": 700, "valid_targets_mean": 8521.5, "valid_targets_min": 2998 }, { "epoch": 7.0, "step": 700, "total_flos": 2.638568480974045e+18, "train_loss": 0.0, "train_runtime": 0.8295, "train_samples_per_second": 80459.329, "train_steps_per_second": 843.831 } ], "logging_steps": 5, "max_steps": 700, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.638568480974045e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }