{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "eval_steps": 500, "global_step": 567, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06172839506172839, "grad_norm": 10.429516590233682, "learning_rate": 2.8070175438596493e-06, "loss": 0.8393, "loss_nan_ranks": 0, "loss_rank_avg": 0.2808097004890442, "step": 5, "valid_targets_mean": 9607.8, "valid_targets_min": 3068 }, { "epoch": 0.12345679012345678, "grad_norm": 5.065259502819224, "learning_rate": 6.31578947368421e-06, "loss": 0.7853, "loss_nan_ranks": 0, "loss_rank_avg": 0.2494523823261261, "step": 10, "valid_targets_mean": 9841.2, "valid_targets_min": 5719 }, { "epoch": 0.18518518518518517, "grad_norm": 1.6595151332203468, "learning_rate": 9.824561403508772e-06, "loss": 0.7006, "loss_nan_ranks": 0, "loss_rank_avg": 0.23369759321212769, "step": 15, "valid_targets_mean": 10128.8, "valid_targets_min": 7342 }, { "epoch": 0.24691358024691357, "grad_norm": 1.2957924076936072, "learning_rate": 1.3333333333333333e-05, "loss": 0.6693, "loss_nan_ranks": 0, "loss_rank_avg": 0.21334174275398254, "step": 20, "valid_targets_mean": 9463.6, "valid_targets_min": 4593 }, { "epoch": 0.30864197530864196, "grad_norm": 0.7601632497112111, "learning_rate": 1.6842105263157896e-05, "loss": 0.6305, "loss_nan_ranks": 0, "loss_rank_avg": 0.20799314975738525, "step": 25, "valid_targets_mean": 11056.0, "valid_targets_min": 7081 }, { "epoch": 0.37037037037037035, "grad_norm": 0.5556757386274306, "learning_rate": 2.035087719298246e-05, "loss": 0.6038, "loss_nan_ranks": 0, "loss_rank_avg": 0.19161568582057953, "step": 30, "valid_targets_mean": 9825.4, "valid_targets_min": 6376 }, { "epoch": 0.43209876543209874, "grad_norm": 0.5153662825148531, "learning_rate": 2.385964912280702e-05, "loss": 0.5804, "loss_nan_ranks": 0, "loss_rank_avg": 0.19309845566749573, "step": 35, "valid_targets_mean": 10027.2, "valid_targets_min": 5491 }, { "epoch": 0.49382716049382713, "grad_norm": 0.4106317408119749, "learning_rate": 2.7368421052631583e-05, "loss": 0.5557, "loss_nan_ranks": 0, "loss_rank_avg": 0.1776849925518036, "step": 40, "valid_targets_mean": 9618.7, "valid_targets_min": 3252 }, { "epoch": 0.5555555555555556, "grad_norm": 0.35411307929247426, "learning_rate": 3.087719298245614e-05, "loss": 0.5349, "loss_nan_ranks": 0, "loss_rank_avg": 0.17469243705272675, "step": 45, "valid_targets_mean": 10166.2, "valid_targets_min": 6091 }, { "epoch": 0.6172839506172839, "grad_norm": 0.3028156061226061, "learning_rate": 3.43859649122807e-05, "loss": 0.5171, "loss_nan_ranks": 0, "loss_rank_avg": 0.16911481320858002, "step": 50, "valid_targets_mean": 10112.2, "valid_targets_min": 1278 }, { "epoch": 0.6790123456790124, "grad_norm": 0.24266635032746242, "learning_rate": 3.789473684210526e-05, "loss": 0.5033, "loss_nan_ranks": 0, "loss_rank_avg": 0.15108071267604828, "step": 55, "valid_targets_mean": 9476.6, "valid_targets_min": 405 }, { "epoch": 0.7407407407407407, "grad_norm": 0.225540686496095, "learning_rate": 3.999848220229662e-05, "loss": 0.4919, "loss_nan_ranks": 0, "loss_rank_avg": 0.1633068025112152, "step": 60, "valid_targets_mean": 10242.9, "valid_targets_min": 6762 }, { "epoch": 0.8024691358024691, "grad_norm": 0.233196689176046, "learning_rate": 3.998140962368987e-05, "loss": 0.479, "loss_nan_ranks": 0, "loss_rank_avg": 0.15809178352355957, "step": 65, "valid_targets_mean": 10012.9, "valid_targets_min": 5638 }, { "epoch": 0.8641975308641975, "grad_norm": 0.22642340209998182, "learning_rate": 3.994538346771576e-05, "loss": 0.4669, "loss_nan_ranks": 0, "loss_rank_avg": 0.145830899477005, "step": 70, "valid_targets_mean": 10058.6, "valid_targets_min": 7540 }, { "epoch": 0.9259259259259259, "grad_norm": 0.21957679869413865, "learning_rate": 3.989043790736547e-05, "loss": 0.4607, "loss_nan_ranks": 0, "loss_rank_avg": 0.14295609295368195, "step": 75, "valid_targets_mean": 9880.9, "valid_targets_min": 4567 }, { "epoch": 0.9876543209876543, "grad_norm": 0.259082441417068, "learning_rate": 3.9816625061831206e-05, "loss": 0.4539, "loss_nan_ranks": 0, "loss_rank_avg": 0.14566341042518616, "step": 80, "valid_targets_mean": 10036.9, "valid_targets_min": 6974 }, { "epoch": 1.0493827160493827, "grad_norm": 0.29381252653535816, "learning_rate": 3.972401494706805e-05, "loss": 0.4489, "loss_nan_ranks": 0, "loss_rank_avg": 0.14456677436828613, "step": 85, "valid_targets_mean": 10007.5, "valid_targets_min": 6170 }, { "epoch": 1.1111111111111112, "grad_norm": 0.2514362103454548, "learning_rate": 3.9612695409379555e-05, "loss": 0.4473, "loss_nan_ranks": 0, "loss_rank_avg": 0.156326562166214, "step": 90, "valid_targets_mean": 10273.3, "valid_targets_min": 6712 }, { "epoch": 1.1728395061728394, "grad_norm": 0.24751778888652187, "learning_rate": 3.948277204209021e-05, "loss": 0.4372, "loss_nan_ranks": 0, "loss_rank_avg": 0.14379754662513733, "step": 95, "valid_targets_mean": 10190.5, "valid_targets_min": 5246 }, { "epoch": 1.2345679012345678, "grad_norm": 0.2494831866139906, "learning_rate": 3.933436808538375e-05, "loss": 0.4398, "loss_nan_ranks": 0, "loss_rank_avg": 0.13491873443126678, "step": 100, "valid_targets_mean": 9260.9, "valid_targets_min": 5003 }, { "epoch": 1.2962962962962963, "grad_norm": 0.24159264180274795, "learning_rate": 3.916762430940245e-05, "loss": 0.4271, "loss_nan_ranks": 0, "loss_rank_avg": 0.1485365480184555, "step": 105, "valid_targets_mean": 10453.7, "valid_targets_min": 6676 }, { "epoch": 1.3580246913580247, "grad_norm": 0.240571750078312, "learning_rate": 3.898269888071803e-05, "loss": 0.4294, "loss_nan_ranks": 0, "loss_rank_avg": 0.1464928835630417, "step": 110, "valid_targets_mean": 10115.0, "valid_targets_min": 321 }, { "epoch": 1.4197530864197532, "grad_norm": 0.23171615291838898, "learning_rate": 3.877976721230114e-05, "loss": 0.4238, "loss_nan_ranks": 0, "loss_rank_avg": 0.13552333414554596, "step": 115, "valid_targets_mean": 10116.0, "valid_targets_min": 4165 }, { "epoch": 1.4814814814814814, "grad_norm": 0.23123951273615284, "learning_rate": 3.85590217971315e-05, "loss": 0.4247, "loss_nan_ranks": 0, "loss_rank_avg": 0.1396927833557129, "step": 120, "valid_targets_mean": 9472.6, "valid_targets_min": 3369 }, { "epoch": 1.5432098765432098, "grad_norm": 0.27876028230350475, "learning_rate": 3.832067202560668e-05, "loss": 0.4182, "loss_nan_ranks": 0, "loss_rank_avg": 0.14294642210006714, "step": 125, "valid_targets_mean": 9702.7, "valid_targets_min": 3563 }, { "epoch": 1.6049382716049383, "grad_norm": 0.24112690266489265, "learning_rate": 3.806494398692258e-05, "loss": 0.4223, "loss_nan_ranks": 0, "loss_rank_avg": 0.1513240933418274, "step": 130, "valid_targets_mean": 10773.1, "valid_targets_min": 6916 }, { "epoch": 1.6666666666666665, "grad_norm": 0.28682215602360966, "learning_rate": 3.77920802546142e-05, "loss": 0.4225, "loss_nan_ranks": 0, "loss_rank_avg": 0.13537049293518066, "step": 135, "valid_targets_mean": 8778.2, "valid_targets_min": 4773 }, { "epoch": 1.7283950617283952, "grad_norm": 0.22939970462745563, "learning_rate": 3.750233965645985e-05, "loss": 0.4182, "loss_nan_ranks": 0, "loss_rank_avg": 0.1321515440940857, "step": 140, "valid_targets_mean": 9729.9, "valid_targets_min": 4328 }, { "epoch": 1.7901234567901234, "grad_norm": 0.2532856415818664, "learning_rate": 3.719599702896745e-05, "loss": 0.4163, "loss_nan_ranks": 0, "loss_rank_avg": 0.13605017960071564, "step": 145, "valid_targets_mean": 10582.2, "valid_targets_min": 7105 }, { "epoch": 1.8518518518518519, "grad_norm": 0.21865623660828168, "learning_rate": 3.687334295667533e-05, "loss": 0.4135, "loss_nan_ranks": 0, "loss_rank_avg": 0.1345449984073639, "step": 150, "valid_targets_mean": 9758.0, "valid_targets_min": 3245 }, { "epoch": 1.9135802469135803, "grad_norm": 0.22032076579466348, "learning_rate": 3.653468349651527e-05, "loss": 0.4082, "loss_nan_ranks": 0, "loss_rank_avg": 0.1432110071182251, "step": 155, "valid_targets_mean": 10140.4, "valid_targets_min": 3954 }, { "epoch": 1.9753086419753085, "grad_norm": 0.2300084517459775, "learning_rate": 3.6180339887498953e-05, "loss": 0.4116, "loss_nan_ranks": 0, "loss_rank_avg": 0.14358800649642944, "step": 160, "valid_targets_mean": 10277.0, "valid_targets_min": 5517 }, { "epoch": 2.037037037037037, "grad_norm": 0.26725994957529775, "learning_rate": 3.581064824600327e-05, "loss": 0.4073, "loss_nan_ranks": 0, "loss_rank_avg": 0.12686286866664886, "step": 165, "valid_targets_mean": 9887.2, "valid_targets_min": 6671 }, { "epoch": 2.0987654320987654, "grad_norm": 0.2839301734419817, "learning_rate": 3.542595924694362e-05, "loss": 0.4006, "loss_nan_ranks": 0, "loss_rank_avg": 0.12411850690841675, "step": 170, "valid_targets_mean": 10430.6, "valid_targets_min": 4962 }, { "epoch": 2.1604938271604937, "grad_norm": 0.23886149599663145, "learning_rate": 3.502663779113747e-05, "loss": 0.4057, "loss_nan_ranks": 0, "loss_rank_avg": 0.1273406594991684, "step": 175, "valid_targets_mean": 9780.4, "valid_targets_min": 6883 }, { "epoch": 2.2222222222222223, "grad_norm": 0.2508286434733983, "learning_rate": 3.4613062659173865e-05, "loss": 0.4005, "loss_nan_ranks": 0, "loss_rank_avg": 0.13528917729854584, "step": 180, "valid_targets_mean": 10432.4, "valid_targets_min": 2672 }, { "epoch": 2.2839506172839505, "grad_norm": 0.28154810246127465, "learning_rate": 3.418562615211707e-05, "loss": 0.3986, "loss_nan_ranks": 0, "loss_rank_avg": 0.13502192497253418, "step": 185, "valid_targets_mean": 9436.0, "valid_targets_min": 321 }, { "epoch": 2.3456790123456788, "grad_norm": 0.25037886356869404, "learning_rate": 3.374473371938526e-05, "loss": 0.4055, "loss_nan_ranks": 0, "loss_rank_avg": 0.13342218101024628, "step": 190, "valid_targets_mean": 10217.9, "valid_targets_min": 5884 }, { "epoch": 2.4074074074074074, "grad_norm": 0.22241283286592672, "learning_rate": 3.329080357415716e-05, "loss": 0.4011, "loss_nan_ranks": 0, "loss_rank_avg": 0.13278549909591675, "step": 195, "valid_targets_mean": 10201.8, "valid_targets_min": 5485 }, { "epoch": 2.4691358024691357, "grad_norm": 0.25914481987156485, "learning_rate": 3.282426629667157e-05, "loss": 0.4, "loss_nan_ranks": 0, "loss_rank_avg": 0.13765950500965118, "step": 200, "valid_targets_mean": 9836.8, "valid_targets_min": 1763 }, { "epoch": 2.5308641975308643, "grad_norm": 0.2678736062298823, "learning_rate": 3.234556442579586e-05, "loss": 0.3963, "loss_nan_ranks": 0, "loss_rank_avg": 0.1374882459640503, "step": 205, "valid_targets_mean": 9808.9, "valid_targets_min": 5540 }, { "epoch": 2.5925925925925926, "grad_norm": 0.22739006549004376, "learning_rate": 3.18551520392511e-05, "loss": 0.3911, "loss_nan_ranks": 0, "loss_rank_avg": 0.11978818476200104, "step": 210, "valid_targets_mean": 9023.9, "valid_targets_min": 5865 }, { "epoch": 2.6543209876543212, "grad_norm": 0.2480434833866747, "learning_rate": 3.1353494322891806e-05, "loss": 0.3967, "loss_nan_ranks": 0, "loss_rank_avg": 0.13371124863624573, "step": 215, "valid_targets_mean": 10042.4, "valid_targets_min": 5992 }, { "epoch": 2.7160493827160495, "grad_norm": 0.2415014429826874, "learning_rate": 3.084106712944899e-05, "loss": 0.3924, "loss_nan_ranks": 0, "loss_rank_avg": 0.11986161768436432, "step": 220, "valid_targets_mean": 8822.5, "valid_targets_min": 5914 }, { "epoch": 2.7777777777777777, "grad_norm": 0.22748383065140926, "learning_rate": 3.0318356527155024e-05, "loss": 0.3959, "loss_nan_ranks": 0, "loss_rank_avg": 0.13959044218063354, "step": 225, "valid_targets_mean": 11017.7, "valid_targets_min": 7406 }, { "epoch": 2.8395061728395063, "grad_norm": 0.22579559038440425, "learning_rate": 2.9785858338678474e-05, "loss": 0.3958, "loss_nan_ranks": 0, "loss_rank_avg": 0.1356910616159439, "step": 230, "valid_targets_mean": 10416.5, "valid_targets_min": 5935 }, { "epoch": 2.9012345679012346, "grad_norm": 0.259395872875029, "learning_rate": 2.924407767080627e-05, "loss": 0.3952, "loss_nan_ranks": 0, "loss_rank_avg": 0.13255149126052856, "step": 235, "valid_targets_mean": 10616.9, "valid_targets_min": 452 }, { "epoch": 2.962962962962963, "grad_norm": 0.2505789103446424, "learning_rate": 2.8693528435319304e-05, "loss": 0.3927, "loss_nan_ranks": 0, "loss_rank_avg": 0.13311100006103516, "step": 240, "valid_targets_mean": 10259.8, "valid_targets_min": 5469 }, { "epoch": 3.0246913580246915, "grad_norm": 0.25985101425137663, "learning_rate": 2.813473286151601e-05, "loss": 0.3877, "loss_nan_ranks": 0, "loss_rank_avg": 0.1392827033996582, "step": 245, "valid_targets_mean": 10576.5, "valid_targets_min": 6382 }, { "epoch": 3.0864197530864197, "grad_norm": 0.2523285023282264, "learning_rate": 2.756822100084621e-05, "loss": 0.3882, "loss_nan_ranks": 0, "loss_rank_avg": 0.13310596346855164, "step": 250, "valid_targets_mean": 10070.9, "valid_targets_min": 5036 }, { "epoch": 3.148148148148148, "grad_norm": 0.24591905608450526, "learning_rate": 2.6994530224125225e-05, "loss": 0.3876, "loss_nan_ranks": 0, "loss_rank_avg": 0.12935864925384521, "step": 255, "valid_targets_mean": 9908.3, "valid_targets_min": 5504 }, { "epoch": 3.2098765432098766, "grad_norm": 0.23137220587321322, "learning_rate": 2.6414204711805106e-05, "loss": 0.3827, "loss_nan_ranks": 0, "loss_rank_avg": 0.12253047525882721, "step": 260, "valid_targets_mean": 9940.8, "valid_targets_min": 5580 }, { "epoch": 3.271604938271605, "grad_norm": 0.22538191153928455, "learning_rate": 2.5827794937786497e-05, "loss": 0.3896, "loss_nan_ranks": 0, "loss_rank_avg": 0.12234168499708176, "step": 265, "valid_targets_mean": 9346.2, "valid_targets_min": 2359 }, { "epoch": 3.3333333333333335, "grad_norm": 0.23311402995020197, "learning_rate": 2.523585714726081e-05, "loss": 0.3879, "loss_nan_ranks": 0, "loss_rank_avg": 0.1369541585445404, "step": 270, "valid_targets_mean": 10217.0, "valid_targets_min": 5332 }, { "epoch": 3.3950617283950617, "grad_norm": 0.2514619172750129, "learning_rate": 2.4638952829077964e-05, "loss": 0.3823, "loss_nan_ranks": 0, "loss_rank_avg": 0.1313236802816391, "step": 275, "valid_targets_mean": 10161.1, "valid_targets_min": 5918 }, { "epoch": 3.45679012345679, "grad_norm": 0.21958361450520822, "learning_rate": 2.4037648183140205e-05, "loss": 0.3813, "loss_nan_ranks": 0, "loss_rank_avg": 0.12724569439888, "step": 280, "valid_targets_mean": 10332.4, "valid_targets_min": 6695 }, { "epoch": 3.5185185185185186, "grad_norm": 0.2059815034095156, "learning_rate": 2.3432513583327198e-05, "loss": 0.3789, "loss_nan_ranks": 0, "loss_rank_avg": 0.12159313261508942, "step": 285, "valid_targets_mean": 9543.0, "valid_targets_min": 5540 }, { "epoch": 3.580246913580247, "grad_norm": 0.2172755088091282, "learning_rate": 2.282412303646183e-05, "loss": 0.3824, "loss_nan_ranks": 0, "loss_rank_avg": 0.13618609309196472, "step": 290, "valid_targets_mean": 10702.6, "valid_targets_min": 5676 }, { "epoch": 3.6419753086419755, "grad_norm": 0.23724659970939752, "learning_rate": 2.2213053637830016e-05, "loss": 0.3827, "loss_nan_ranks": 0, "loss_rank_avg": 0.13978168368339539, "step": 295, "valid_targets_mean": 10434.6, "valid_targets_min": 6350 }, { "epoch": 3.7037037037037037, "grad_norm": 0.21912152242726737, "learning_rate": 2.1599885023770833e-05, "loss": 0.3791, "loss_nan_ranks": 0, "loss_rank_avg": 0.1349799931049347, "step": 300, "valid_targets_mean": 10565.1, "valid_targets_min": 6091 }, { "epoch": 3.765432098765432, "grad_norm": 0.23294883680524386, "learning_rate": 2.098519882185634e-05, "loss": 0.3835, "loss_nan_ranks": 0, "loss_rank_avg": 0.12474770843982697, "step": 305, "valid_targets_mean": 9782.5, "valid_targets_min": 6639 }, { "epoch": 3.8271604938271606, "grad_norm": 0.210413727788084, "learning_rate": 2.03695780991826e-05, "loss": 0.3793, "loss_nan_ranks": 0, "loss_rank_avg": 0.12241166830062866, "step": 310, "valid_targets_mean": 9305.9, "valid_targets_min": 452 }, { "epoch": 3.888888888888889, "grad_norm": 0.2336083067719978, "learning_rate": 1.9753606809295234e-05, "loss": 0.3817, "loss_nan_ranks": 0, "loss_rank_avg": 0.1337265968322754, "step": 315, "valid_targets_mean": 10231.2, "valid_targets_min": 5605 }, { "epoch": 3.950617283950617, "grad_norm": 0.22993388632584225, "learning_rate": 1.9137869238274095e-05, "loss": 0.3867, "loss_nan_ranks": 0, "loss_rank_avg": 0.12258920818567276, "step": 320, "valid_targets_mean": 9413.1, "valid_targets_min": 3903 }, { "epoch": 4.012345679012346, "grad_norm": 0.23931040818992602, "learning_rate": 1.8522949450502522e-05, "loss": 0.385, "loss_nan_ranks": 0, "loss_rank_avg": 0.11768842488527298, "step": 325, "valid_targets_mean": 10013.8, "valid_targets_min": 6307 }, { "epoch": 4.074074074074074, "grad_norm": 0.23366624804084687, "learning_rate": 1.7909430734646936e-05, "loss": 0.3755, "loss_nan_ranks": 0, "loss_rank_avg": 0.12187112867832184, "step": 330, "valid_targets_mean": 9138.8, "valid_targets_min": 405 }, { "epoch": 4.135802469135802, "grad_norm": 0.2386159155105413, "learning_rate": 1.7297895050372147e-05, "loss": 0.3783, "loss_nan_ranks": 0, "loss_rank_avg": 0.11817651987075806, "step": 335, "valid_targets_mean": 9925.3, "valid_targets_min": 5315 }, { "epoch": 4.197530864197531, "grad_norm": 0.24141329497025246, "learning_rate": 1.66889224763174e-05, "loss": 0.3786, "loss_nan_ranks": 0, "loss_rank_avg": 0.12400516867637634, "step": 340, "valid_targets_mean": 10173.1, "valid_targets_min": 6521 }, { "epoch": 4.2592592592592595, "grad_norm": 0.21961371398861715, "learning_rate": 1.6083090659856665e-05, "loss": 0.3769, "loss_nan_ranks": 0, "loss_rank_avg": 0.12499363720417023, "step": 345, "valid_targets_mean": 9886.8, "valid_targets_min": 3903 }, { "epoch": 4.320987654320987, "grad_norm": 0.20691809860193006, "learning_rate": 1.5480974269165053e-05, "loss": 0.3765, "loss_nan_ranks": 0, "loss_rank_avg": 0.12656159698963165, "step": 350, "valid_targets_mean": 10278.9, "valid_targets_min": 5947 }, { "epoch": 4.382716049382716, "grad_norm": 0.2080579551581286, "learning_rate": 1.4883144448111288e-05, "loss": 0.3756, "loss_nan_ranks": 0, "loss_rank_avg": 0.12579810619354248, "step": 355, "valid_targets_mean": 9957.8, "valid_targets_min": 6975 }, { "epoch": 4.444444444444445, "grad_norm": 0.22870148650025382, "learning_rate": 1.4290168274493161e-05, "loss": 0.3743, "loss_nan_ranks": 0, "loss_rank_avg": 0.12021129578351974, "step": 360, "valid_targets_mean": 9520.9, "valid_targets_min": 4051 }, { "epoch": 4.506172839506172, "grad_norm": 0.21233421060501143, "learning_rate": 1.3702608222129845e-05, "loss": 0.3785, "loss_nan_ranks": 0, "loss_rank_avg": 0.13181990385055542, "step": 365, "valid_targets_mean": 9865.2, "valid_targets_min": 2359 }, { "epoch": 4.567901234567901, "grad_norm": 0.21174790278305963, "learning_rate": 1.3121021627321438e-05, "loss": 0.3762, "loss_nan_ranks": 0, "loss_rank_avg": 0.11894433945417404, "step": 370, "valid_targets_mean": 9994.8, "valid_targets_min": 6103 }, { "epoch": 4.62962962962963, "grad_norm": 0.22880042834392986, "learning_rate": 1.254596016018172e-05, "loss": 0.3718, "loss_nan_ranks": 0, "loss_rank_avg": 0.11400240659713745, "step": 375, "valid_targets_mean": 9684.1, "valid_targets_min": 1809 }, { "epoch": 4.6913580246913575, "grad_norm": 0.20714503394784406, "learning_rate": 1.1977969301345627e-05, "loss": 0.3741, "loss_nan_ranks": 0, "loss_rank_avg": 0.12407234311103821, "step": 380, "valid_targets_mean": 10558.5, "valid_targets_min": 3563 }, { "epoch": 4.753086419753086, "grad_norm": 0.2245999439376979, "learning_rate": 1.1417587824547822e-05, "loss": 0.3756, "loss_nan_ranks": 0, "loss_rank_avg": 0.11711939424276352, "step": 385, "valid_targets_mean": 10091.3, "valid_targets_min": 5831 }, { "epoch": 4.814814814814815, "grad_norm": 0.19667919626855399, "learning_rate": 1.086534728556319e-05, "loss": 0.3701, "loss_nan_ranks": 0, "loss_rank_avg": 0.1273461878299713, "step": 390, "valid_targets_mean": 9572.9, "valid_targets_min": 4151 }, { "epoch": 4.8765432098765435, "grad_norm": 0.21765308723193594, "learning_rate": 1.032177151799397e-05, "loss": 0.3723, "loss_nan_ranks": 0, "loss_rank_avg": 0.12290293723344803, "step": 395, "valid_targets_mean": 10243.7, "valid_targets_min": 403 }, { "epoch": 4.938271604938271, "grad_norm": 0.2135466709612721, "learning_rate": 9.787376136381866e-06, "loss": 0.3801, "loss_nan_ranks": 0, "loss_rank_avg": 0.12904667854309082, "step": 400, "valid_targets_mean": 10656.8, "valid_targets_min": 5102 }, { "epoch": 5.0, "grad_norm": 0.2361103567960002, "learning_rate": 9.262668047116399e-06, "loss": 0.3742, "loss_nan_ranks": 0, "loss_rank_avg": 0.11932142823934555, "step": 405, "valid_targets_mean": 9562.3, "valid_targets_min": 5152 }, { "epoch": 5.061728395061729, "grad_norm": 0.19675209800473384, "learning_rate": 8.748144967603538e-06, "loss": 0.3748, "loss_nan_ranks": 0, "loss_rank_avg": 0.12642104923725128, "step": 410, "valid_targets_mean": 9635.7, "valid_targets_min": 3304 }, { "epoch": 5.1234567901234565, "grad_norm": 0.19935243749779888, "learning_rate": 8.24429495415054e-06, "loss": 0.3701, "loss_nan_ranks": 0, "loss_rank_avg": 0.1311061680316925, "step": 415, "valid_targets_mean": 10193.9, "valid_targets_min": 4731 }, { "epoch": 5.185185185185185, "grad_norm": 0.20563411967663517, "learning_rate": 7.751595939015005e-06, "loss": 0.3648, "loss_nan_ranks": 0, "loss_rank_avg": 0.12725985050201416, "step": 420, "valid_targets_mean": 9884.4, "valid_targets_min": 6098 }, { "epoch": 5.246913580246914, "grad_norm": 0.23745531664425062, "learning_rate": 7.270515277057178e-06, "loss": 0.3704, "loss_nan_ranks": 0, "loss_rank_avg": 0.12347559630870819, "step": 425, "valid_targets_mean": 10205.5, "valid_targets_min": 4462 }, { "epoch": 5.308641975308642, "grad_norm": 0.1974181070193832, "learning_rate": 6.801509302425553e-06, "loss": 0.3728, "loss_nan_ranks": 0, "loss_rank_avg": 0.13114777207374573, "step": 430, "valid_targets_mean": 10419.2, "valid_targets_min": 6888 }, { "epoch": 5.37037037037037, "grad_norm": 0.20324970189648778, "learning_rate": 6.3450228956962915e-06, "loss": 0.3697, "loss_nan_ranks": 0, "loss_rank_avg": 0.12115702033042908, "step": 435, "valid_targets_mean": 9735.0, "valid_targets_min": 452 }, { "epoch": 5.432098765432099, "grad_norm": 0.21598912192747013, "learning_rate": 5.90148906187706e-06, "loss": 0.3716, "loss_nan_ranks": 0, "loss_rank_avg": 0.1321217566728592, "step": 440, "valid_targets_mean": 10511.7, "valid_targets_min": 6367 }, { "epoch": 5.493827160493828, "grad_norm": 0.18869463173340542, "learning_rate": 5.471328519675521e-06, "loss": 0.3706, "loss_nan_ranks": 0, "loss_rank_avg": 0.11856191605329514, "step": 445, "valid_targets_mean": 10337.3, "valid_targets_min": 6856 }, { "epoch": 5.555555555555555, "grad_norm": 0.3057251506093963, "learning_rate": 5.054949302422178e-06, "loss": 0.374, "loss_nan_ranks": 0, "loss_rank_avg": 0.1269569993019104, "step": 450, "valid_targets_mean": 10418.6, "valid_targets_min": 7562 }, { "epoch": 5.617283950617284, "grad_norm": 0.18558587270619328, "learning_rate": 4.65274637102606e-06, "loss": 0.37, "loss_nan_ranks": 0, "loss_rank_avg": 0.12654009461402893, "step": 455, "valid_targets_mean": 10451.2, "valid_targets_min": 6890 }, { "epoch": 5.679012345679013, "grad_norm": 0.18925832500358447, "learning_rate": 4.265101239330336e-06, "loss": 0.3692, "loss_nan_ranks": 0, "loss_rank_avg": 0.11512520909309387, "step": 460, "valid_targets_mean": 9696.8, "valid_targets_min": 6270 }, { "epoch": 5.7407407407407405, "grad_norm": 0.2123661843964898, "learning_rate": 3.892381612223348e-06, "loss": 0.3731, "loss_nan_ranks": 0, "loss_rank_avg": 0.12849582731723785, "step": 465, "valid_targets_mean": 9772.2, "valid_targets_min": 5044 }, { "epoch": 5.802469135802469, "grad_norm": 0.19388821506347134, "learning_rate": 3.534941036848258e-06, "loss": 0.3697, "loss_nan_ranks": 0, "loss_rank_avg": 0.11666221916675568, "step": 470, "valid_targets_mean": 9419.0, "valid_targets_min": 5724 }, { "epoch": 5.864197530864198, "grad_norm": 0.19304850153428105, "learning_rate": 3.193118567242148e-06, "loss": 0.3717, "loss_nan_ranks": 0, "loss_rank_avg": 0.12014692276716232, "step": 475, "valid_targets_mean": 10304.2, "valid_targets_min": 6152 }, { "epoch": 5.925925925925926, "grad_norm": 0.20024207191873417, "learning_rate": 2.8672384427227484e-06, "loss": 0.3728, "loss_nan_ranks": 0, "loss_rank_avg": 0.13074594736099243, "step": 480, "valid_targets_mean": 10167.5, "valid_targets_min": 4046 }, { "epoch": 5.987654320987654, "grad_norm": 0.18822272097106288, "learning_rate": 2.5576097803277833e-06, "loss": 0.3705, "loss_nan_ranks": 0, "loss_rank_avg": 0.12458598613739014, "step": 485, "valid_targets_mean": 9746.9, "valid_targets_min": 5329 }, { "epoch": 6.049382716049383, "grad_norm": 0.1920896017408439, "learning_rate": 2.264526281598762e-06, "loss": 0.3731, "loss_nan_ranks": 0, "loss_rank_avg": 0.11540110409259796, "step": 490, "valid_targets_mean": 9151.7, "valid_targets_min": 2545 }, { "epoch": 6.111111111111111, "grad_norm": 0.1963272832970611, "learning_rate": 1.988265953987254e-06, "loss": 0.3719, "loss_nan_ranks": 0, "loss_rank_avg": 0.1279304027557373, "step": 495, "valid_targets_mean": 10241.9, "valid_targets_min": 6053 }, { "epoch": 6.172839506172839, "grad_norm": 0.19192168324701758, "learning_rate": 1.7290908471479805e-06, "loss": 0.3697, "loss_nan_ranks": 0, "loss_rank_avg": 0.11920693516731262, "step": 500, "valid_targets_mean": 9656.2, "valid_targets_min": 3898 }, { "epoch": 6.234567901234568, "grad_norm": 0.1838675547975408, "learning_rate": 1.487246804368876e-06, "loss": 0.3665, "loss_nan_ranks": 0, "loss_rank_avg": 0.1233929991722107, "step": 505, "valid_targets_mean": 10345.6, "valid_targets_min": 5701 }, { "epoch": 6.296296296296296, "grad_norm": 0.18797572906177723, "learning_rate": 1.2629632293737903e-06, "loss": 0.3707, "loss_nan_ranks": 0, "loss_rank_avg": 0.12090076506137848, "step": 510, "valid_targets_mean": 9881.3, "valid_targets_min": 6218 }, { "epoch": 6.3580246913580245, "grad_norm": 0.20133771310875587, "learning_rate": 1.0564528687191954e-06, "loss": 0.3745, "loss_nan_ranks": 0, "loss_rank_avg": 0.12166153639554977, "step": 515, "valid_targets_mean": 9838.0, "valid_targets_min": 5384 }, { "epoch": 6.419753086419753, "grad_norm": 0.19437281332257697, "learning_rate": 8.679116099911855e-07, "loss": 0.3682, "loss_nan_ranks": 0, "loss_rank_avg": 0.12425738573074341, "step": 520, "valid_targets_mean": 10094.1, "valid_targets_min": 5805 }, { "epoch": 6.481481481481482, "grad_norm": 0.20313423058358762, "learning_rate": 6.975182959942195e-07, "loss": 0.3647, "loss_nan_ranks": 0, "loss_rank_avg": 0.12808644771575928, "step": 525, "valid_targets_mean": 10183.5, "valid_targets_min": 6429 }, { "epoch": 6.54320987654321, "grad_norm": 0.19806939309627028, "learning_rate": 5.454345551079044e-07, "loss": 0.371, "loss_nan_ranks": 0, "loss_rank_avg": 0.11769580841064453, "step": 530, "valid_targets_mean": 10095.5, "valid_targets_min": 6403 }, { "epoch": 6.604938271604938, "grad_norm": 0.19162098246941137, "learning_rate": 4.118046479726823e-07, "loss": 0.3693, "loss_nan_ranks": 0, "loss_rank_avg": 0.12202047556638718, "step": 535, "valid_targets_mean": 10323.8, "valid_targets_min": 6837 }, { "epoch": 6.666666666666667, "grad_norm": 0.18761681557750176, "learning_rate": 2.9675533064986937e-07, "loss": 0.3699, "loss_nan_ranks": 0, "loss_rank_avg": 0.1217864602804184, "step": 540, "valid_targets_mean": 9897.5, "valid_targets_min": 5402 }, { "epoch": 6.728395061728395, "grad_norm": 0.19040954143447755, "learning_rate": 2.0039573438586091e-07, "loss": 0.3682, "loss_nan_ranks": 0, "loss_rank_avg": 0.1232626885175705, "step": 545, "valid_targets_mean": 10269.8, "valid_targets_min": 6322 }, { "epoch": 6.790123456790123, "grad_norm": 0.1840751374024284, "learning_rate": 1.2281726209452782e-07, "loss": 0.3704, "loss_nan_ranks": 0, "loss_rank_avg": 0.1274663656949997, "step": 550, "valid_targets_mean": 10375.0, "valid_targets_min": 5210 }, { "epoch": 6.851851851851852, "grad_norm": 0.1884444000769013, "learning_rate": 6.409350165601957e-08, "loss": 0.3645, "loss_nan_ranks": 0, "loss_rank_avg": 0.11835850775241852, "step": 555, "valid_targets_mean": 9355.1, "valid_targets_min": 5793 }, { "epoch": 6.91358024691358, "grad_norm": 0.18733829002548932, "learning_rate": 2.4280156114202537e-08, "loss": 0.3741, "loss_nan_ranks": 0, "loss_rank_avg": 0.12829899787902832, "step": 560, "valid_targets_mean": 9559.5, "valid_targets_min": 4937 }, { "epoch": 6.9753086419753085, "grad_norm": 0.1898364388549884, "learning_rate": 3.414990838945809e-09, "loss": 0.3617, "loss_nan_ranks": 0, "loss_rank_avg": 0.122779980301857, "step": 565, "valid_targets_mean": 10042.1, "valid_targets_min": 5310 }, { "epoch": 7.0, "step": 567, "total_flos": 2.4619618330916946e+18, "train_loss": 0.0, "train_runtime": 6.5836, "train_samples_per_second": 8267.793, "train_steps_per_second": 86.123 } ], "logging_steps": 5, "max_steps": 567, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.4619618330916946e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }