{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 1477, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.006771626883358727, "grad_norm": 1.5234375, "learning_rate": 6.081081081081082e-07, "loss": 1.8358331680297852, "step": 10 }, { "epoch": 0.013543253766717453, "grad_norm": 1.5078125, "learning_rate": 1.2837837837837838e-06, "loss": 1.840726089477539, "step": 20 }, { "epoch": 0.02031488065007618, "grad_norm": 1.0859375, "learning_rate": 1.9594594594594595e-06, "loss": 1.8267410278320313, "step": 30 }, { "epoch": 0.027086507533434907, "grad_norm": 1.1640625, "learning_rate": 2.6351351351351353e-06, "loss": 1.8383310317993165, "step": 40 }, { "epoch": 0.03385813441679363, "grad_norm": 1.0859375, "learning_rate": 3.310810810810811e-06, "loss": 1.8384885787963867, "step": 50 }, { "epoch": 0.04062976130015236, "grad_norm": 1.03125, "learning_rate": 3.986486486486487e-06, "loss": 1.8087802886962892, "step": 60 }, { "epoch": 0.04740138818351109, "grad_norm": 1.015625, "learning_rate": 4.6621621621621625e-06, "loss": 1.8259227752685547, "step": 70 }, { "epoch": 0.05417301506686981, "grad_norm": 1.046875, "learning_rate": 5.337837837837838e-06, "loss": 1.8241001129150392, "step": 80 }, { "epoch": 0.06094464195022854, "grad_norm": 0.96484375, "learning_rate": 6.013513513513514e-06, "loss": 1.82220516204834, "step": 90 }, { "epoch": 0.06771626883358726, "grad_norm": 0.953125, "learning_rate": 6.689189189189191e-06, "loss": 1.7921783447265625, "step": 100 }, { "epoch": 0.074487895716946, "grad_norm": 0.9296875, "learning_rate": 7.3648648648648655e-06, "loss": 1.797548484802246, "step": 110 }, { "epoch": 0.08125952260030472, "grad_norm": 0.89453125, "learning_rate": 8.040540540540541e-06, "loss": 1.7889528274536133, "step": 120 }, { "epoch": 0.08803114948366345, "grad_norm": 0.90234375, "learning_rate": 8.716216216216217e-06, "loss": 1.7663179397583009, "step": 130 }, { "epoch": 0.09480277636702218, "grad_norm": 0.89453125, "learning_rate": 9.391891891891893e-06, "loss": 1.7635225296020507, "step": 140 }, { "epoch": 0.1015744032503809, "grad_norm": 0.91015625, "learning_rate": 9.999986030219255e-06, "loss": 1.7774492263793946, "step": 150 }, { "epoch": 0.10834603013373963, "grad_norm": 0.91796875, "learning_rate": 9.998309750982693e-06, "loss": 1.7622718811035156, "step": 160 }, { "epoch": 0.11511765701709836, "grad_norm": 0.890625, "learning_rate": 9.993840588849743e-06, "loss": 1.7750001907348634, "step": 170 }, { "epoch": 0.12188928390045708, "grad_norm": 0.890625, "learning_rate": 9.986581041033881e-06, "loss": 1.767216110229492, "step": 180 }, { "epoch": 0.1286609107838158, "grad_norm": 0.921875, "learning_rate": 9.976535163919757e-06, "loss": 1.7609657287597655, "step": 190 }, { "epoch": 0.13543253766717453, "grad_norm": 0.87109375, "learning_rate": 9.96370857079661e-06, "loss": 1.7535722732543946, "step": 200 }, { "epoch": 0.14220416455053325, "grad_norm": 0.86328125, "learning_rate": 9.948108428721782e-06, "loss": 1.7395360946655274, "step": 210 }, { "epoch": 0.148975791433892, "grad_norm": 0.88671875, "learning_rate": 9.92974345451598e-06, "loss": 1.7465991973876953, "step": 220 }, { "epoch": 0.15574741831725072, "grad_norm": 0.87890625, "learning_rate": 9.908623909892651e-06, "loss": 1.7506902694702149, "step": 230 }, { "epoch": 0.16251904520060945, "grad_norm": 0.8984375, "learning_rate": 9.884761595724068e-06, "loss": 1.7368896484375, "step": 240 }, { "epoch": 0.16929067208396817, "grad_norm": 0.8671875, "learning_rate": 9.858169845447417e-06, "loss": 1.7515613555908203, "step": 250 }, { "epoch": 0.1760622989673269, "grad_norm": 0.85546875, "learning_rate": 9.828863517614533e-06, "loss": 1.7509956359863281, "step": 260 }, { "epoch": 0.1828339258506856, "grad_norm": 0.9140625, "learning_rate": 9.796858987589462e-06, "loss": 1.753628921508789, "step": 270 }, { "epoch": 0.18960555273404436, "grad_norm": 0.85546875, "learning_rate": 9.762174138398456e-06, "loss": 1.7379936218261718, "step": 280 }, { "epoch": 0.19637717961740309, "grad_norm": 0.88671875, "learning_rate": 9.724828350737574e-06, "loss": 1.7442964553833007, "step": 290 }, { "epoch": 0.2031488065007618, "grad_norm": 0.87109375, "learning_rate": 9.684842492143399e-06, "loss": 1.7366142272949219, "step": 300 }, { "epoch": 0.20992043338412053, "grad_norm": 0.84765625, "learning_rate": 9.642238905333e-06, "loss": 1.7396051406860351, "step": 310 }, { "epoch": 0.21669206026747925, "grad_norm": 0.87109375, "learning_rate": 9.597041395719573e-06, "loss": 1.732611083984375, "step": 320 }, { "epoch": 0.22346368715083798, "grad_norm": 0.8828125, "learning_rate": 9.549275218110818e-06, "loss": 1.7453182220458985, "step": 330 }, { "epoch": 0.23023531403419673, "grad_norm": 0.875, "learning_rate": 9.498967062597403e-06, "loss": 1.7297761917114258, "step": 340 }, { "epoch": 0.23700694091755545, "grad_norm": 0.875, "learning_rate": 9.446145039639486e-06, "loss": 1.728118324279785, "step": 350 }, { "epoch": 0.24377856780091417, "grad_norm": 0.890625, "learning_rate": 9.390838664359539e-06, "loss": 1.7387624740600587, "step": 360 }, { "epoch": 0.2505501946842729, "grad_norm": 0.85546875, "learning_rate": 9.333078840050331e-06, "loss": 1.7364713668823242, "step": 370 }, { "epoch": 0.2573218215676316, "grad_norm": 0.8828125, "learning_rate": 9.27289784090723e-06, "loss": 1.7236080169677734, "step": 380 }, { "epoch": 0.26409344845099036, "grad_norm": 0.890625, "learning_rate": 9.210329293994495e-06, "loss": 1.7224924087524414, "step": 390 }, { "epoch": 0.27086507533434906, "grad_norm": 0.8671875, "learning_rate": 9.145408160455642e-06, "loss": 1.7099193572998046, "step": 400 }, { "epoch": 0.2776367022177078, "grad_norm": 0.8515625, "learning_rate": 9.078170715978353e-06, "loss": 1.737176513671875, "step": 410 }, { "epoch": 0.2844083291010665, "grad_norm": 0.9140625, "learning_rate": 9.008654530524883e-06, "loss": 1.73763427734375, "step": 420 }, { "epoch": 0.29117995598442525, "grad_norm": 0.85546875, "learning_rate": 8.936898447339257e-06, "loss": 1.7290821075439453, "step": 430 }, { "epoch": 0.297951582867784, "grad_norm": 0.8984375, "learning_rate": 8.86294256124301e-06, "loss": 1.7403568267822265, "step": 440 }, { "epoch": 0.3047232097511427, "grad_norm": 0.859375, "learning_rate": 8.786828196231584e-06, "loss": 1.7217792510986327, "step": 450 }, { "epoch": 0.31149483663450145, "grad_norm": 0.87109375, "learning_rate": 8.708597882383908e-06, "loss": 1.7103708267211915, "step": 460 }, { "epoch": 0.31826646351786014, "grad_norm": 0.91796875, "learning_rate": 8.62829533209805e-06, "loss": 1.7208784103393555, "step": 470 }, { "epoch": 0.3250380904012189, "grad_norm": 0.859375, "learning_rate": 8.545965415666254e-06, "loss": 1.7223230361938477, "step": 480 }, { "epoch": 0.33180971728457764, "grad_norm": 0.8671875, "learning_rate": 8.46165413620295e-06, "loss": 1.719701385498047, "step": 490 }, { "epoch": 0.33858134416793634, "grad_norm": 0.85546875, "learning_rate": 8.375408603939827e-06, "loss": 1.721092987060547, "step": 500 }, { "epoch": 0.33858134416793634, "eval_loss": 1.7143864631652832, "eval_runtime": 177.179, "eval_samples_per_second": 5.401, "eval_steps_per_second": 0.677, "step": 500 }, { "epoch": 0.3453529710512951, "grad_norm": 0.859375, "learning_rate": 8.287277009902237e-06, "loss": 1.7325265884399415, "step": 510 }, { "epoch": 0.3521245979346538, "grad_norm": 0.83984375, "learning_rate": 8.197308598981731e-06, "loss": 1.7298921585083007, "step": 520 }, { "epoch": 0.35889622481801253, "grad_norm": 0.8828125, "learning_rate": 8.105553642419708e-06, "loss": 1.6982412338256836, "step": 530 }, { "epoch": 0.3656678517013712, "grad_norm": 0.91015625, "learning_rate": 8.012063409717578e-06, "loss": 1.7173789978027343, "step": 540 }, { "epoch": 0.37243947858473, "grad_norm": 0.875, "learning_rate": 7.916890139989147e-06, "loss": 1.724541473388672, "step": 550 }, { "epoch": 0.3792111054680887, "grad_norm": 0.859375, "learning_rate": 7.820087012771184e-06, "loss": 1.701674461364746, "step": 560 }, { "epoch": 0.3859827323514474, "grad_norm": 0.85546875, "learning_rate": 7.721708118308556e-06, "loss": 1.7177881240844726, "step": 570 }, { "epoch": 0.39275435923480617, "grad_norm": 0.87890625, "learning_rate": 7.621808427330447e-06, "loss": 1.6985021591186524, "step": 580 }, { "epoch": 0.39952598611816487, "grad_norm": 0.87109375, "learning_rate": 7.5204437603346224e-06, "loss": 1.709127426147461, "step": 590 }, { "epoch": 0.4062976130015236, "grad_norm": 0.88671875, "learning_rate": 7.417670756396863e-06, "loss": 1.7201419830322267, "step": 600 }, { "epoch": 0.41306923988488237, "grad_norm": 0.8984375, "learning_rate": 7.313546841522998e-06, "loss": 1.7153247833251952, "step": 610 }, { "epoch": 0.41984086676824106, "grad_norm": 0.875, "learning_rate": 7.2081301965612435e-06, "loss": 1.707881546020508, "step": 620 }, { "epoch": 0.4266124936515998, "grad_norm": 0.87109375, "learning_rate": 7.10147972469275e-06, "loss": 1.7271339416503906, "step": 630 }, { "epoch": 0.4333841205349585, "grad_norm": 1.3515625, "learning_rate": 6.993655018518541e-06, "loss": 1.7222976684570312, "step": 640 }, { "epoch": 0.44015574741831726, "grad_norm": 0.85546875, "learning_rate": 6.884716326761218e-06, "loss": 1.7006675720214843, "step": 650 }, { "epoch": 0.44692737430167595, "grad_norm": 0.87109375, "learning_rate": 6.774724520600069e-06, "loss": 1.6978439331054687, "step": 660 }, { "epoch": 0.4536990011850347, "grad_norm": 0.87890625, "learning_rate": 6.663741059658337e-06, "loss": 1.7124168395996093, "step": 670 }, { "epoch": 0.46047062806839345, "grad_norm": 0.87890625, "learning_rate": 6.551827957661722e-06, "loss": 1.7023361206054688, "step": 680 }, { "epoch": 0.46724225495175215, "grad_norm": 0.86328125, "learning_rate": 6.439047747787242e-06, "loss": 1.700748825073242, "step": 690 }, { "epoch": 0.4740138818351109, "grad_norm": 0.85546875, "learning_rate": 6.325463447721852e-06, "loss": 1.6977190017700194, "step": 700 }, { "epoch": 0.4807855087184696, "grad_norm": 0.8984375, "learning_rate": 6.211138524450347e-06, "loss": 1.7250362396240235, "step": 710 }, { "epoch": 0.48755713560182834, "grad_norm": 0.90234375, "learning_rate": 6.096136858792193e-06, "loss": 1.7249008178710938, "step": 720 }, { "epoch": 0.4943287624851871, "grad_norm": 0.8671875, "learning_rate": 5.980522709707132e-06, "loss": 1.7153186798095703, "step": 730 }, { "epoch": 0.5011003893685458, "grad_norm": 0.8828125, "learning_rate": 5.864360678389497e-06, "loss": 1.6841873168945312, "step": 740 }, { "epoch": 0.5078720162519045, "grad_norm": 0.8515625, "learning_rate": 5.747715672171295e-06, "loss": 1.7151117324829102, "step": 750 }, { "epoch": 0.5146436431352632, "grad_norm": 0.95703125, "learning_rate": 5.630652868254229e-06, "loss": 1.704267692565918, "step": 760 }, { "epoch": 0.521415270018622, "grad_norm": 0.88671875, "learning_rate": 5.51323767729093e-06, "loss": 1.7240329742431642, "step": 770 }, { "epoch": 0.5281868969019807, "grad_norm": 0.87890625, "learning_rate": 5.395535706835744e-06, "loss": 1.7058921813964845, "step": 780 }, { "epoch": 0.5349585237853395, "grad_norm": 0.8828125, "learning_rate": 5.27761272468549e-06, "loss": 1.6999113082885742, "step": 790 }, { "epoch": 0.5417301506686981, "grad_norm": 0.9140625, "learning_rate": 5.159534622130695e-06, "loss": 1.7173538208007812, "step": 800 }, { "epoch": 0.5485017775520569, "grad_norm": 0.85546875, "learning_rate": 5.04136737713781e-06, "loss": 1.706464958190918, "step": 810 }, { "epoch": 0.5552734044354156, "grad_norm": 0.84765625, "learning_rate": 4.923177017483002e-06, "loss": 1.7123580932617188, "step": 820 }, { "epoch": 0.5620450313187744, "grad_norm": 0.84765625, "learning_rate": 4.805029583858115e-06, "loss": 1.7076505661010741, "step": 830 }, { "epoch": 0.568816658202133, "grad_norm": 0.87109375, "learning_rate": 4.686991092969408e-06, "loss": 1.7007432937622071, "step": 840 }, { "epoch": 0.5755882850854918, "grad_norm": 0.83984375, "learning_rate": 4.569127500649701e-06, "loss": 1.7156892776489259, "step": 850 }, { "epoch": 0.5823599119688505, "grad_norm": 0.85546875, "learning_rate": 4.4515046650045316e-06, "loss": 1.6989547729492187, "step": 860 }, { "epoch": 0.5891315388522093, "grad_norm": 0.859375, "learning_rate": 4.334188309612923e-06, "loss": 1.701683235168457, "step": 870 }, { "epoch": 0.595903165735568, "grad_norm": 0.875, "learning_rate": 4.217243986803315e-06, "loss": 1.7004409790039063, "step": 880 }, { "epoch": 0.6026747926189266, "grad_norm": 0.88671875, "learning_rate": 4.100737041025188e-06, "loss": 1.727794075012207, "step": 890 }, { "epoch": 0.6094464195022854, "grad_norm": 0.89453125, "learning_rate": 3.984732572336837e-06, "loss": 1.6976716995239258, "step": 900 }, { "epoch": 0.6162180463856441, "grad_norm": 0.89453125, "learning_rate": 3.869295400029714e-06, "loss": 1.6927717208862305, "step": 910 }, { "epoch": 0.6229896732690029, "grad_norm": 0.84375, "learning_rate": 3.754490026409637e-06, "loss": 1.6997186660766601, "step": 920 }, { "epoch": 0.6297613001523616, "grad_norm": 0.93359375, "learning_rate": 3.6403806007551373e-06, "loss": 1.7196897506713866, "step": 930 }, { "epoch": 0.6365329270357203, "grad_norm": 0.83203125, "learning_rate": 3.527030883473055e-06, "loss": 1.7054462432861328, "step": 940 }, { "epoch": 0.643304553919079, "grad_norm": 0.890625, "learning_rate": 3.414504210471421e-06, "loss": 1.7200759887695312, "step": 950 }, { "epoch": 0.6500761808024378, "grad_norm": 0.890625, "learning_rate": 3.302863457769544e-06, "loss": 1.6951274871826172, "step": 960 }, { "epoch": 0.6568478076857965, "grad_norm": 0.90625, "learning_rate": 3.192171006365061e-06, "loss": 1.7151849746704102, "step": 970 }, { "epoch": 0.6636194345691553, "grad_norm": 0.8984375, "learning_rate": 3.0824887073775877e-06, "loss": 1.713322067260742, "step": 980 }, { "epoch": 0.6703910614525139, "grad_norm": 0.83984375, "learning_rate": 2.973877847488451e-06, "loss": 1.7172536849975586, "step": 990 }, { "epoch": 0.6771626883358727, "grad_norm": 0.859375, "learning_rate": 2.8663991146958064e-06, "loss": 1.7149576187133788, "step": 1000 }, { "epoch": 0.6771626883358727, "eval_loss": 1.7007688283920288, "eval_runtime": 165.432, "eval_samples_per_second": 5.785, "eval_steps_per_second": 0.725, "step": 1000 }, { "epoch": 0.6839343152192314, "grad_norm": 0.90625, "learning_rate": 2.7601125644042777e-06, "loss": 1.714142417907715, "step": 1010 }, { "epoch": 0.6907059421025902, "grad_norm": 0.859375, "learning_rate": 2.6550775858680793e-06, "loss": 1.7104360580444335, "step": 1020 }, { "epoch": 0.6974775689859489, "grad_norm": 0.90234375, "learning_rate": 2.551352869006338e-06, "loss": 1.7032684326171874, "step": 1030 }, { "epoch": 0.7042491958693076, "grad_norm": 0.86328125, "learning_rate": 2.4489963716092096e-06, "loss": 1.701323890686035, "step": 1040 }, { "epoch": 0.7110208227526663, "grad_norm": 0.890625, "learning_rate": 2.348065286953048e-06, "loss": 1.7169862747192384, "step": 1050 }, { "epoch": 0.7177924496360251, "grad_norm": 0.87890625, "learning_rate": 2.2486160118427958e-06, "loss": 1.701096534729004, "step": 1060 }, { "epoch": 0.7245640765193838, "grad_norm": 0.88671875, "learning_rate": 2.1507041150993813e-06, "loss": 1.700172233581543, "step": 1070 }, { "epoch": 0.7313357034027425, "grad_norm": 0.859375, "learning_rate": 2.054384306509794e-06, "loss": 1.7045093536376954, "step": 1080 }, { "epoch": 0.7381073302861012, "grad_norm": 0.859375, "learning_rate": 1.9597104062571337e-06, "loss": 1.7091920852661133, "step": 1090 }, { "epoch": 0.74487895716946, "grad_norm": 0.86328125, "learning_rate": 1.8667353148477547e-06, "loss": 1.7001871109008788, "step": 1100 }, { "epoch": 0.7516505840528187, "grad_norm": 0.85546875, "learning_rate": 1.7755109835522938e-06, "loss": 1.7016315460205078, "step": 1110 }, { "epoch": 0.7584222109361775, "grad_norm": 0.87890625, "learning_rate": 1.6860883853770848e-06, "loss": 1.7196449279785155, "step": 1120 }, { "epoch": 0.7651938378195361, "grad_norm": 0.89453125, "learning_rate": 1.5985174865822146e-06, "loss": 1.701955223083496, "step": 1130 }, { "epoch": 0.7719654647028948, "grad_norm": 0.85546875, "learning_rate": 1.5128472187620886e-06, "loss": 1.703407096862793, "step": 1140 }, { "epoch": 0.7787370915862536, "grad_norm": 0.875, "learning_rate": 1.4291254515041592e-06, "loss": 1.7057323455810547, "step": 1150 }, { "epoch": 0.7855087184696123, "grad_norm": 0.8828125, "learning_rate": 1.3473989656410413e-06, "loss": 1.6963571548461913, "step": 1160 }, { "epoch": 0.7922803453529711, "grad_norm": 0.8671875, "learning_rate": 1.2677134271110082e-06, "loss": 1.7136796951293944, "step": 1170 }, { "epoch": 0.7990519722363297, "grad_norm": 0.89453125, "learning_rate": 1.1901133614414352e-06, "loss": 1.7095062255859375, "step": 1180 }, { "epoch": 0.8058235991196885, "grad_norm": 0.875, "learning_rate": 1.114642128869473e-06, "loss": 1.7052017211914063, "step": 1190 }, { "epoch": 0.8125952260030472, "grad_norm": 0.8984375, "learning_rate": 1.0413419001138525e-06, "loss": 1.7166055679321288, "step": 1200 }, { "epoch": 0.819366852886406, "grad_norm": 0.87890625, "learning_rate": 9.702536328113305e-07, "loss": 1.7042055130004883, "step": 1210 }, { "epoch": 0.8261384797697647, "grad_norm": 0.8671875, "learning_rate": 9.014170486309875e-07, "loss": 1.6885286331176759, "step": 1220 }, { "epoch": 0.8329101066531234, "grad_norm": 0.84375, "learning_rate": 8.348706110791238e-07, "loss": 1.7065910339355468, "step": 1230 }, { "epoch": 0.8396817335364821, "grad_norm": 0.87109375, "learning_rate": 7.706515040071854e-07, "loss": 1.6999498367309571, "step": 1240 }, { "epoch": 0.8464533604198409, "grad_norm": 0.8828125, "learning_rate": 7.08795610834706e-07, "loss": 1.7021600723266601, "step": 1250 }, { "epoch": 0.8532249873031996, "grad_norm": 0.87890625, "learning_rate": 6.493374944988984e-07, "loss": 1.722920799255371, "step": 1260 }, { "epoch": 0.8599966141865584, "grad_norm": 0.8671875, "learning_rate": 5.923103781420708e-07, "loss": 1.7148597717285157, "step": 1270 }, { "epoch": 0.866768241069917, "grad_norm": 0.890625, "learning_rate": 5.377461265476868e-07, "loss": 1.7151250839233398, "step": 1280 }, { "epoch": 0.8735398679532758, "grad_norm": 0.8671875, "learning_rate": 4.856752283354277e-07, "loss": 1.7023918151855468, "step": 1290 }, { "epoch": 0.8803114948366345, "grad_norm": 0.8671875, "learning_rate": 4.3612677892519496e-07, "loss": 1.7045417785644532, "step": 1300 }, { "epoch": 0.8870831217199933, "grad_norm": 0.86328125, "learning_rate": 3.891284642796045e-07, "loss": 1.7008039474487304, "step": 1310 }, { "epoch": 0.8938547486033519, "grad_norm": 0.8671875, "learning_rate": 3.447065454340198e-07, "loss": 1.7126380920410156, "step": 1320 }, { "epoch": 0.9006263754867107, "grad_norm": 0.88671875, "learning_rate": 3.028858438227966e-07, "loss": 1.7127569198608399, "step": 1330 }, { "epoch": 0.9073980023700694, "grad_norm": 0.86328125, "learning_rate": 2.636897274099187e-07, "loss": 1.7151193618774414, "step": 1340 }, { "epoch": 0.9141696292534282, "grad_norm": 0.8515625, "learning_rate": 2.2714009763178945e-07, "loss": 1.704157829284668, "step": 1350 }, { "epoch": 0.9209412561367869, "grad_norm": 0.87890625, "learning_rate": 1.932573771594648e-07, "loss": 1.7036989212036133, "step": 1360 }, { "epoch": 0.9277128830201455, "grad_norm": 0.8671875, "learning_rate": 1.6206049848716765e-07, "loss": 1.7044996261596679, "step": 1370 }, { "epoch": 0.9344845099035043, "grad_norm": 1.109375, "learning_rate": 1.3356689335346728e-07, "loss": 1.7029462814331056, "step": 1380 }, { "epoch": 0.941256136786863, "grad_norm": 0.91015625, "learning_rate": 1.0779248300102352e-07, "loss": 1.7133670806884767, "step": 1390 }, { "epoch": 0.9480277636702218, "grad_norm": 0.859375, "learning_rate": 8.475166928034684e-08, "loss": 1.6992549896240234, "step": 1400 }, { "epoch": 0.9547993905535805, "grad_norm": 0.85546875, "learning_rate": 6.445732660254056e-08, "loss": 1.7066579818725587, "step": 1410 }, { "epoch": 0.9615710174369392, "grad_norm": 0.9140625, "learning_rate": 4.692079474552691e-08, "loss": 1.6963106155395509, "step": 1420 }, { "epoch": 0.9683426443202979, "grad_norm": 0.8515625, "learning_rate": 3.2151872517767194e-08, "loss": 1.7118385314941407, "step": 1430 }, { "epoch": 0.9751142712036567, "grad_norm": 0.84375, "learning_rate": 2.0158812283030403e-08, "loss": 1.6870197296142577, "step": 1440 }, { "epoch": 0.9818858980870154, "grad_norm": 0.87109375, "learning_rate": 1.094831534925289e-08, "loss": 1.7051671981811523, "step": 1450 }, { "epoch": 0.9886575249703742, "grad_norm": 0.86328125, "learning_rate": 4.5255282240802554e-09, "loss": 1.7082006454467773, "step": 1460 }, { "epoch": 0.9954291518537328, "grad_norm": 0.8828125, "learning_rate": 8.940397391787869e-10, "loss": 1.707107162475586, "step": 1470 }, { "epoch": 1.0, "eval_loss": 1.7002202272415161, "eval_runtime": 169.1979, "eval_samples_per_second": 5.656, "eval_steps_per_second": 0.709, "step": 1477 }, { "epoch": 1.0, "step": 1477, "total_flos": 2.103177196962902e+18, "train_loss": 1.7256613558986822, "train_runtime": 29239.084, "train_samples_per_second": 1.616, "train_steps_per_second": 0.051 } ], "logging_steps": 10, "max_steps": 1477, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.103177196962902e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }