Files
train_boolq_42_1776331558/trainer_log.jsonl

1082 lines
256 KiB
Plaintext
Raw Permalink Normal View History

{"current_steps": 5, "total_steps": 5305, "loss": 0.8967, "lr": 3.766478342749529e-08, "epoch": 0.00471253534401508, "percentage": 0.09, "elapsed_time": "0:00:00", "remaining_time": "0:15:25", "throughput": 12319.81, "total_tokens": 10752}
{"current_steps": 10, "total_steps": 5305, "loss": 0.865, "lr": 8.474576271186442e-08, "epoch": 0.00942507068803016, "percentage": 0.19, "elapsed_time": "0:00:01", "remaining_time": "0:12:19", "throughput": 14852.79, "total_tokens": 20736}
{"current_steps": 15, "total_steps": 5305, "loss": 0.8712, "lr": 1.3182674199623353e-07, "epoch": 0.01413760603204524, "percentage": 0.28, "elapsed_time": "0:00:01", "remaining_time": "0:11:24", "throughput": 16082.47, "total_tokens": 31232}
{"current_steps": 20, "total_steps": 5305, "loss": 0.7056, "lr": 1.7890772128060264e-07, "epoch": 0.01885014137606032, "percentage": 0.38, "elapsed_time": "0:00:02", "remaining_time": "0:10:44", "throughput": 16536.51, "total_tokens": 40320}
{"current_steps": 25, "total_steps": 5305, "loss": 0.5698, "lr": 2.2598870056497177e-07, "epoch": 0.0235626767200754, "percentage": 0.47, "elapsed_time": "0:00:02", "remaining_time": "0:10:30", "throughput": 17139.68, "total_tokens": 51136}
{"current_steps": 30, "total_steps": 5305, "loss": 0.4948, "lr": 2.730696798493409e-07, "epoch": 0.02827521206409048, "percentage": 0.57, "elapsed_time": "0:00:03", "remaining_time": "0:10:28", "throughput": 17731.14, "total_tokens": 63424}
{"current_steps": 35, "total_steps": 5305, "loss": 0.3516, "lr": 3.2015065913371e-07, "epoch": 0.03298774740810556, "percentage": 0.66, "elapsed_time": "0:00:04", "remaining_time": "0:10:25", "throughput": 18135.68, "total_tokens": 75328}
{"current_steps": 40, "total_steps": 5305, "loss": 0.3321, "lr": 3.6723163841807916e-07, "epoch": 0.03770028275212064, "percentage": 0.75, "elapsed_time": "0:00:04", "remaining_time": "0:10:20", "throughput": 18408.17, "total_tokens": 86784}
{"current_steps": 45, "total_steps": 5305, "loss": 0.3329, "lr": 4.1431261770244826e-07, "epoch": 0.04241281809613572, "percentage": 0.85, "elapsed_time": "0:00:05", "remaining_time": "0:10:19", "throughput": 18696.48, "total_tokens": 99136}
{"current_steps": 50, "total_steps": 5305, "loss": 0.4395, "lr": 4.613935969868174e-07, "epoch": 0.0471253534401508, "percentage": 0.94, "elapsed_time": "0:00:05", "remaining_time": "0:10:10", "throughput": 18708.59, "total_tokens": 108672}
{"current_steps": 55, "total_steps": 5305, "loss": 0.4549, "lr": 5.084745762711865e-07, "epoch": 0.051837888784165884, "percentage": 1.04, "elapsed_time": "0:00:06", "remaining_time": "0:10:06", "throughput": 18856.96, "total_tokens": 119808}
{"current_steps": 60, "total_steps": 5305, "loss": 0.3434, "lr": 5.555555555555555e-07, "epoch": 0.05655042412818096, "percentage": 1.13, "elapsed_time": "0:00:06", "remaining_time": "0:10:00", "throughput": 18923.12, "total_tokens": 130048}
{"current_steps": 65, "total_steps": 5305, "loss": 0.3434, "lr": 6.026365348399247e-07, "epoch": 0.061262959472196045, "percentage": 1.23, "elapsed_time": "0:00:07", "remaining_time": "0:10:01", "throughput": 19096.46, "total_tokens": 142464}
{"current_steps": 70, "total_steps": 5305, "loss": 0.3516, "lr": 6.497175141242938e-07, "epoch": 0.06597549481621112, "percentage": 1.32, "elapsed_time": "0:00:08", "remaining_time": "0:09:59", "throughput": 19211.51, "total_tokens": 154048}
{"current_steps": 75, "total_steps": 5305, "loss": 0.3088, "lr": 6.96798493408663e-07, "epoch": 0.0706880301602262, "percentage": 1.41, "elapsed_time": "0:00:08", "remaining_time": "0:10:00", "throughput": 19354.97, "total_tokens": 166720}
{"current_steps": 80, "total_steps": 5305, "loss": 0.3218, "lr": 7.43879472693032e-07, "epoch": 0.07540056550424128, "percentage": 1.51, "elapsed_time": "0:00:09", "remaining_time": "0:10:00", "throughput": 19489.27, "total_tokens": 179200}
{"current_steps": 85, "total_steps": 5305, "loss": 0.3962, "lr": 7.909604519774013e-07, "epoch": 0.08011310084825636, "percentage": 1.6, "elapsed_time": "0:00:09", "remaining_time": "0:09:58", "throughput": 19551.39, "total_tokens": 190464}
{"current_steps": 90, "total_steps": 5305, "loss": 0.3243, "lr": 8.380414312617704e-07, "epoch": 0.08482563619227144, "percentage": 1.7, "elapsed_time": "0:00:10", "remaining_time": "0:10:03", "throughput": 19708.81, "total_tokens": 205376}
{"current_steps": 95, "total_steps": 5305, "loss": 0.383, "lr": 8.851224105461394e-07, "epoch": 0.08953817153628653, "percentage": 1.79, "elapsed_time": "0:00:10", "remaining_time": "0:09:59", "throughput": 19706.64, "total_tokens": 215424}
{"current_steps": 100, "total_steps": 5305, "loss": 0.2987, "lr": 9.322033898305086e-07, "epoch": 0.0942507068803016, "percentage": 1.89, "elapsed_time": "0:00:11", "remaining_time": "0:09:57", "throughput": 19740.77, "total_tokens": 226688}
{"current_steps": 105, "total_steps": 5305, "loss": 0.2859, "lr": 9.792843691148776e-07, "epoch": 0.09896324222431668, "percentage": 1.98, "elapsed_time": "0:00:12", "remaining_time": "0:09:54", "throughput": 19757.7, "total_tokens": 237248}
{"current_steps": 110, "total_steps": 5305, "loss": 0.3517, "lr": 1.0263653483992468e-06, "epoch": 0.10367577756833177, "percentage": 2.07, "elapsed_time": "0:00:12", "remaining_time": "0:10:00", "throughput": 19974.86, "total_tokens": 254144}
{"current_steps": 115, "total_steps": 5305, "loss": 0.296, "lr": 1.073446327683616e-06, "epoch": 0.10838831291234684, "percentage": 2.17, "elapsed_time": "0:00:13", "remaining_time": "0:09:59", "throughput": 20015.85, "total_tokens": 265920}
{"current_steps": 120, "total_steps": 5305, "loss": 0.4375, "lr": 1.120527306967985e-06, "epoch": 0.11310084825636192, "percentage": 2.26, "elapsed_time": "0:00:13", "remaining_time": "0:09:57", "throughput": 20038.79, "total_tokens": 277184}
{"current_steps": 125, "total_steps": 5305, "loss": 0.2998, "lr": 1.167608286252354e-06, "epoch": 0.117813383600377, "percentage": 2.36, "elapsed_time": "0:00:14", "remaining_time": "0:09:56", "throughput": 20063.22, "total_tokens": 289024}
{"current_steps": 130, "total_steps": 5305, "loss": 0.2883, "lr": 1.2146892655367234e-06, "epoch": 0.12252591894439209, "percentage": 2.45, "elapsed_time": "0:00:14", "remaining_time": "0:09:54", "throughput": 20050.7, "total_tokens": 299456}
{"current_steps": 135, "total_steps": 5305, "loss": 0.3329, "lr": 1.2617702448210926e-06, "epoch": 0.12723845428840716, "percentage": 2.54, "elapsed_time": "0:00:15", "remaining_time": "0:09:56", "throughput": 20140.53, "total_tokens": 313728}
{"current_steps": 140, "total_steps": 5305, "loss": 0.2533, "lr": 1.3088512241054615e-06, "epoch": 0.13195098963242224, "percentage": 2.64, "elapsed_time": "0:00:16", "remaining_time": "0:09:56", "throughput": 20178.41, "total_tokens": 326080}
{"current_steps": 145, "total_steps": 5305, "loss": 0.2705, "lr": 1.3559322033898307e-06, "epoch": 0.13666352497643733, "percentage": 2.73, "elapsed_time": "0:00:16", "remaining_time": "0:09:55", "throughput": 20227.49, "total_tokens": 338688}
{"current_steps": 150, "total_steps": 5305, "loss": 0.3576, "lr": 1.4030131826741996e-06, "epoch": 0.1413760603204524, "percentage": 2.83, "elapsed_time": "0:00:17", "remaining_time": "0:09:54", "throughput": 20223.12, "total_tokens": 349632}
{"current_steps": 155, "total_steps": 5305, "loss": 0.2256, "lr": 1.4500941619585688e-06, "epoch": 0.1460885956644675, "percentage": 2.92, "elapsed_time": "0:00:17", "remaining_time": "0:09:55", "throughput": 20290.72, "total_tokens": 363968}
{"current_steps": 160, "total_steps": 5305, "loss": 0.4483, "lr": 1.4971751412429381e-06, "epoch": 0.15080113100848255, "percentage": 3.02, "elapsed_time": "0:00:18", "remaining_time": "0:09:54", "throughput": 20306.75, "total_tokens": 375680}
{"current_steps": 165, "total_steps": 5305, "loss": 0.269, "lr": 1.544256120527307e-06, "epoch": 0.15551366635249764, "percentage": 3.11, "elapsed_time": "0:00:19", "remaining_time": "0:09:52", "throughput": 20301.51, "total_tokens": 386368}
{"current_steps": 170, "total_steps": 5305, "loss": 0.3019, "lr": 1.5913370998116762e-06, "epoch": 0.16022620169651272, "percentage": 3.2, "elapsed_time": "0:00:19", "remaining_time": "0:09:50", "throughput": 20297.31, "total_tokens": 396992}
{"current_steps": 175, "total_steps": 5305, "loss": 0.3423, "lr": 1.6384180790960452e-06, "epoch": 0.1649387370405278, "percentage": 3.3, "elapsed_time": "0:00:20", "remaining_time": "0:09:50", "throughput": 20316.18, "total_tokens": 408960}
{"current_steps": 180, "total_steps": 5305, "loss": 0.2813, "lr": 1.6854990583804145e-06, "epoch": 0.1696512723845429, "percentage": 3.39, "elapsed_time": "0:00:20", "remaining_time": "0:09:47", "throughput": 20293.69, "total_tokens": 419008}
{"current_steps": 185, "total_steps": 5305, "loss": 0.2445, "lr": 1.7325800376647837e-06, "epoch": 0.17436380772855797, "percentage": 3.49, "elapsed_time": "0:00:21", "remaining_time": "0:09:46", "throughput": 20298.59, "total_tokens": 430144}
{"current_steps": 190, "total_steps": 5305, "loss": 0.2385, "lr": 1.7796610169491526e-06, "epoch": 0.17907634307257306, "percentage": 3.58, "elapsed_time": "0:00:21", "remaining_time": "0:09:45", "throughput": 20300.11, "total_tokens": 441216}
{"current_steps": 195, "total_steps": 5305, "loss": 0.2216, "lr": 1.8267419962335218e-06, "epoch": 0.18378887841658811, "percentage": 3.68, "elapsed_time": "0:00:22", "remaining_time": "0:09:43", "throughput": 20290.17, "total_tokens": 451584}
{"current_steps": 200, "total_steps": 5305, "loss": 0.4569, "lr": 1.873822975517891e-06, "epoch": 0.1885014137606032, "percentage": 3.77, "elapsed_time": "0:00:22", "remaining_time": "0:09:43", "throughput": 20317.38, "total_tokens": 464384}
{"current_steps": 205, "total_steps": 5305, "loss": 0.4075, "lr": 1.92090395480226e-06, "epoch": 0.19321394910461828, "percentage": 3.86, "elapsed_time": "0:00:23", "remaining_time": "0:09:47", "throughput": 20415.14, "total_tokens": 481792}
{"current_steps": 210, "total_steps": 5305, "loss": 0.2703, "lr": 1.9679849340866293e-06, "epoch": 0.19792648444863337, "percentage": 3.96, "elapsed_time": "0:00:24", "remaining_time": "0:09:46", "throughput": 20430.22, "total_tokens": 493952}
{"current_steps": 215, "total_steps": 5305, "loss": 0.2604, "lr": 2.015065913370998e-06, "epoch": 0.20263901979264845, "percentage": 4.05, "elapsed_time": "0:00:24", "remaining_time": "0:09:45", "throughput": 20429.09, "total_tokens": 504832}
{"current_steps": 220, "total_steps": 5305, "loss": 0.277, "lr": 2.062146892655367e-06, "epoch": 0.20735155513666353, "percentage": 4.15, "elapsed_time": "0:00:25", "remaining_time": "0:09:42", "throughput": 20397.41, "total_tokens": 514368}
{"current_steps": 225, "total_steps": 5305, "loss": 0.2405, "lr": 2.1092278719397365e-06, "epoch": 0.21206409048067862, "percentage": 4.24, "elapsed_time": "0:00:25", "remaining_time": "0:09:41", "throughput": 20404.06, "total_tokens": 525568}
{"current_steps": 230, "total_steps": 5305, "loss": 0.259, "lr": 2.1563088512241055e-06, "epoch": 0.21677662582469368, "percentage": 4.34, "elapsed_time": "0:00:26", "remaining_time": "0:09:40", "throughput": 20427.31, "total_tokens": 537664}
{"current_steps": 235, "total_steps": 5305, "loss": 0.2561, "lr": 2.203389830508475e-06, "epoch": 0.22148916116870876, "percentage": 4.43, "elapsed_time": "0:00:26", "remaining_time": "0:09:38", "throughput": 20409.56, "total_tokens": 547584}
{"current_steps": 240, "total_steps": 5305, "loss": 0.3491, "lr": 2.2504708097928438e-06, "epoch": 0.22620169651272384, "percentage": 4.52, "elapsed_time": "0:00:27", "remaining_time": "0:09:37", "throughput": 20403.11, "total_tokens": 558144}
{"current_steps": 245, "total_steps": 5305, "loss": 0.2543, "lr": 2.297551789077213e-06, "epoch": 0.23091423185673893, "percentage": 4.62, "elapsed_time": "0:00:27", "remaining_time": "0:09:36", "throughput": 20398.15, "total_tokens": 569024}
{"current_steps": 250, "total_steps": 5305, "loss": 0.3033, "lr": 2.344632768361582e-06, "epoch": 0.235626767200754, "percentage": 4.71, "elapsed_time": "0:00:28", "remaining_time": "0:09:35", "throughput": 20412.07, "total_tokens": 580864}
{"current_steps": 255, "total_steps": 5305, "loss": 0.2747, "lr": 2.391713747645951e-06, "epoch": 0.2403393025447691, "percentage": 4.81, "elapsed_time": "0:00:29", "remaining_time": "0:09:34", "throughput": 20423.53, "total_tokens": 592768}
{"current_steps": 260, "total_steps": 5305, "loss": 0.336, "lr": 2.4387947269303204e-06, "epoch": 0.24505183788878418, "percentage": 4.9, "elapsed_time": "0:00:29", "remaining_time": "0:09:33", "throughput": 20428.71, "total_tokens": 604032}
{"current_steps": 265, "total_steps": 5305, "loss": 0.2277, "lr": 2.4858757062146898e-06, "epoch": 0.24976437323279924, "percentage": 5.0, "elapsed_time": "0:00:30", "remaining_time": "0:09:33", "throughput": 20440.08, "total_tokens": 616256}
{"current_steps": 266, "total_steps": 5305, "eval_loss": 0.25048765540122986, "epoch": 0.25070688030160226, "percentage": 5.01, "elapsed_time": "0:00:32", "remaining_time": "0:10:24", "throughput": 18744.75, "total_tokens": 618432}
{"current_steps": 270, "total_steps": 5305, "loss": 0.2331, "lr": 2.5329566854990583e-06, "epoch": 0.2544769085768143, "percentage": 5.09, "elapsed_time": "0:01:58", "remaining_time": "0:36:52", "throughput": 5285.23, "total_tokens": 627072}
{"current_steps": 275, "total_steps": 5305, "loss": 0.157, "lr": 2.5800376647834272e-06, "epoch": 0.25918944392082943, "percentage": 5.18, "elapsed_time": "0:01:59", "remaining_time": "0:36:20", "throughput": 5357.1, "total_tokens": 638592}
{"current_steps": 280, "total_steps": 5305, "loss": 0.3209, "lr": 2.627118644067797e-06, "epoch": 0.2639019792648445, "percentage": 5.28, "elapsed_time": "0:01:59", "remaining_time": "0:35:48", "throughput": 5416.58, "total_tokens": 648448}
{"current_steps": 285, "total_steps": 5305, "loss": 0.2578, "lr": 2.674199623352166e-06, "epoch": 0.26861451460885954, "percentage": 5.37, "elapsed_time": "0:02:00", "remaining_time": "0:35:19", "throughput": 5506.97, "total_tokens": 662784}
{"current_steps": 290, "total_steps": 5305, "loss": 0.3557, "lr": 2.7212806026365353e-06, "epoch": 0.27332704995287466, "percentage": 5.47, "elapsed_time": "0:02:00", "remaining_time": "0:34:50", "throughput": 5573.82, "total_tokens": 673856}
{"current_steps": 295, "total_steps": 5305, "loss": 0.2089, "lr": 2.7683615819209043e-06, "epoch": 0.2780395852968897, "percentage": 5.56, "elapsed_time": "0:02:01", "remaining_time": "0:34:21", "throughput": 5627.84, "total_tokens": 683136}
{"current_steps": 300, "total_steps": 5305, "loss": 0.2989, "lr": 2.8154425612052732e-06, "epoch": 0.2827521206409048, "percentage": 5.66, "elapsed_time": "0:02:01", "remaining_time": "0:33:54", "throughput": 5697.58, "total_tokens": 694784}
{"current_steps": 305, "total_steps": 5305, "loss": 0.2632, "lr": 2.862523540489642e-06, "epoch": 0.2874646559849199, "percentage": 5.75, "elapsed_time": "0:02:02", "remaining_time": "0:33:28", "throughput": 5768.13, "total_tokens": 706624}
{"current_steps": 310, "total_steps": 5305, "loss": 0.2979, "lr": 2.9096045197740115e-06, "epoch": 0.292177191328935, "percentage": 5.84, "elapsed_time": "0:02:03", "remaining_time": "0:33:02", "throughput": 5826.65, "total_tokens": 716800}
{"current_steps": 315, "total_steps": 5305, "loss": 0.3261, "lr": 2.9566854990583805e-06, "epoch": 0.29688972667295005, "percentage": 5.94, "elapsed_time": "0:02:03", "remaining_time": "0:32:37", "throughput": 5896.2, "total_tokens": 728704}
{"current_steps": 320, "total_steps": 5305, "loss": 0.1851, "lr": 3.00376647834275e-06, "epoch": 0.3016022620169651, "percentage": 6.03, "elapsed_time": "0:02:04", "remaining_time": "0:32:14", "throughput": 5963.2, "total_tokens": 740352}
{"current_steps": 325, "total_steps": 5305, "loss": 0.2727, "lr": 3.0508474576271192e-06, "epoch": 0.3063147973609802, "percentage": 6.13, "elapsed_time": "0:02:04", "remaining_time": "0:31:50", "throughput": 6029.47, "total_tokens": 751936}
{"current_steps": 330, "total_steps": 5305, "loss": 0.3077, "lr": 3.097928436911488e-06, "epoch": 0.3110273327049953, "percentage": 6.22, "elapsed_time": "0:02:05", "remaining_time": "0:31:28", "throughput": 6093.41, "total_tokens": 763264}
{"current_steps": 335, "total_steps": 5305, "loss": 0.3285, "lr": 3.145009416195857e-06, "epoch": 0.3157398680490104, "percentage": 6.31, "elapsed_time": "0:02:05", "remaining_time": "0:31:05", "throughput": 6146.11, "total_tokens": 772992}
{"current_steps": 340, "total_steps": 5305, "loss": 0.2493, "lr": 3.192090395480226e-06, "epoch": 0.32045240339302544, "percentage": 6.41, "elapsed_time": "0:02:06", "remaining_time": "0:30:45", "throughput": 6226.27, "total_tokens": 787008}
{"current_steps": 345, "total_steps": 5305, "loss": 0.233, "lr": 3.2391713747645954e-06, "epoch": 0.32516493873704055, "percentage": 6.5, "elapsed_time": "0:02:06", "remaining_time": "0:30:25", "throughput": 6291.61, "total_tokens": 798848}
{"current_steps": 350, "total_steps": 5305, "loss": 0.3409, "lr": 3.2862523540489644e-06, "epoch": 0.3298774740810556, "percentage": 6.6, "elapsed_time": "0:02:07", "remaining_time": "0:30:05", "throughput": 6362.1, "total_tokens": 811584}
{"current_steps": 355, "total_steps": 5305, "loss": 0.1946, "lr": 3.3333333333333333e-06, "epoch": 0.33459000942507067, "percentage": 6.69, "elapsed_time": "0:02:08", "remaining_time": "0:29:46", "throughput": 6418.8, "total_tokens": 822208}
{"current_steps": 360, "total_steps": 5305, "loss": 0.3024, "lr": 3.3804143126177023e-06, "epoch": 0.3393025447690858, "percentage": 6.79, "elapsed_time": "0:02:08", "remaining_time": "0:29:27", "throughput": 6480.74, "total_tokens": 833792}
{"current_steps": 365, "total_steps": 5305, "loss": 0.347, "lr": 3.427495291902072e-06, "epoch": 0.34401508011310084, "percentage": 6.88, "elapsed_time": "0:02:09", "remaining_time": "0:29:08", "throughput": 6543.51, "total_tokens": 845568}
{"current_steps": 370, "total_steps": 5305, "loss": 0.3787, "lr": 3.474576271186441e-06, "epoch": 0.34872761545711595, "percentage": 6.97, "elapsed_time": "0:02:09", "remaining_time": "0:28:50", "throughput": 6591.04, "total_tokens": 855040}
{"current_steps": 375, "total_steps": 5305, "loss": 0.3066, "lr": 3.5216572504708104e-06, "epoch": 0.353440150801131, "percentage": 7.07, "elapsed_time": "0:02:10", "remaining_time": "0:28:33", "throughput": 6658.45, "total_tokens": 867712}
{"current_steps": 380, "total_steps": 5305, "loss": 0.2843, "lr": 3.5687382297551793e-06, "epoch": 0.3581526861451461, "percentage": 7.16, "elapsed_time": "0:02:10", "remaining_time": "0:28:16", "throughput": 6729.13, "total_tokens": 880960}
{"current_steps": 385, "total_steps": 5305, "loss": 0.2422, "lr": 3.6158192090395483e-06, "epoch": 0.36286522148916117, "percentage": 7.26, "elapsed_time": "0:02:11", "remaining_time": "0:28:01", "throughput": 6802.08, "total_tokens": 894784}
{"current_steps": 390, "total_steps": 5305, "loss": 0.2664, "lr": 3.662900188323917e-06, "epoch": 0.36757775683317623, "percentage": 7.35, "elapsed_time": "0:02:12", "remaining_time": "0:27:44", "throughput": 6856.47, "total_tokens": 905600}
{"current_steps": 395, "total_steps": 5305, "loss": 0.3223, "lr": 3.7099811676082866e-06, "epoch": 0.37229029217719134, "percentage": 7.45, "elapsed_time": "0:02:12", "remaining_time": "0:27:28", "throughput": 6901.97, "total_tokens": 915072}
{"current_steps": 400, "total_steps": 5305, "loss": 0.2066, "lr": 3.7570621468926555e-06, "epoch": 0.3770028275212064, "percentage": 7.54, "elapsed_time": "0:02:13", "remaining_time": "0:27:12", "throughput": 6965.23, "total_tokens": 927552}
{"current_steps": 405, "total_steps": 5305, "loss": 0.2964, "lr": 3.8041431261770245e-06, "epoch": 0.3817153628652215, "percentage": 7.63, "elapsed_time": "0:02:13", "remaining_time": "0:26:58", "throughput": 7028.74, "total_tokens": 940160}
{"current_steps": 410, "total_steps": 5305, "loss": 0.1989, "lr": 3.851224105461394e-06, "epoch": 0.38642789820923656, "percentage": 7.73, "elapsed_time": "0:02:14", "remaining_time": "0:26:43", "throughput": 7073.71, "total_tokens": 949760}
{"current_steps": 415, "total_steps": 5305, "loss": 0.2152, "lr": 3.898305084745763e-06, "epoch": 0.3911404335532517, "percentage": 7.82, "elapsed_time": "0:02:14", "remaining_time": "0:26:28", "throughput": 7127.73, "total_tokens": 960896}
{"current_steps": 420, "total_steps": 5305, "loss": 0.3237, "lr": 3.945386064030132e-06, "epoch": 0.39585296889726673, "percentage": 7.92, "elapsed_time": "0:02:15", "remaining_time": "0:26:14", "throughput": 7179.1, "total_tokens": 971648}
{"current_steps": 425, "total_steps": 5305, "loss": 0.2182, "lr": 3.992467043314501e-06, "epoch": 0.4005655042412818, "percentage": 8.01, "elapsed_time": "0:02:15", "remaining_time": "0:25:59", "throughput": 7224.71, "total_tokens": 981504}
{"current_steps": 430, "total_steps": 5305, "loss": 0.1811, "lr": 4.03954802259887e-06, "epoch": 0.4052780395852969, "percentage": 8.11, "elapsed_time": "0:02:16", "remaining_time": "0:25:46", "throughput": 7283.05, "total_tokens": 993664}
{"current_steps": 435, "total_steps": 5305, "loss": 0.2151, "lr": 4.08662900188324e-06, "epoch": 0.40999057492931196, "percentage": 8.2, "elapsed_time": "0:02:16", "remaining_time": "0:25:33", "throughput": 7324.7, "total_tokens": 1003008}
{"current_steps": 440, "total_steps": 5305, "loss": 0.2333, "lr": 4.133709981167609e-06, "epoch": 0.41470311027332707, "percentage": 8.29, "elapsed_time": "0:02:17", "remaining_time": "0:25:19", "throughput": 7371.39, "total_tokens": 1013248}
{"current_steps": 445, "total_steps": 5305, "loss": 0.2694, "lr": 4.180790960451978e-06, "epoch": 0.4194156456173421, "percentage": 8.39, "elapsed_time": "0:02:17", "remaining_time": "0:25:06", "throughput": 7416.62, "total_tokens": 1023296}
{"current_steps": 450, "total_steps": 5305, "loss": 0.3493, "lr": 4.2278719397363475e-06, "epoch": 0.42412818096135724, "percentage": 8.48, "elapsed_time": "0:02:18", "remaining_time": "0:24:54", "throughput": 7460.63, "total_tokens": 1033152}
{"current_steps": 455, "total_steps": 5305, "loss": 0.2147, "lr": 4.2749529190207165e-06, "epoch": 0.4288407163053723, "percentage": 8.58, "elapsed_time": "0:02:19", "remaining_time": "0:24:42", "throughput": 7516.75, "total_tokens": 1045248}
{"current_steps": 460, "total_steps": 5305, "loss": 0.3071, "lr": 4.322033898305085e-06, "epoch": 0.43355325164938735, "percentage": 8.67, "elapsed_time": "0:02:19", "remaining_time": "0:24:31", "throughput": 7581.24, "total_tokens": 1058944}
{"current_steps": 465, "total_steps": 5305, "loss": 0.2118, "lr": 4.369114877589454e-06, "epoch": 0.43826578699340246, "percentage": 8.77, "elapsed_time": "0:02:20", "remaining_time": "0:24:20", "throughput": 7640.14, "total_tokens": 1071680}
{"current_steps": 470, "total_steps": 5305, "loss": 0.2319, "lr": 4.416195856873823e-06, "epoch": 0.4429783223374175, "percentage": 8.86, "elapsed_time": "0:02:20", "remaining_time": "0:24:08", "throughput": 7692.75, "total_tokens": 1083328}
{"current_steps": 475, "total_steps": 5305, "loss": 0.4004, "lr": 4.463276836158192e-06, "epoch": 0.44769085768143263, "percentage": 8.95, "elapsed_time": "0:02:21", "remaining_time": "0:23:58", "throughput": 7759.39, "total_tokens": 1097600}
{"current_steps": 480, "total_steps": 5305, "loss": 0.218, "lr": 4.510357815442561e-06, "epoch": 0.4524033930254477, "percentage": 9.05, "elapsed_time": "0:02:22", "remaining_time": "0:23:47", "throughput": 7811.71, "total_tokens": 1109376}
{"current_steps": 485, "total_steps": 5305, "loss": 0.2579, "lr": 4.55743879472693e-06, "epoch": 0.4571159283694628, "percentage": 9.14, "elapsed_time": "0:02:22", "remaining_time": "0:23:36", "throughput": 7859.63, "total_tokens": 1120448}
{"current_steps": 490, "total_steps": 5305, "loss": 0.298, "lr": 4.6045197740113e-06, "epoch": 0.46182846371347785, "percentage": 9.24, "elapsed_time": "0:02:23", "remaining_time": "0:23:26", "throughput": 7906.46, "total_tokens": 1131392}
{"current_steps": 495, "total_steps": 5305, "loss": 0.2203, "lr": 4.651600753295669e-06, "epoch": 0.4665409990574929, "percentage": 9.33, "elapsed_time": "0:02:23", "remaining_time": "0:23:16", "throughput": 7959.39, "total_tokens": 1143552}
{"current_steps": 500, "total_steps": 5305, "loss": 0.1785, "lr": 4.698681732580039e-06, "epoch": 0.471253534401508, "percentage": 9.43, "elapsed_time": "0:02:24", "remaining_time": "0:23:05", "throughput": 7997.71, "total_tokens": 1153088}
{"current_steps": 505, "total_steps": 5305, "loss": 0.2125, "lr": 4.745762711864408e-06, "epoch": 0.4759660697455231, "percentage": 9.52, "elapsed_time": "0:02:24", "remaining_time": "0:22:56", "throughput": 8054.61, "total_tokens": 1166080}
{"current_steps": 510, "total_steps": 5305, "loss": 0.3882, "lr": 4.7928436911487765e-06, "epoch": 0.4806786050895382, "percentage": 9.61, "elapsed_time": "0:02:25", "remaining_time": "0:22:46", "throughput": 8099.32, "total_tokens": 1176896}
{"current_steps": 515, "total_steps": 5305, "loss": 0.2743, "lr": 4.8399246704331455e-06, "epoch": 0.48539114043355325, "percentage": 9.71, "elapsed_time": "0:02:25", "remaining_time": "0:22:36", "throughput": 8141.79, "total_tokens": 1187392}
{"current_steps": 520, "total_steps": 5305, "loss": 0.3309, "lr": 4.8870056497175144e-06, "epoch": 0.49010367577756836, "percentage": 9.8, "elapsed_time": "0:02:26", "remaining_time": "0:22:26", "throughput": 8177.45, "total_tokens": 1196672}
{"current_steps": 525, "total_steps": 5305, "loss": 0.3264, "lr": 4.934086629001883e-06, "epoch": 0.4948162111215834, "percentage": 9.9, "elapsed_time": "0:02:26", "remaining_time": "0:22:17", "throughput": 8231.01, "total_tokens": 1209344}
{"current_steps": 530, "total_steps": 5305, "loss": 0.2193, "lr": 4.981167608286252e-06, "epoch": 0.49952874646559847, "percentage": 9.99, "elapsed_time": "0:02:27", "remaining_time": "0:22:08", "throughput": 8281.78, "total_tokens": 1221504}
{"current_steps": 532, "total_steps": 5305, "eval_loss": 0.31662699580192566, "epoch": 0.5014137606032045, "percentage": 10.03, "elapsed_time": "0:02:31", "remaining_time": "0:22:38", "throughput": 8094.58, "total_tokens": 1225408}
{"current_steps": 535, "total_steps": 5305, "loss": 0.3059, "lr": 4.999995128224159e-06, "epoch": 0.5042412818096136, "percentage": 10.08, "elapsed_time": "0:03:14", "remaining_time": "0:28:58", "throughput": 6319.95, "total_tokens": 1232256}
{"current_steps": 540, "total_steps": 5305, "loss": 0.2494, "lr": 4.999965356329446e-06, "epoch": 0.5089538171536286, "percentage": 10.18, "elapsed_time": "0:03:15", "remaining_time": "0:28:45", "throughput": 6357.32, "total_tokens": 1242880}
{"current_steps": 545, "total_steps": 5305, "loss": 0.2812, "lr": 4.99990851940408e-06, "epoch": 0.5136663524976437, "percentage": 10.27, "elapsed_time": "0:03:16", "remaining_time": "0:28:32", "throughput": 6393.27, "total_tokens": 1253248}
{"current_steps": 550, "total_steps": 5305, "loss": 0.2639, "lr": 4.999824618063384e-06, "epoch": 0.5183788878416589, "percentage": 10.37, "elapsed_time": "0:03:16", "remaining_time": "0:28:19", "throughput": 6436.08, "total_tokens": 1265280}
{"current_steps": 555, "total_steps": 5305, "loss": 0.3403, "lr": 4.99971365321569e-06, "epoch": 0.5230914231856739, "percentage": 10.46, "elapsed_time": "0:03:17", "remaining_time": "0:28:06", "throughput": 6470.11, "total_tokens": 1275328}
{"current_steps": 560, "total_steps": 5305, "loss": 0.2942, "lr": 4.9995756260623194e-06, "epoch": 0.527803958529689, "percentage": 10.56, "elapsed_time": "0:03:17", "remaining_time": "0:27:54", "throughput": 6507.86, "total_tokens": 1286272}
{"current_steps": 565, "total_steps": 5305, "loss": 0.2036, "lr": 4.999410538097579e-06, "epoch": 0.532516493873704, "percentage": 10.65, "elapsed_time": "0:03:18", "remaining_time": "0:27:43", "throughput": 6551.91, "total_tokens": 1298816}
{"current_steps": 570, "total_steps": 5305, "loss": 0.2656, "lr": 4.999218391108735e-06, "epoch": 0.5372290292177191, "percentage": 10.74, "elapsed_time": "0:03:18", "remaining_time": "0:27:31", "throughput": 6596.9, "total_tokens": 1311680}
{"current_steps": 575, "total_steps": 5305, "loss": 0.2828, "lr": 4.9989991871760054e-06, "epoch": 0.5419415645617343, "percentage": 10.84, "elapsed_time": "0:03:19", "remaining_time": "0:27:20", "throughput": 6642.43, "total_tokens": 1324672}
{"current_steps": 580, "total_steps": 5305, "loss": 0.2081, "lr": 4.998752928672525e-06, "epoch": 0.5466540999057493, "percentage": 10.93, "elapsed_time": "0:03:20", "remaining_time": "0:27:09", "throughput": 6684.45, "total_tokens": 1336896}
{"current_steps": 585, "total_steps": 5305, "loss": 0.1485, "lr": 4.9984796182643285e-06, "epoch": 0.5513666352497644, "percentage": 11.03, "elapsed_time": "0:03:20", "remaining_time": "0:26:58", "throughput": 6725.43, "total_tokens": 1348928}
{"current_steps": 590, "total_steps": 5305, "loss": 0.6339, "lr": 4.99817925891032e-06, "epoch": 0.5560791705937794, "percentage": 11.12, "elapsed_time": "0:03:21", "remaining_time": "0:26:47", "throughput": 6772.31, "total_tokens": 1362496}
{"current_steps": 595, "total_steps": 5305, "loss": 0.5079, "lr": 4.997851853862237e-06, "epoch": 0.5607917059377945, "percentage": 11.22, "elapsed_time": "0:03:21", "remaining_time": "0:26:37", "throughput": 6823.24, "total_tokens": 1377152}
{"current_steps": 600, "total_steps": 5305, "loss": 0.2396, "lr": 4.997497406664621e-06, "epoch": 0.5655042412818096, "percentage": 11.31, "elapsed_time": "0:03:22", "remaining_time": "0:26:27", "throughput": 6866.92, "total_tokens": 1390016}
{"current_steps": 605, "total_steps": 5305, "loss": 0.2335, "lr": 4.997115921154774e-06, "epoch": 0.5702167766258247, "percentage": 11.4, "elapsed_time": "0:03:22", "remaining_time": "0:26:16", "throughput": 6906.19, "total_tokens": 1401856}
{"current_steps": 610, "total_steps": 5305, "loss": 0.1719, "lr": 4.9967074014627206e-06, "epoch": 0.5749293119698398, "percentage": 11.5, "elapsed_time": "0:03:23", "remaining_time": "0:26:06", "throughput": 6941.46, "total_tokens": 1412736}
{"current_steps": 615, "total_steps": 5305, "loss": 0.3201, "lr": 4.996271852011161e-06, "epoch": 0.5796418473138548, "percentage": 11.59, "elapsed_time": "0:03:24", "remaining_time": "0:25:56", "throughput": 6983.08, "total_tokens": 1425280}
{"current_steps": 620, "total_steps": 5305, "loss": 0.1993, "lr": 4.995809277515424e-06, "epoch": 0.58435438265787, "percentage": 11.69, "elapsed_time": "0:03:24", "remaining_time": "0:25:46", "throughput": 7019.12, "total_tokens": 1436480}
{"current_steps": 625, "total_steps": 5305, "loss": 0.3072, "lr": 4.995319682983417e-06, "epoch": 0.589066918001885, "percentage": 11.78, "elapsed_time": "0:03:25", "remaining_time": "0:25:36", "throughput": 7055.29, "total_tokens": 1447808}
{"current_steps": 630, "total_steps": 5305, "loss": 0.3263, "lr": 4.99480307371557e-06, "epoch": 0.5937794533459001, "percentage": 11.88, "elapsed_time": "0:03:25", "remaining_time": "0:25:27", "throughput": 7096.4, "total_tokens": 1460352}
{"current_steps": 635, "total_steps": 5305, "loss": 0.2747, "lr": 4.9942594553047775e-06, "epoch": 0.5984919886899152, "percentage": 11.97, "elapsed_time": "0:03:26", "remaining_time": "0:25:17", "throughput": 7136.28, "total_tokens": 1472640}
{"current_steps": 640, "total_steps": 5305, "loss": 0.2984, "lr": 4.993688833636341e-06, "epoch": 0.6032045240339302, "percentage": 12.06, "elapsed_time": "0:03:26", "remaining_time": "0:25:07", "throughput": 7167.0, "total_tokens": 1482688}
{"current_steps": 645, "total_steps": 5305, "loss": 0.2671, "lr": 4.993091214887904e-06, "epoch": 0.6079170593779454, "percentage": 12.16, "elapsed_time": "0:03:27", "remaining_time": "0:24:58", "throughput": 7203.76, "total_tokens": 1494336}
{"current_steps": 650, "total_steps": 5305, "loss": 0.1511, "lr": 4.992466605529384e-06, "epoch": 0.6126295947219604, "percentage": 12.25, "elapsed_time": "0:03:27", "remaining_time": "0:24:49", "throughput": 7236.24, "total_tokens": 1504896}
{"current_steps": 655, "total_steps": 5305, "loss": 0.3427, "lr": 4.991815012322902e-06, "epoch": 0.6173421300659755, "percentage": 12.35, "elapsed_time": "0:03:28", "remaining_time": "0:24:40", "throughput": 7280.32, "total_tokens": 1518592}
{"current_steps": 660, "total_steps": 5305, "loss": 0.2164, "lr": 4.991136442322713e-06, "epoch": 0.6220546654099905, "percentage": 12.44, "elapsed_time": "0:03:29", "remaining_time": "0:24:32", "throughput": 7320.52, "total_tokens": 1531264}
{"current_steps": 665, "total_steps": 5305, "loss": 0.2187, "lr": 4.990430902875125e-06, "epoch": 0.6267672007540056, "percentage": 12.54, "elapsed_time": "0:03:29", "remaining_time": "0:24:23", "throughput": 7350.6, "total_tokens": 1541376}
{"current_steps": 670, "total_steps": 5305, "loss": 0.2911, "lr": 4.989698401618423e-06, "epoch": 0.6314797360980208, "percentage": 12.63, "elapsed_time": "0:03:30", "remaining_time": "0:24:14", "throughput": 7380.31, "total_tokens": 1551424}
{"current_steps": 675, "total_steps": 5305, "loss": 0.1331, "lr": 4.988938946482786e-06, "epoch": 0.6361922714420358, "percentage": 12.72, "elapsed_time": "0:03:30", "remaining_time": "0:24:05", "throughput": 7414.16, "total_tokens": 1562624}
{"current_steps": 680, "total_steps": 5305, "loss": 0.2686, "lr": 4.988152545690197e-06, "epoch": 0.6409048067860509, "percentage": 12.82, "elapsed_time": "0:03:31", "remaining_time": "0:23:57", "throughput": 7448.8, "total_tokens": 1574016}
{"current_steps": 685, "total_steps": 5305, "loss": 0.3307, "lr": 4.987339207754358e-06, "epoch": 0.6456173421300659, "percentage": 12.91, "elapsed_time": "0:03:31", "remaining_time": "0:23:49", "throughput": 7487.78, "total_tokens": 1586688}
{"current_steps": 690, "total_steps": 5305, "loss": 0.247, "lr": 4.9864989414806e-06, "epoch": 0.6503298774740811, "percentage": 13.01, "elapsed_time": "0:03:32", "remaining_time": "0:23:40", "throughput": 7517.75, "total_tokens": 1596992}
{"current_steps": 695, "total_steps": 5305, "loss": 0.3232, "lr": 4.985631755965779e-06, "epoch": 0.6550424128180962, "percentage": 13.1, "elapsed_time": "0:03:33", "remaining_time": "0:23:33", "throughput": 7557.12, "total_tokens": 1609920}
{"current_steps": 700, "total_steps": 5305, "loss": 0.2132, "lr": 4.984737660598187e-06, "epoch": 0.6597549481621112, "percentage": 13.2, "elapsed_time": "0:03:33", "remaining_time": "0:23:25", "throughput": 7588.61, "total_tokens": 1620736}
{"current_steps": 705, "total_steps": 5305, "loss": 0.2797, "lr": 4.983816665057447e-06, "epoch": 0.6644674835061263, "percentage": 13.29, "elapsed_time": "0:03:34", "remaining_time": "0:23:17", "throughput": 7623.39, "total_tokens": 1632512}
{"current_steps": 710, "total_steps": 5305, "loss": 0.3142, "lr": 4.982868779314405e-06, "epoch": 0.6691800188501413, "percentage": 13.38, "elapsed_time": "0:03:34", "remaining_time": "0:23:09", "throughput": 7654.24, "total_tokens": 1643264}
{"current_steps": 715, "total_steps": 5305, "loss": 0.1914, "lr": 4.981894013631026e-06, "epoch": 0.6738925541941565, "percentage": 13.48, "elapsed_time": "0:03:35", "remaining_time": "0:23:01", "throughput": 7685.74, "total_tokens": 1654208}
{"current_steps": 720, "total_steps": 5305, "loss": 0.1985, "lr": 4.980892378560281e-06, "epoch": 0.6786050895381716, "percentage": 13.57, "elapsed_time": "0:03:35", "remaining_time": "0:22:53", "throughput": 7715.33, "total_tokens": 1664640}
{"current_steps": 725, "total_steps": 5305, "loss": 0.2831, "lr": 4.979863884946034e-06, "epoch": 0.6833176248821866, "percentage": 13.67, "elapsed_time": "0:03:36", "remaining_time": "0:22:46", "throughput": 7751.16, "total_tokens": 1676864}
{"current_steps": 730, "total_steps": 5305, "loss": 0.2082, "lr": 4.978808543922925e-06, "epoch": 0.6880301602262017, "percentage": 13.76, "elapsed_time": "0:03:36", "remaining_time": "0:22:39", "throughput": 7793.72, "total_tokens": 1691072}
{"current_steps": 735, "total_steps": 5305, "loss": 0.1227, "lr": 4.9777263669162465e-06, "epoch": 0.6927426955702167, "percentage": 13.85, "elapsed_time": "0:03:37", "remaining_time": "0:22:32", "throughput": 7825.88, "total_tokens": 1702400}
{"current_steps": 740, "total_steps": 5305, "loss": 0.1471, "lr": 4.976617365641822e-06, "epoch": 0.6974552309142319, "percentage": 13.95, "elapsed_time": "0:03:38", "remaining_time": "0:22:25", "throughput": 7862.35, "total_tokens": 1714944}
{"current_steps": 745, "total_steps": 5305, "loss": 0.3082, "lr": 4.97548155210588e-06, "epoch": 0.702167766258247, "percentage": 14.04, "elapsed_time": "0:03:38", "remaining_time": "0:22:18", "throughput": 7891.2, "total_tokens": 1725376}
{"current_steps": 750, "total_steps": 5305, "loss": 0.4432, "lr": 4.974318938604921e-06, "epoch": 0.706880301602262, "percentage": 14.14, "elapsed_time": "0:03:39", "remaining_time": "0:22:11", "throughput": 7924.52, "total_tokens": 1737152}
{"current_steps": 755, "total_steps": 5305, "loss": 0.1969, "lr": 4.9731295377255885e-06, "epoch": 0.7115928369462771, "percentage": 14.23, "elapsed_time": "0:03:39", "remaining_time": "0:22:04", "throughput": 7958.51, "total_tokens": 1749120}
{"current_steps": 760, "total_steps": 5305, "loss": 0.272, "lr": 4.971913362344529e-06, "epoch": 0.7163053722902922, "percentage": 14.33, "elapsed_time": "0:03:40", "remaining_time": "0:21:57", "throughput": 7989.48, "total_tokens": 1760384}
{"current_steps": 765, "total_steps": 5305, "loss": 0.1454, "lr": 4.970670425628255e-06, "epoch": 0.7210179076343073, "percentage": 14.42, "elapsed_time": "0:03:40", "remaining_time": "0:21:51", "throughput": 8026.42, "total_tokens": 1773632}
{"current_steps": 770, "total_steps": 5305, "loss": 0.184, "lr": 4.969400741032999e-06, "epoch": 0.7257304429783223, "percentage": 14.51, "elapsed_time": "0:03:41", "remaining_time": "0:21:45", "throughput": 8067.26, "total_tokens": 1787776}
{"current_steps": 775, "total_steps": 5305, "loss": 0.2148, "lr": 4.968104322304575e-06, "epoch": 0.7304429783223374, "percentage": 14.61, "elapsed_time": "0:03:42", "remaining_time": "0:21:38", "throughput": 8095.45, "total_tokens": 1798336}
{"current_steps": 780, "total_steps": 5305, "loss": 0.2897, "lr": 4.966781183478223e-06, "epoch": 0.7351555136663525, "percentage": 14.7, "elapsed_time": "0:03:42", "remaining_time": "0:21:31", "throughput": 8124.81, "total_tokens": 1809216}
{"current_steps": 785, "total_steps": 5305, "loss": 0.2981, "lr": 4.965431338878456e-06, "epoch": 0.7398680490103676, "percentage": 14.8, "elapsed_time": "0:03:43", "remaining_time": "0:21:25", "throughput": 8160.88, "total_tokens": 1822144}
{"current_steps": 790, "total_steps": 5305, "loss": 0.2476, "lr": 4.9640548031189125e-06, "epoch": 0.7445805843543827, "percentage": 14.89, "elapsed_time": "0:03:43", "remaining_time": "0:21:19", "throughput": 8189.84, "total_tokens": 1833088}
{"current_steps": 795, "total_steps": 5305, "loss": 0.2554, "lr": 4.962651591102191e-06, "epoch": 0.7492931196983977, "percentage": 14.99, "elapsed_time": "0:03:44", "remaining_time": "0:21:12", "throughput": 8222.32, "total_tokens": 1845056}
{"current_steps": 798, "total_steps": 5305, "eval_loss": 0.2178538739681244, "epoch": 0.7521206409048068, "percentage": 15.04, "elapsed_time": "0:03:47", "remaining_time": "0:21:24", "throughput": 8138.24, "total_tokens": 1851072}
{"current_steps": 800, "total_steps": 5305, "loss": 0.2507, "lr": 4.961221718019695e-06, "epoch": 0.7540056550424128, "percentage": 15.08, "elapsed_time": "0:04:17", "remaining_time": "0:24:10", "throughput": 7203.34, "total_tokens": 1855168}
{"current_steps": 805, "total_steps": 5305, "loss": 0.3006, "lr": 4.9597651993514585e-06, "epoch": 0.7587181903864278, "percentage": 15.17, "elapsed_time": "0:04:18", "remaining_time": "0:24:02", "throughput": 7234.36, "total_tokens": 1867328}
{"current_steps": 810, "total_steps": 5305, "loss": 0.1949, "lr": 4.9582820508659924e-06, "epoch": 0.763430725730443, "percentage": 15.27, "elapsed_time": "0:04:18", "remaining_time": "0:23:56", "throughput": 7274.47, "total_tokens": 1882560}
{"current_steps": 815, "total_steps": 5305, "loss": 0.1866, "lr": 4.956772288620101e-06, "epoch": 0.7681432610744581, "percentage": 15.36, "elapsed_time": "0:04:19", "remaining_time": "0:23:48", "throughput": 7301.06, "total_tokens": 1893376}
{"current_steps": 820, "total_steps": 5305, "loss": 0.1114, "lr": 4.955235928958716e-06, "epoch": 0.7728557964184731, "percentage": 15.46, "elapsed_time": "0:04:19", "remaining_time": "0:23:41", "throughput": 7333.26, "total_tokens": 1906048}
{"current_steps": 825, "total_steps": 5305, "loss": 0.2425, "lr": 4.953672988514716e-06, "epoch": 0.7775683317624882, "percentage": 15.55, "elapsed_time": "0:04:20", "remaining_time": "0:23:34", "throughput": 7361.84, "total_tokens": 1917568}
{"current_steps": 830, "total_steps": 5305, "loss": 0.4121, "lr": 4.95208348420875e-06, "epoch": 0.7822808671065034, "percentage": 15.65, "elapsed_time": "0:04:21", "remaining_time": "0:23:27", "throughput": 7390.73, "total_tokens": 1929216}
{"current_steps": 835, "total_steps": 5305, "loss": 0.1859, "lr": 4.950467433249046e-06, "epoch": 0.7869934024505184, "percentage": 15.74, "elapsed_time": "0:04:21", "remaining_time": "0:23:20", "throughput": 7417.87, "total_tokens": 1940416}
{"current_steps": 840, "total_steps": 5305, "loss": 0.2065, "lr": 4.948824853131237e-06, "epoch": 0.7917059377945335, "percentage": 15.83, "elapsed_time": "0:04:22", "remaining_time": "0:23:13", "throughput": 7439.16, "total_tokens": 1949632}
{"current_steps": 845, "total_steps": 5305, "loss": 0.2102, "lr": 4.94715576163816e-06, "epoch": 0.7964184731385485, "percentage": 15.93, "elapsed_time": "0:04:22", "remaining_time": "0:23:06", "throughput": 7469.51, "total_tokens": 1961920}
{"current_steps": 850, "total_steps": 5305, "loss": 0.2975, "lr": 4.945460176839671e-06, "epoch": 0.8011310084825636, "percentage": 16.02, "elapsed_time": "0:04:23", "remaining_time": "0:22:59", "throughput": 7498.35, "total_tokens": 1973696}
{"current_steps": 855, "total_steps": 5305, "loss": 0.294, "lr": 4.943738117092447e-06, "epoch": 0.8058435438265787, "percentage": 16.12, "elapsed_time": "0:04:23", "remaining_time": "0:22:52", "throughput": 7526.53, "total_tokens": 1985280}
{"current_steps": 860, "total_steps": 5305, "loss": 0.2107, "lr": 4.941989601039785e-06, "epoch": 0.8105560791705938, "percentage": 16.21, "elapsed_time": "0:04:24", "remaining_time": "0:22:46", "throughput": 7556.57, "total_tokens": 1997504}
{"current_steps": 865, "total_steps": 5305, "loss": 0.2815, "lr": 4.940214647611405e-06, "epoch": 0.8152686145146089, "percentage": 16.31, "elapsed_time": "0:04:24", "remaining_time": "0:22:39", "throughput": 7585.89, "total_tokens": 2009600}
{"current_steps": 870, "total_steps": 5305, "loss": 0.1509, "lr": 4.9384132760232395e-06, "epoch": 0.8199811498586239, "percentage": 16.4, "elapsed_time": "0:04:25", "remaining_time": "0:22:33", "throughput": 7611.99, "total_tokens": 2020672}
{"current_steps": 875, "total_steps": 5305, "loss": 0.258, "lr": 4.93658550577723e-06, "epoch": 0.824693685202639, "percentage": 16.49, "elapsed_time": "0:04:26", "remaining_time": "0:22:26", "throughput": 7643.01, "total_tokens": 2033408}
{"current_steps": 880, "total_steps": 5305, "loss": 0.2403, "lr": 4.9347313566611145e-06, "epoch": 0.8294062205466541, "percentage": 16.59, "elapsed_time": "0:04:26", "remaining_time": "0:22:20", "throughput": 7665.57, "total_tokens": 2043328}
{"current_steps": 885, "total_steps": 5305, "loss": 0.2631, "lr": 4.9328508487482115e-06, "epoch": 0.8341187558906692, "percentage": 16.68, "elapsed_time": "0:04:27", "remaining_time": "0:22:14", "throughput": 7692.3, "total_tokens": 2054656}
{"current_steps": 890, "total_steps": 5305, "loss": 0.2302, "lr": 4.930944002397204e-06, "epoch": 0.8388312912346843, "percentage": 16.78, "elapsed_time": "0:04:27", "remaining_time": "0:22:07", "throughput": 7713.4, "total_tokens": 2064128}
{"current_steps": 895, "total_steps": 5305, "loss": 0.2009, "lr": 4.929010838251923e-06, "epoch": 0.8435438265786993, "percentage": 16.87, "elapsed_time": "0:04:28", "remaining_time": "0:22:01", "throughput": 7743.97, "total_tokens": 2076864}
{"current_steps": 900, "total_steps": 5305, "loss": 0.1868, "lr": 4.927051377241115e-06, "epoch": 0.8482563619227145, "percentage": 16.97, "elapsed_time": "0:04:28", "remaining_time": "0:21:55", "throughput": 7766.98, "total_tokens": 2087104}
{"current_steps": 905, "total_steps": 5305, "loss": 0.3066, "lr": 4.9250656405782215e-06, "epoch": 0.8529688972667295, "percentage": 17.06, "elapsed_time": "0:04:29", "remaining_time": "0:21:49", "throughput": 7791.14, "total_tokens": 2097728}
{"current_steps": 910, "total_steps": 5305, "loss": 0.1685, "lr": 4.9230536497611525e-06, "epoch": 0.8576814326107446, "percentage": 17.15, "elapsed_time": "0:04:29", "remaining_time": "0:21:42", "throughput": 7813.94, "total_tokens": 2107904}
{"current_steps": 915, "total_steps": 5305, "loss": 0.3358, "lr": 4.921015426572047e-06, "epoch": 0.8623939679547596, "percentage": 17.25, "elapsed_time": "0:04:30", "remaining_time": "0:21:37", "throughput": 7842.59, "total_tokens": 2120192}
{"current_steps": 920, "total_steps": 5305, "loss": 0.2411, "lr": 4.918950993077039e-06, "epoch": 0.8671065032987747, "percentage": 17.34, "elapsed_time": "0:04:30", "remaining_time": "0:21:31", "throughput": 7869.53, "total_tokens": 2131904}
{"current_steps": 925, "total_steps": 5305, "loss": 0.3069, "lr": 4.91686037162602e-06, "epoch": 0.8718190386427899, "percentage": 17.44, "elapsed_time": "0:04:31", "remaining_time": "0:21:25", "throughput": 7899.2, "total_tokens": 2144640}
{"current_steps": 930, "total_steps": 5305, "loss": 0.1587, "lr": 4.9147435848523975e-06, "epoch": 0.8765315739868049, "percentage": 17.53, "elapsed_time": "0:04:32", "remaining_time": "0:21:19", "throughput": 7919.45, "total_tokens": 2154112}
{"current_steps": 935, "total_steps": 5305, "loss": 0.1468, "lr": 4.91260065567285e-06, "epoch": 0.88124410933082, "percentage": 17.62, "elapsed_time": "0:04:32", "remaining_time": "0:21:14", "throughput": 7950.2, "total_tokens": 2167232}
{"current_steps": 940, "total_steps": 5305, "loss": 0.2699, "lr": 4.910431607287075e-06, "epoch": 0.885956644674835, "percentage": 17.72, "elapsed_time": "0:04:33", "remaining_time": "0:21:08", "throughput": 7977.74, "total_tokens": 2179264}
{"current_steps": 945, "total_steps": 5305, "loss": 0.3797, "lr": 4.908236463177544e-06, "epoch": 0.8906691800188501, "percentage": 17.81, "elapsed_time": "0:04:33", "remaining_time": "0:21:03", "throughput": 8005.44, "total_tokens": 2191488}
{"current_steps": 950, "total_steps": 5305, "loss": 0.1988, "lr": 4.906015247109242e-06, "epoch": 0.8953817153628653, "percentage": 17.91, "elapsed_time": "0:04:34", "remaining_time": "0:20:57", "throughput": 8027.96, "total_tokens": 2201856}
{"current_steps": 955, "total_steps": 5305, "loss": 0.3161, "lr": 4.903767983129414e-06, "epoch": 0.9000942507068803, "percentage": 18.0, "elapsed_time": "0:04:34", "remaining_time": "0:20:51", "throughput": 8056.57, "total_tokens": 2214464}
{"current_steps": 960, "total_steps": 5305, "loss": 0.2565, "lr": 4.901494695567306e-06, "epoch": 0.9048067860508954, "percentage": 18.1, "elapsed_time": "0:04:35", "remaining_time": "0:20:46", "throughput": 8091.03, "total_tokens": 2229184}
{"current_steps": 965, "total_steps": 5305, "loss": 0.2214, "lr": 4.899195409033897e-06, "epoch": 0.9095193213949104, "percentage": 18.19, "elapsed_time": "0:04:36", "remaining_time": "0:20:41", "throughput": 8111.86, "total_tokens": 2239104}
{"current_steps": 970, "total_steps": 5305, "loss": 0.1992, "lr": 4.896870148421637e-06, "epoch": 0.9142318567389256, "percentage": 18.28, "elapsed_time": "0:04:36", "remaining_time": "0:20:35", "throughput": 8133.12, "total_tokens": 2249152}
{"current_steps": 975, "total_steps": 5305, "loss": 0.1527, "lr": 4.894518938904175e-06, "epoch": 0.9189443920829407, "percentage": 18.38, "elapsed_time": "0:04:37", "remaining_time": "0:20:30", "throughput": 8159.97, "total_tokens": 2261312}
{"current_steps": 980, "total_steps": 5305, "loss": 0.1398, "lr": 4.892141805936085e-06, "epoch": 0.9236569274269557, "percentage": 18.47, "elapsed_time": "0:04:37", "remaining_time": "0:20:25", "throughput": 8191.34, "total_tokens": 2275008}
{"current_steps": 985, "total_steps": 5305, "loss": 0.276, "lr": 4.889738775252596e-06, "epoch": 0.9283694627709708, "percentage": 18.57, "elapsed_time": "0:04:38", "remaining_time": "0:20:20", "throughput": 8219.48, "total_tokens": 2287680}
{"current_steps": 990, "total_steps": 5305, "loss": 0.2869, "lr": 4.887309872869308e-06, "epoch": 0.9330819981149858, "percentage": 18.66, "elapsed_time": "0:04:38", "remaining_time": "0:20:15", "throughput": 8246.18, "total_tokens": 2299840}
{"current_steps": 995, "total_steps": 5305, "loss": 0.2347, "lr": 4.884855125081912e-06, "epoch": 0.937794533459001, "percentage": 18.76, "elapsed_time": "0:04:39", "remaining_time": "0:20:10", "throughput": 8270.44, "total_tokens": 2311104}
{"current_steps": 1000, "total_steps": 5305, "loss": 0.326, "lr": 4.882374558465906e-06, "epoch": 0.942507068803016, "percentage": 18.85, "elapsed_time": "0:04:39", "remaining_time": "0:20:05", "throughput": 8294.58, "total_tokens": 2322432}
{"current_steps": 1005, "total_steps": 5305, "loss": 0.2946, "lr": 4.8798681998763056e-06, "epoch": 0.9472196041470311, "percentage": 18.94, "elapsed_time": "0:04:40", "remaining_time": "0:20:00", "throughput": 8316.99, "total_tokens": 2333120}
{"current_steps": 1010, "total_steps": 5305, "loss": 0.2846, "lr": 4.877336076447358e-06, "epoch": 0.9519321394910462, "percentage": 19.04, "elapsed_time": "0:04:41", "remaining_time": "0:19:55", "throughput": 8343.73, "total_tokens": 2345472}
{"current_steps": 1015, "total_steps": 5305, "loss": 0.1988, "lr": 4.87477821559224e-06, "epoch": 0.9566446748350612, "percentage": 19.13, "elapsed_time": "0:04:41", "remaining_time": "0:19:50", "throughput": 8369.85, "total_tokens": 2357568}
{"current_steps": 1020, "total_steps": 5305, "loss": 0.2295, "lr": 4.87219464500277e-06, "epoch": 0.9613572101790764, "percentage": 19.23, "elapsed_time": "0:04:42", "remaining_time": "0:19:45", "throughput": 8391.35, "total_tokens": 2368064}
{"current_steps": 1025, "total_steps": 5305, "loss": 0.2166, "lr": 4.869585392649102e-06, "epoch": 0.9660697455230914, "percentage": 19.32, "elapsed_time": "0:04:42", "remaining_time": "0:19:40", "throughput": 8420.05, "total_tokens": 2381184}
{"current_steps": 1030, "total_steps": 5305, "loss": 0.1964, "lr": 4.866950486779425e-06, "epoch": 0.9707822808671065, "percentage": 19.42, "elapsed_time": "0:04:43", "remaining_time": "0:19:36", "throughput": 8446.06, "total_tokens": 2393408}
{"current_steps": 1035, "total_steps": 5305, "loss": 0.2603, "lr": 4.864289955919658e-06, "epoch": 0.9754948162111216, "percentage": 19.51, "elapsed_time": "0:04:43", "remaining_time": "0:19:31", "throughput": 8474.95, "total_tokens": 2406720}
{"current_steps": 1040, "total_steps": 5305, "loss": 0.3101, "lr": 4.8616038288731394e-06, "epoch": 0.9802073515551367, "percentage": 19.6, "elapsed_time": "0:04:44", "remaining_time": "0:19:27", "throughput": 8504.38, "total_tokens": 2420288}
{"current_steps": 1045, "total_steps": 5305, "loss": 0.1463, "lr": 4.8588921347203175e-06, "epoch": 0.9849198868991518, "percentage": 19.7, "elapsed_time": "0:04:45", "remaining_time": "0:19:22", "throughput": 8527.42, "total_tokens": 2431488}
{"current_steps": 1050, "total_steps": 5305, "loss": 0.2497, "lr": 4.8561549028184315e-06, "epoch": 0.9896324222431668, "percentage": 19.79, "elapsed_time": "0:04:45", "remaining_time": "0:19:17", "throughput": 8553.88, "total_tokens": 2444032}
{"current_steps": 1055, "total_steps": 5305, "loss": 0.1574, "lr": 4.8533921628012e-06, "epoch": 0.9943449575871819, "percentage": 19.89, "elapsed_time": "0:04:46", "remaining_time": "0:19:13", "throughput": 8575.73, "total_tokens": 2454912}
{"current_steps": 1060, "total_steps": 5305, "loss": 0.3676, "lr": 4.850603944578494e-06, "epoch": 0.9990574929311969, "percentage": 19.98, "elapsed_time": "0:04:46", "remaining_time": "0:19:08", "throughput": 8602.42, "total_tokens": 2467584}
{"current_steps": 1064, "total_steps": 5305, "eval_loss": 0.18848362565040588, "epoch": 1.002827521206409, "percentage": 20.06, "elapsed_time": "0:04:50", "remaining_time": "0:19:16", "throughput": 8534.86, "total_tokens": 2475808}
{"current_steps": 1065, "total_steps": 5305, "loss": 0.1493, "lr": 4.847790278336017e-06, "epoch": 1.003770028275212, "percentage": 20.08, "elapsed_time": "0:05:33", "remaining_time": "0:22:06", "throughput": 7440.06, "total_tokens": 2478048}
{"current_steps": 1070, "total_steps": 5305, "loss": 0.1749, "lr": 4.844951194534975e-06, "epoch": 1.0084825636192272, "percentage": 20.17, "elapsed_time": "0:05:33", "remaining_time": "0:22:00", "throughput": 7469.27, "total_tokens": 2492576}
{"current_steps": 1075, "total_steps": 5305, "loss": 0.1307, "lr": 4.842086723911751e-06, "epoch": 1.0131950989632421, "percentage": 20.26, "elapsed_time": "0:05:34", "remaining_time": "0:21:55", "throughput": 7494.44, "total_tokens": 2505440}
{"current_steps": 1080, "total_steps": 5305, "loss": 0.1119, "lr": 4.839196897477569e-06, "epoch": 1.0179076343072573, "percentage": 20.36, "elapsed_time": "0:05:34", "remaining_time": "0:21:49", "throughput": 7512.96, "total_tokens": 2515488}
{"current_steps": 1085, "total_steps": 5305, "loss": 0.1664, "lr": 4.836281746518159e-06, "epoch": 1.0226201696512724, "percentage": 20.45, "elapsed_time": "0:05:35", "remaining_time": "0:21:44", "throughput": 7540.8, "total_tokens": 2529504}
{"current_steps": 1090, "total_steps": 5305, "loss": 0.1393, "lr": 4.833341302593417e-06, "epoch": 1.0273327049952874, "percentage": 20.55, "elapsed_time": "0:05:35", "remaining_time": "0:21:39", "throughput": 7559.85, "total_tokens": 2539872}
{"current_steps": 1095, "total_steps": 5305, "loss": 0.0376, "lr": 4.830375597537068e-06, "epoch": 1.0320452403393026, "percentage": 20.64, "elapsed_time": "0:05:36", "remaining_time": "0:21:33", "throughput": 7577.87, "total_tokens": 2549856}
{"current_steps": 1100, "total_steps": 5305, "loss": 0.1836, "lr": 4.827384663456315e-06, "epoch": 1.0367577756833177, "percentage": 20.74, "elapsed_time": "0:05:36", "remaining_time": "0:21:28", "throughput": 7594.8, "total_tokens": 2559328}
{"current_steps": 1105, "total_steps": 5305, "loss": 0.369, "lr": 4.824368532731496e-06, "epoch": 1.0414703110273327, "percentage": 20.83, "elapsed_time": "0:05:37", "remaining_time": "0:21:22", "throughput": 7613.14, "total_tokens": 2569440}
{"current_steps": 1110, "total_steps": 5305, "loss": 0.084, "lr": 4.821327238015732e-06, "epoch": 1.0461828463713478, "percentage": 20.92, "elapsed_time": "0:05:38", "remaining_time": "0:21:17", "throughput": 7633.67, "total_tokens": 2580448}
{"current_steps": 1115, "total_steps": 5305, "loss": 0.4176, "lr": 4.818260812234572e-06, "epoch": 1.0508953817153628, "percentage": 21.02, "elapsed_time": "0:05:38", "remaining_time": "0:21:12", "throughput": 7650.62, "total_tokens": 2590752}
{"current_steps": 1120, "total_steps": 5305, "loss": 0.0664, "lr": 4.815169288585641e-06, "epoch": 1.055607917059378, "percentage": 21.11, "elapsed_time": "0:05:39", "remaining_time": "0:21:07", "throughput": 7667.19, "total_tokens": 2600160}
{"current_steps": 1125, "total_steps": 5305, "loss": 0.1558, "lr": 4.812052700538274e-06, "epoch": 1.0603204524033931, "percentage": 21.21, "elapsed_time": "0:05:39", "remaining_time": "0:21:02", "throughput": 7687.55, "total_tokens": 2611232}
{"current_steps": 1130, "total_steps": 5305, "loss": 0.1476, "lr": 4.808911081833161e-06, "epoch": 1.065032987747408, "percentage": 21.3, "elapsed_time": "0:05:40", "remaining_time": "0:20:57", "throughput": 7710.97, "total_tokens": 2623712}
{"current_steps": 1135, "total_steps": 5305, "loss": 0.0875, "lr": 4.805744466481974e-06, "epoch": 1.0697455230914232, "percentage": 21.39, "elapsed_time": "0:05:40", "remaining_time": "0:20:52", "throughput": 7733.82, "total_tokens": 2635936}
{"current_steps": 1140, "total_steps": 5305, "loss": 0.1297, "lr": 4.802552888767005e-06, "epoch": 1.0744580584354382, "percentage": 21.49, "elapsed_time": "0:05:41", "remaining_time": "0:20:47", "throughput": 7751.44, "total_tokens": 2645920}
{"current_steps": 1145, "total_steps": 5305, "loss": 0.2563, "lr": 4.799336383240793e-06, "epoch": 1.0791705937794533, "percentage": 21.58, "elapsed_time": "0:05:41", "remaining_time": "0:20:42", "throughput": 7780.07, "total_tokens": 2660768}
{"current_steps": 1150, "total_steps": 5305, "loss": 0.1484, "lr": 4.796094984725749e-06, "epoch": 1.0838831291234685, "percentage": 21.68, "elapsed_time": "0:05:42", "remaining_time": "0:20:37", "throughput": 7798.63, "total_tokens": 2671200}
{"current_steps": 1155, "total_steps": 5305, "loss": 0.1145, "lr": 4.792828728313778e-06, "epoch": 1.0885956644674835, "percentage": 21.77, "elapsed_time": "0:05:43", "remaining_time": "0:20:32", "throughput": 7820.37, "total_tokens": 2683040}
{"current_steps": 1160, "total_steps": 5305, "loss": 0.0767, "lr": 4.789537649365904e-06, "epoch": 1.0933081998114986, "percentage": 21.87, "elapsed_time": "0:05:43", "remaining_time": "0:20:27", "throughput": 7841.1, "total_tokens": 2694432}
{"current_steps": 1165, "total_steps": 5305, "loss": 0.0079, "lr": 4.78622178351188e-06, "epoch": 1.0980207351555136, "percentage": 21.96, "elapsed_time": "0:05:44", "remaining_time": "0:20:23", "throughput": 7864.69, "total_tokens": 2707168}
{"current_steps": 1170, "total_steps": 5305, "loss": 0.1644, "lr": 4.782881166649808e-06, "epoch": 1.1027332704995287, "percentage": 22.05, "elapsed_time": "0:05:44", "remaining_time": "0:20:18", "throughput": 7883.68, "total_tokens": 2717984}
{"current_steps": 1175, "total_steps": 5305, "loss": 0.2543, "lr": 4.77951583494575e-06, "epoch": 1.107445805843544, "percentage": 22.15, "elapsed_time": "0:05:45", "remaining_time": "0:20:13", "throughput": 7907.38, "total_tokens": 2730784}
{"current_steps": 1180, "total_steps": 5305, "loss": 0.4821, "lr": 4.77612582483333e-06, "epoch": 1.1121583411875589, "percentage": 22.24, "elapsed_time": "0:05:45", "remaining_time": "0:20:09", "throughput": 7924.23, "total_tokens": 2740704}
{"current_steps": 1185, "total_steps": 5305, "loss": 0.2498, "lr": 4.772711173013352e-06, "epoch": 1.116870876531574, "percentage": 22.34, "elapsed_time": "0:05:46", "remaining_time": "0:20:04", "throughput": 7944.13, "total_tokens": 2751968}
{"current_steps": 1190, "total_steps": 5305, "loss": 0.1649, "lr": 4.769271916453387e-06, "epoch": 1.121583411875589, "percentage": 22.43, "elapsed_time": "0:05:46", "remaining_time": "0:19:59", "throughput": 7965.25, "total_tokens": 2763808}
{"current_steps": 1195, "total_steps": 5305, "loss": 0.0735, "lr": 4.765808092387385e-06, "epoch": 1.1262959472196041, "percentage": 22.53, "elapsed_time": "0:05:47", "remaining_time": "0:19:55", "throughput": 7984.15, "total_tokens": 2774624}
{"current_steps": 1200, "total_steps": 5305, "loss": 0.2639, "lr": 4.762319738315269e-06, "epoch": 1.1310084825636193, "percentage": 22.62, "elapsed_time": "0:05:48", "remaining_time": "0:19:50", "throughput": 8003.92, "total_tokens": 2785888}
{"current_steps": 1205, "total_steps": 5305, "loss": 0.3194, "lr": 4.758806892002526e-06, "epoch": 1.1357210179076342, "percentage": 22.71, "elapsed_time": "0:05:48", "remaining_time": "0:19:46", "throughput": 8023.84, "total_tokens": 2797216}
{"current_steps": 1210, "total_steps": 5305, "loss": 0.1395, "lr": 4.7552695914798e-06, "epoch": 1.1404335532516494, "percentage": 22.81, "elapsed_time": "0:05:49", "remaining_time": "0:19:41", "throughput": 8042.47, "total_tokens": 2808032}
{"current_steps": 1215, "total_steps": 5305, "loss": 0.2734, "lr": 4.751707875042481e-06, "epoch": 1.1451460885956646, "percentage": 22.9, "elapsed_time": "0:05:49", "remaining_time": "0:19:37", "throughput": 8070.04, "total_tokens": 2823008}
{"current_steps": 1220, "total_steps": 5305, "loss": 0.0883, "lr": 4.748121781250288e-06, "epoch": 1.1498586239396795, "percentage": 23.0, "elapsed_time": "0:05:50", "remaining_time": "0:19:33", "throughput": 8093.18, "total_tokens": 2835936}
{"current_steps": 1225, "total_steps": 5305, "loss": 0.169, "lr": 4.744511348926855e-06, "epoch": 1.1545711592836947, "percentage": 23.09, "elapsed_time": "0:05:50", "remaining_time": "0:19:28", "throughput": 8113.49, "total_tokens": 2847584}
{"current_steps": 1230, "total_steps": 5305, "loss": 0.1451, "lr": 4.740876617159308e-06, "epoch": 1.1592836946277096, "percentage": 23.19, "elapsed_time": "0:05:51", "remaining_time": "0:19:24", "throughput": 8130.84, "total_tokens": 2857952}
{"current_steps": 1235, "total_steps": 5305, "loss": 0.2114, "lr": 4.737217625297844e-06, "epoch": 1.1639962299717248, "percentage": 23.28, "elapsed_time": "0:05:52", "remaining_time": "0:19:20", "throughput": 8147.9, "total_tokens": 2868192}
{"current_steps": 1240, "total_steps": 5305, "loss": 0.1145, "lr": 4.733534412955301e-06, "epoch": 1.1687087653157398, "percentage": 23.37, "elapsed_time": "0:05:52", "remaining_time": "0:19:15", "throughput": 8168.09, "total_tokens": 2879904}
{"current_steps": 1245, "total_steps": 5305, "loss": 0.1768, "lr": 4.729827020006735e-06, "epoch": 1.173421300659755, "percentage": 23.47, "elapsed_time": "0:05:53", "remaining_time": "0:19:11", "throughput": 8189.94, "total_tokens": 2892384}
{"current_steps": 1250, "total_steps": 5305, "loss": 0.1507, "lr": 4.726095486588983e-06, "epoch": 1.17813383600377, "percentage": 23.56, "elapsed_time": "0:05:53", "remaining_time": "0:19:07", "throughput": 8212.31, "total_tokens": 2905184}
{"current_steps": 1255, "total_steps": 5305, "loss": 0.0958, "lr": 4.722339853100232e-06, "epoch": 1.182846371347785, "percentage": 23.66, "elapsed_time": "0:05:54", "remaining_time": "0:19:03", "throughput": 8231.86, "total_tokens": 2916640}
{"current_steps": 1260, "total_steps": 5305, "loss": 0.1192, "lr": 4.718560160199579e-06, "epoch": 1.1875589066918002, "percentage": 23.75, "elapsed_time": "0:05:54", "remaining_time": "0:18:59", "throughput": 8249.0, "total_tokens": 2927072}
{"current_steps": 1265, "total_steps": 5305, "loss": 0.2693, "lr": 4.714756448806592e-06, "epoch": 1.1922714420358154, "percentage": 23.85, "elapsed_time": "0:05:55", "remaining_time": "0:18:54", "throughput": 8266.79, "total_tokens": 2937888}
{"current_steps": 1270, "total_steps": 5305, "loss": 0.1689, "lr": 4.71092876010087e-06, "epoch": 1.1969839773798303, "percentage": 23.94, "elapsed_time": "0:05:55", "remaining_time": "0:18:51", "throughput": 8288.98, "total_tokens": 2950752}
{"current_steps": 1275, "total_steps": 5305, "loss": 0.0997, "lr": 4.70707713552159e-06, "epoch": 1.2016965127238455, "percentage": 24.03, "elapsed_time": "0:05:56", "remaining_time": "0:18:46", "throughput": 8305.66, "total_tokens": 2961056}
{"current_steps": 1280, "total_steps": 5305, "loss": 0.1164, "lr": 4.703201616767067e-06, "epoch": 1.2064090480678604, "percentage": 24.13, "elapsed_time": "0:05:57", "remaining_time": "0:18:42", "throughput": 8322.67, "total_tokens": 2971552}
{"current_steps": 1285, "total_steps": 5305, "loss": 0.0178, "lr": 4.699302245794293e-06, "epoch": 1.2111215834118756, "percentage": 24.22, "elapsed_time": "0:05:57", "remaining_time": "0:18:38", "throughput": 8346.32, "total_tokens": 2985120}
{"current_steps": 1290, "total_steps": 5305, "loss": 0.1821, "lr": 4.6953790648184924e-06, "epoch": 1.2158341187558908, "percentage": 24.32, "elapsed_time": "0:05:58", "remaining_time": "0:18:34", "throughput": 8364.29, "total_tokens": 2996128}
{"current_steps": 1295, "total_steps": 5305, "loss": 0.0199, "lr": 4.691432116312661e-06, "epoch": 1.2205466540999057, "percentage": 24.41, "elapsed_time": "0:05:58", "remaining_time": "0:18:30", "throughput": 8382.15, "total_tokens": 3007072}
{"current_steps": 1300, "total_steps": 5305, "loss": 0.006, "lr": 4.687461443007101e-06, "epoch": 1.2252591894439209, "percentage": 24.51, "elapsed_time": "0:05:59", "remaining_time": "0:18:26", "throughput": 8401.26, "total_tokens": 3018656}
{"current_steps": 1305, "total_steps": 5305, "loss": 0.1915, "lr": 4.683467087888967e-06, "epoch": 1.2299717247879358, "percentage": 24.6, "elapsed_time": "0:05:59", "remaining_time": "0:18:23", "throughput": 8421.27, "total_tokens": 3030624}
{"current_steps": 1310, "total_steps": 5305, "loss": 0.2276, "lr": 4.6794490942017955e-06, "epoch": 1.234684260131951, "percentage": 24.69, "elapsed_time": "0:06:00", "remaining_time": "0:18:19", "throughput": 8442.16, "total_tokens": 3043040}
{"current_steps": 1315, "total_steps": 5305, "loss": 0.0236, "lr": 4.6754075054450385e-06, "epoch": 1.2393967954759662, "percentage": 24.79, "elapsed_time": "0:06:01", "remaining_time": "0:18:15", "throughput": 8467.5, "total_tokens": 3057632}
{"current_steps": 1320, "total_steps": 5305, "loss": 0.1376, "lr": 4.671342365373592e-06, "epoch": 1.244109330819981, "percentage": 24.88, "elapsed_time": "0:06:01", "remaining_time": "0:18:11", "throughput": 8487.61, "total_tokens": 3069792}
{"current_steps": 1325, "total_steps": 5305, "loss": 0.2062, "lr": 4.667253717997324e-06, "epoch": 1.2488218661639963, "percentage": 24.98, "elapsed_time": "0:06:02", "remaining_time": "0:18:08", "throughput": 8504.83, "total_tokens": 3080608}
{"current_steps": 1330, "total_steps": 5305, "loss": 0.165, "lr": 4.663141607580589e-06, "epoch": 1.2535344015080114, "percentage": 25.07, "elapsed_time": "0:06:02", "remaining_time": "0:18:04", "throughput": 8522.31, "total_tokens": 3091552}
{"current_steps": 1330, "total_steps": 5305, "eval_loss": 0.4607957601547241, "epoch": 1.2535344015080114, "percentage": 25.07, "elapsed_time": "0:06:05", "remaining_time": "0:18:12", "throughput": 8458.8, "total_tokens": 3091552}
{"current_steps": 1335, "total_steps": 5305, "loss": 0.222, "lr": 4.659006078641766e-06, "epoch": 1.2582469368520264, "percentage": 25.16, "elapsed_time": "0:06:52", "remaining_time": "0:20:26", "throughput": 7527.24, "total_tokens": 3103712}
{"current_steps": 1340, "total_steps": 5305, "loss": 0.2312, "lr": 4.6548471759527634e-06, "epoch": 1.2629594721960415, "percentage": 25.26, "elapsed_time": "0:06:52", "remaining_time": "0:20:21", "throughput": 7544.66, "total_tokens": 3115104}
{"current_steps": 1345, "total_steps": 5305, "loss": 0.011, "lr": 4.6506649445385335e-06, "epoch": 1.2676720075400565, "percentage": 25.35, "elapsed_time": "0:06:53", "remaining_time": "0:20:17", "throughput": 7564.33, "total_tokens": 3127648}
{"current_steps": 1350, "total_steps": 5305, "loss": 0.2732, "lr": 4.646459429676594e-06, "epoch": 1.2723845428840717, "percentage": 25.45, "elapsed_time": "0:06:54", "remaining_time": "0:20:12", "throughput": 7580.02, "total_tokens": 3138208}
{"current_steps": 1355, "total_steps": 5305, "loss": 0.148, "lr": 4.642230676896531e-06, "epoch": 1.2770970782280866, "percentage": 25.54, "elapsed_time": "0:06:54", "remaining_time": "0:20:08", "throughput": 7594.7, "total_tokens": 3148256}
{"current_steps": 1360, "total_steps": 5305, "loss": 0.0901, "lr": 4.6379787319795076e-06, "epoch": 1.2818096135721018, "percentage": 25.64, "elapsed_time": "0:06:55", "remaining_time": "0:20:03", "throughput": 7608.47, "total_tokens": 3157856}
{"current_steps": 1365, "total_steps": 5305, "loss": 0.24, "lr": 4.6337036409577705e-06, "epoch": 1.286522148916117, "percentage": 25.73, "elapsed_time": "0:06:55", "remaining_time": "0:19:59", "throughput": 7621.68, "total_tokens": 3167136}
{"current_steps": 1370, "total_steps": 5305, "loss": 0.0842, "lr": 4.62940545011415e-06, "epoch": 1.2912346842601319, "percentage": 25.82, "elapsed_time": "0:06:56", "remaining_time": "0:19:55", "throughput": 7645.08, "total_tokens": 3181984}
{"current_steps": 1375, "total_steps": 5305, "loss": 0.1368, "lr": 4.625084205981554e-06, "epoch": 1.295947219604147, "percentage": 25.92, "elapsed_time": "0:06:56", "remaining_time": "0:19:51", "throughput": 7666.59, "total_tokens": 3195744}
{"current_steps": 1380, "total_steps": 5305, "loss": 0.2497, "lr": 4.620739955342476e-06, "epoch": 1.3006597549481622, "percentage": 26.01, "elapsed_time": "0:06:57", "remaining_time": "0:19:47", "throughput": 7684.83, "total_tokens": 3207776}
{"current_steps": 1385, "total_steps": 5305, "loss": 0.0782, "lr": 4.616372745228477e-06, "epoch": 1.3053722902921772, "percentage": 26.11, "elapsed_time": "0:06:57", "remaining_time": "0:19:43", "throughput": 7702.02, "total_tokens": 3219296}
{"current_steps": 1390, "total_steps": 5305, "loss": 0.3956, "lr": 4.611982622919684e-06, "epoch": 1.3100848256361923, "percentage": 26.2, "elapsed_time": "0:06:58", "remaining_time": "0:19:38", "throughput": 7717.78, "total_tokens": 3230048}
{"current_steps": 1395, "total_steps": 5305, "loss": 0.1166, "lr": 4.607569635944271e-06, "epoch": 1.3147973609802073, "percentage": 26.3, "elapsed_time": "0:06:59", "remaining_time": "0:19:34", "throughput": 7730.55, "total_tokens": 3239200}
{"current_steps": 1400, "total_steps": 5305, "loss": 0.2557, "lr": 4.603133832077953e-06, "epoch": 1.3195098963242224, "percentage": 26.39, "elapsed_time": "0:06:59", "remaining_time": "0:19:30", "throughput": 7755.46, "total_tokens": 3255008}
{"current_steps": 1405, "total_steps": 5305, "loss": 0.2547, "lr": 4.598675259343462e-06, "epoch": 1.3242224316682374, "percentage": 26.48, "elapsed_time": "0:07:00", "remaining_time": "0:19:26", "throughput": 7773.58, "total_tokens": 3267040}
{"current_steps": 1410, "total_steps": 5305, "loss": 0.2374, "lr": 4.594193966010031e-06, "epoch": 1.3289349670122526, "percentage": 26.58, "elapsed_time": "0:07:00", "remaining_time": "0:19:22", "throughput": 7787.7, "total_tokens": 3276960}
{"current_steps": 1415, "total_steps": 5305, "loss": 0.0795, "lr": 4.589690000592868e-06, "epoch": 1.3336475023562677, "percentage": 26.67, "elapsed_time": "0:07:01", "remaining_time": "0:19:18", "throughput": 7803.5, "total_tokens": 3287840}
{"current_steps": 1420, "total_steps": 5305, "loss": 0.2095, "lr": 4.585163411852632e-06, "epoch": 1.3383600377002827, "percentage": 26.77, "elapsed_time": "0:07:01", "remaining_time": "0:19:14", "throughput": 7822.06, "total_tokens": 3300256}
{"current_steps": 1425, "total_steps": 5305, "loss": 0.3144, "lr": 4.58061424879491e-06, "epoch": 1.3430725730442978, "percentage": 26.86, "elapsed_time": "0:07:02", "remaining_time": "0:19:10", "throughput": 7838.93, "total_tokens": 3311712}
{"current_steps": 1430, "total_steps": 5305, "loss": 0.1113, "lr": 4.576042560669678e-06, "epoch": 1.347785108388313, "percentage": 26.96, "elapsed_time": "0:07:02", "remaining_time": "0:19:06", "throughput": 7853.88, "total_tokens": 3322144}
{"current_steps": 1435, "total_steps": 5305, "loss": 0.4022, "lr": 4.571448396970773e-06, "epoch": 1.352497643732328, "percentage": 27.05, "elapsed_time": "0:07:03", "remaining_time": "0:19:02", "throughput": 7871.19, "total_tokens": 3333856}
{"current_steps": 1440, "total_steps": 5305, "loss": 0.1542, "lr": 4.566831807435359e-06, "epoch": 1.3572101790763431, "percentage": 27.14, "elapsed_time": "0:07:04", "remaining_time": "0:18:58", "throughput": 7888.5, "total_tokens": 3345696}
{"current_steps": 1445, "total_steps": 5305, "loss": 0.2594, "lr": 4.562192842043381e-06, "epoch": 1.3619227144203583, "percentage": 27.24, "elapsed_time": "0:07:04", "remaining_time": "0:18:54", "throughput": 7904.84, "total_tokens": 3357024}
{"current_steps": 1450, "total_steps": 5305, "loss": 0.1721, "lr": 4.557531551017034e-06, "epoch": 1.3666352497643732, "percentage": 27.33, "elapsed_time": "0:07:05", "remaining_time": "0:18:50", "throughput": 7921.4, "total_tokens": 3368480}
{"current_steps": 1455, "total_steps": 5305, "loss": 0.1418, "lr": 4.552847984820208e-06, "epoch": 1.3713477851083884, "percentage": 27.43, "elapsed_time": "0:07:05", "remaining_time": "0:18:46", "throughput": 7935.75, "total_tokens": 3378720}
{"current_steps": 1460, "total_steps": 5305, "loss": 0.1344, "lr": 4.548142194157951e-06, "epoch": 1.3760603204524033, "percentage": 27.52, "elapsed_time": "0:07:06", "remaining_time": "0:18:42", "throughput": 7953.21, "total_tokens": 3390688}
{"current_steps": 1465, "total_steps": 5305, "loss": 0.2518, "lr": 4.54341422997592e-06, "epoch": 1.3807728557964185, "percentage": 27.62, "elapsed_time": "0:07:06", "remaining_time": "0:18:39", "throughput": 7972.12, "total_tokens": 3403488}
{"current_steps": 1470, "total_steps": 5305, "loss": 0.1194, "lr": 4.538664143459819e-06, "epoch": 1.3854853911404335, "percentage": 27.71, "elapsed_time": "0:07:07", "remaining_time": "0:18:35", "throughput": 7989.84, "total_tokens": 3415648}
{"current_steps": 1475, "total_steps": 5305, "loss": 0.1113, "lr": 4.5338919860348565e-06, "epoch": 1.3901979264844486, "percentage": 27.8, "elapsed_time": "0:07:08", "remaining_time": "0:18:31", "throughput": 8006.45, "total_tokens": 3427168}
{"current_steps": 1480, "total_steps": 5305, "loss": 0.1426, "lr": 4.529097809365184e-06, "epoch": 1.3949104618284638, "percentage": 27.9, "elapsed_time": "0:07:08", "remaining_time": "0:18:27", "throughput": 8021.11, "total_tokens": 3437664}
{"current_steps": 1485, "total_steps": 5305, "loss": 0.3136, "lr": 4.524281665353334e-06, "epoch": 1.3996229971724787, "percentage": 27.99, "elapsed_time": "0:07:09", "remaining_time": "0:18:23", "throughput": 8039.27, "total_tokens": 3450144}
{"current_steps": 1490, "total_steps": 5305, "loss": 0.1617, "lr": 4.519443606139665e-06, "epoch": 1.404335532516494, "percentage": 28.09, "elapsed_time": "0:07:09", "remaining_time": "0:18:20", "throughput": 8055.0, "total_tokens": 3461280}
{"current_steps": 1495, "total_steps": 5305, "loss": 0.2666, "lr": 4.514583684101792e-06, "epoch": 1.409048067860509, "percentage": 28.18, "elapsed_time": "0:07:10", "remaining_time": "0:18:16", "throughput": 8070.97, "total_tokens": 3472608}
{"current_steps": 1500, "total_steps": 5305, "loss": 0.105, "lr": 4.509701951854018e-06, "epoch": 1.413760603204524, "percentage": 28.28, "elapsed_time": "0:07:10", "remaining_time": "0:18:12", "throughput": 8088.92, "total_tokens": 3485024}
{"current_steps": 1505, "total_steps": 5305, "loss": 0.2341, "lr": 4.504798462246768e-06, "epoch": 1.4184731385485392, "percentage": 28.37, "elapsed_time": "0:07:11", "remaining_time": "0:18:09", "throughput": 8104.34, "total_tokens": 3496096}
{"current_steps": 1510, "total_steps": 5305, "loss": 0.2829, "lr": 4.499873268366017e-06, "epoch": 1.4231856738925541, "percentage": 28.46, "elapsed_time": "0:07:11", "remaining_time": "0:18:05", "throughput": 8119.21, "total_tokens": 3506848}
{"current_steps": 1515, "total_steps": 5305, "loss": 0.1819, "lr": 4.494926423532715e-06, "epoch": 1.4278982092365693, "percentage": 28.56, "elapsed_time": "0:07:12", "remaining_time": "0:18:02", "throughput": 8140.94, "total_tokens": 3521568}
{"current_steps": 1520, "total_steps": 5305, "loss": 0.1103, "lr": 4.4899579813022046e-06, "epoch": 1.4326107445805842, "percentage": 28.65, "elapsed_time": "0:07:13", "remaining_time": "0:17:58", "throughput": 8158.46, "total_tokens": 3533856}
{"current_steps": 1525, "total_steps": 5305, "loss": 0.216, "lr": 4.484967995463648e-06, "epoch": 1.4373232799245994, "percentage": 28.75, "elapsed_time": "0:07:13", "remaining_time": "0:17:54", "throughput": 8173.11, "total_tokens": 3544544}
{"current_steps": 1530, "total_steps": 5305, "loss": 0.303, "lr": 4.479956520039443e-06, "epoch": 1.4420358152686146, "percentage": 28.84, "elapsed_time": "0:07:14", "remaining_time": "0:17:51", "throughput": 8186.1, "total_tokens": 3554336}
{"current_steps": 1535, "total_steps": 5305, "loss": 0.0434, "lr": 4.474923609284635e-06, "epoch": 1.4467483506126295, "percentage": 28.93, "elapsed_time": "0:07:14", "remaining_time": "0:17:47", "throughput": 8199.44, "total_tokens": 3564384}
{"current_steps": 1540, "total_steps": 5305, "loss": 0.1438, "lr": 4.469869317686332e-06, "epoch": 1.4514608859566447, "percentage": 29.03, "elapsed_time": "0:07:15", "remaining_time": "0:17:44", "throughput": 8217.32, "total_tokens": 3576992}
{"current_steps": 1545, "total_steps": 5305, "loss": 0.1766, "lr": 4.464793699963116e-06, "epoch": 1.4561734213006599, "percentage": 29.12, "elapsed_time": "0:07:15", "remaining_time": "0:17:40", "throughput": 8232.21, "total_tokens": 3587872}
{"current_steps": 1550, "total_steps": 5305, "loss": 0.0997, "lr": 4.4596968110644484e-06, "epoch": 1.4608859566446748, "percentage": 29.22, "elapsed_time": "0:07:16", "remaining_time": "0:17:37", "throughput": 8246.64, "total_tokens": 3598560}
{"current_steps": 1555, "total_steps": 5305, "loss": 0.1595, "lr": 4.454578706170075e-06, "epoch": 1.46559849198869, "percentage": 29.31, "elapsed_time": "0:07:16", "remaining_time": "0:17:33", "throughput": 8260.4, "total_tokens": 3608864}
{"current_steps": 1560, "total_steps": 5305, "loss": 0.0274, "lr": 4.44943944068943e-06, "epoch": 1.4703110273327051, "percentage": 29.41, "elapsed_time": "0:07:17", "remaining_time": "0:17:30", "throughput": 8277.18, "total_tokens": 3620960}
{"current_steps": 1565, "total_steps": 5305, "loss": 0.4584, "lr": 4.444279070261035e-06, "epoch": 1.47502356267672, "percentage": 29.5, "elapsed_time": "0:07:18", "remaining_time": "0:17:26", "throughput": 8292.37, "total_tokens": 3632096}
{"current_steps": 1570, "total_steps": 5305, "loss": 0.2423, "lr": 4.4390976507518994e-06, "epoch": 1.479736098020735, "percentage": 29.59, "elapsed_time": "0:07:18", "remaining_time": "0:17:23", "throughput": 8307.85, "total_tokens": 3643424}
{"current_steps": 1575, "total_steps": 5305, "loss": 0.046, "lr": 4.433895238256909e-06, "epoch": 1.4844486333647502, "percentage": 29.69, "elapsed_time": "0:07:19", "remaining_time": "0:17:19", "throughput": 8323.02, "total_tokens": 3654624}
{"current_steps": 1580, "total_steps": 5305, "loss": 0.0609, "lr": 4.4286718890982275e-06, "epoch": 1.4891611687087654, "percentage": 29.78, "elapsed_time": "0:07:19", "remaining_time": "0:17:16", "throughput": 8337.56, "total_tokens": 3665504}
{"current_steps": 1585, "total_steps": 5305, "loss": 0.2488, "lr": 4.423427659824681e-06, "epoch": 1.4938737040527803, "percentage": 29.88, "elapsed_time": "0:07:20", "remaining_time": "0:17:13", "throughput": 8352.21, "total_tokens": 3676448}
{"current_steps": 1590, "total_steps": 5305, "loss": 0.4721, "lr": 4.418162607211146e-06, "epoch": 1.4985862393967955, "percentage": 29.97, "elapsed_time": "0:07:20", "remaining_time": "0:17:09", "throughput": 8365.14, "total_tokens": 3686432}
{"current_steps": 1595, "total_steps": 5305, "loss": 0.2207, "lr": 4.412876788257936e-06, "epoch": 1.5032987747408106, "percentage": 30.07, "elapsed_time": "0:07:21", "remaining_time": "0:17:06", "throughput": 8379.66, "total_tokens": 3697312}
{"current_steps": 1596, "total_steps": 5305, "eval_loss": 0.35448023676872253, "epoch": 1.5042412818096136, "percentage": 30.08, "elapsed_time": "0:07:24", "remaining_time": "0:17:11", "throughput": 8330.57, "total_tokens": 3699104}
{"current_steps": 1600, "total_steps": 5305, "loss": 0.2648, "lr": 4.407570260190186e-06, "epoch": 1.5080113100848256, "percentage": 30.16, "elapsed_time": "0:08:31", "remaining_time": "0:19:44", "throughput": 7247.48, "total_tokens": 3707808}
{"current_steps": 1605, "total_steps": 5305, "loss": 0.3225, "lr": 4.402243080457229e-06, "epoch": 1.5127238454288408, "percentage": 30.25, "elapsed_time": "0:08:32", "remaining_time": "0:19:40", "throughput": 7262.9, "total_tokens": 3719840}
{"current_steps": 1610, "total_steps": 5305, "loss": 0.2234, "lr": 4.396895306731978e-06, "epoch": 1.517436380772856, "percentage": 30.35, "elapsed_time": "0:08:32", "remaining_time": "0:19:36", "throughput": 7277.21, "total_tokens": 3731168}
{"current_steps": 1615, "total_steps": 5305, "loss": 0.2199, "lr": 4.391526996910298e-06, "epoch": 1.5221489161168709, "percentage": 30.44, "elapsed_time": "0:08:33", "remaining_time": "0:19:32", "throughput": 7294.07, "total_tokens": 3744160}
{"current_steps": 1620, "total_steps": 5305, "loss": 0.1515, "lr": 4.386138209110385e-06, "epoch": 1.5268614514608858, "percentage": 30.54, "elapsed_time": "0:08:33", "remaining_time": "0:19:28", "throughput": 7307.41, "total_tokens": 3754912}
{"current_steps": 1625, "total_steps": 5305, "loss": 0.1179, "lr": 4.3807290016721265e-06, "epoch": 1.5315739868049012, "percentage": 30.63, "elapsed_time": "0:08:34", "remaining_time": "0:19:25", "throughput": 7323.94, "total_tokens": 3767776}
{"current_steps": 1630, "total_steps": 5305, "loss": 0.1079, "lr": 4.375299433156483e-06, "epoch": 1.5362865221489161, "percentage": 30.73, "elapsed_time": "0:08:35", "remaining_time": "0:19:21", "throughput": 7338.06, "total_tokens": 3779104}
{"current_steps": 1635, "total_steps": 5305, "loss": 0.359, "lr": 4.3698495623448424e-06, "epoch": 1.540999057492931, "percentage": 30.82, "elapsed_time": "0:08:35", "remaining_time": "0:19:17", "throughput": 7350.65, "total_tokens": 3789408}
{"current_steps": 1640, "total_steps": 5305, "loss": 0.1058, "lr": 4.364379448238392e-06, "epoch": 1.5457115928369463, "percentage": 30.91, "elapsed_time": "0:08:36", "remaining_time": "0:19:13", "throughput": 7362.96, "total_tokens": 3799584}
{"current_steps": 1645, "total_steps": 5305, "loss": 0.3319, "lr": 4.358889150057476e-06, "epoch": 1.5504241281809614, "percentage": 31.01, "elapsed_time": "0:08:36", "remaining_time": "0:19:09", "throughput": 7380.73, "total_tokens": 3813344}
{"current_steps": 1650, "total_steps": 5305, "loss": 0.1354, "lr": 4.35337872724095e-06, "epoch": 1.5551366635249764, "percentage": 31.1, "elapsed_time": "0:08:37", "remaining_time": "0:19:05", "throughput": 7392.68, "total_tokens": 3823328}
{"current_steps": 1655, "total_steps": 5305, "loss": 0.1612, "lr": 4.347848239445548e-06, "epoch": 1.5598491988689915, "percentage": 31.2, "elapsed_time": "0:08:37", "remaining_time": "0:19:01", "throughput": 7407.61, "total_tokens": 3835232}
{"current_steps": 1660, "total_steps": 5305, "loss": 0.2858, "lr": 4.342297746545228e-06, "epoch": 1.5645617342130067, "percentage": 31.29, "elapsed_time": "0:08:38", "remaining_time": "0:18:58", "throughput": 7421.28, "total_tokens": 3846368}
{"current_steps": 1665, "total_steps": 5305, "loss": 0.0313, "lr": 4.336727308630527e-06, "epoch": 1.5692742695570217, "percentage": 31.39, "elapsed_time": "0:08:38", "remaining_time": "0:18:54", "throughput": 7436.81, "total_tokens": 3858656}
{"current_steps": 1670, "total_steps": 5305, "loss": 0.1587, "lr": 4.33113698600791e-06, "epoch": 1.5739868049010366, "percentage": 31.48, "elapsed_time": "0:08:39", "remaining_time": "0:18:50", "throughput": 7453.41, "total_tokens": 3871776}
{"current_steps": 1675, "total_steps": 5305, "loss": 0.0377, "lr": 4.325526839199115e-06, "epoch": 1.578699340245052, "percentage": 31.57, "elapsed_time": "0:08:40", "remaining_time": "0:18:47", "throughput": 7469.21, "total_tokens": 3884384}
{"current_steps": 1680, "total_steps": 5305, "loss": 0.2741, "lr": 4.319896928940505e-06, "epoch": 1.583411875589067, "percentage": 31.67, "elapsed_time": "0:08:40", "remaining_time": "0:18:43", "throughput": 7483.85, "total_tokens": 3896224}
{"current_steps": 1685, "total_steps": 5305, "loss": 0.1037, "lr": 4.3142473161824e-06, "epoch": 1.5881244109330819, "percentage": 31.76, "elapsed_time": "0:08:41", "remaining_time": "0:18:39", "throughput": 7496.08, "total_tokens": 3906528}
{"current_steps": 1690, "total_steps": 5305, "loss": 0.1437, "lr": 4.308578062088426e-06, "epoch": 1.592836946277097, "percentage": 31.86, "elapsed_time": "0:08:41", "remaining_time": "0:18:35", "throughput": 7509.63, "total_tokens": 3917728}
{"current_steps": 1695, "total_steps": 5305, "loss": 0.3957, "lr": 4.302889228034846e-06, "epoch": 1.5975494816211122, "percentage": 31.95, "elapsed_time": "0:08:42", "remaining_time": "0:18:32", "throughput": 7521.91, "total_tokens": 3928032}
{"current_steps": 1700, "total_steps": 5305, "loss": 0.1641, "lr": 4.297180875609902e-06, "epoch": 1.6022620169651272, "percentage": 32.05, "elapsed_time": "0:08:42", "remaining_time": "0:18:28", "throughput": 7537.22, "total_tokens": 3940384}
{"current_steps": 1705, "total_steps": 5305, "loss": 0.0949, "lr": 4.2914530666131436e-06, "epoch": 1.6069745523091423, "percentage": 32.14, "elapsed_time": "0:08:43", "remaining_time": "0:18:25", "throughput": 7551.26, "total_tokens": 3951904}
{"current_steps": 1710, "total_steps": 5305, "loss": 0.2799, "lr": 4.285705863054759e-06, "epoch": 1.6116870876531575, "percentage": 32.23, "elapsed_time": "0:08:43", "remaining_time": "0:18:21", "throughput": 7565.08, "total_tokens": 3963360}
{"current_steps": 1715, "total_steps": 5305, "loss": 0.3126, "lr": 4.279939327154909e-06, "epoch": 1.6163996229971724, "percentage": 32.33, "elapsed_time": "0:08:44", "remaining_time": "0:18:17", "throughput": 7578.36, "total_tokens": 3974432}
{"current_steps": 1720, "total_steps": 5305, "loss": 0.2358, "lr": 4.274153521343047e-06, "epoch": 1.6211121583411876, "percentage": 32.42, "elapsed_time": "0:08:44", "remaining_time": "0:18:14", "throughput": 7589.91, "total_tokens": 3984352}
{"current_steps": 1725, "total_steps": 5305, "loss": 0.0892, "lr": 4.268348508257243e-06, "epoch": 1.6258246936852028, "percentage": 32.52, "elapsed_time": "0:08:45", "remaining_time": "0:18:10", "throughput": 7601.06, "total_tokens": 3994016}
{"current_steps": 1730, "total_steps": 5305, "loss": 0.3199, "lr": 4.262524350743512e-06, "epoch": 1.6305372290292177, "percentage": 32.61, "elapsed_time": "0:08:46", "remaining_time": "0:18:06", "throughput": 7615.46, "total_tokens": 4005856}
{"current_steps": 1735, "total_steps": 5305, "loss": 0.1497, "lr": 4.25668111185513e-06, "epoch": 1.6352497643732327, "percentage": 32.7, "elapsed_time": "0:08:46", "remaining_time": "0:18:03", "throughput": 7629.08, "total_tokens": 4017248}
{"current_steps": 1740, "total_steps": 5305, "loss": 0.1124, "lr": 4.250818854851948e-06, "epoch": 1.6399622997172478, "percentage": 32.8, "elapsed_time": "0:08:47", "remaining_time": "0:17:59", "throughput": 7641.97, "total_tokens": 4028128}
{"current_steps": 1745, "total_steps": 5305, "loss": 0.1923, "lr": 4.244937643199711e-06, "epoch": 1.644674835061263, "percentage": 32.89, "elapsed_time": "0:08:47", "remaining_time": "0:17:56", "throughput": 7663.14, "total_tokens": 4044768}
{"current_steps": 1750, "total_steps": 5305, "loss": 0.1026, "lr": 4.239037540569373e-06, "epoch": 1.649387370405278, "percentage": 32.99, "elapsed_time": "0:08:48", "remaining_time": "0:17:53", "throughput": 7685.91, "total_tokens": 4062432}
{"current_steps": 1755, "total_steps": 5305, "loss": 0.0699, "lr": 4.233118610836401e-06, "epoch": 1.654099905749293, "percentage": 33.08, "elapsed_time": "0:08:49", "remaining_time": "0:17:50", "throughput": 7699.73, "total_tokens": 4074016}
{"current_steps": 1760, "total_steps": 5305, "loss": 0.1875, "lr": 4.227180918080089e-06, "epoch": 1.6588124410933083, "percentage": 33.18, "elapsed_time": "0:08:49", "remaining_time": "0:17:46", "throughput": 7712.12, "total_tokens": 4084704}
{"current_steps": 1765, "total_steps": 5305, "loss": 0.0828, "lr": 4.221224526582863e-06, "epoch": 1.6635249764373232, "percentage": 33.27, "elapsed_time": "0:08:50", "remaining_time": "0:17:43", "throughput": 7724.13, "total_tokens": 4095136}
{"current_steps": 1770, "total_steps": 5305, "loss": 0.1379, "lr": 4.215249500829583e-06, "epoch": 1.6682375117813384, "percentage": 33.36, "elapsed_time": "0:08:50", "remaining_time": "0:17:40", "throughput": 7739.34, "total_tokens": 4107744}
{"current_steps": 1775, "total_steps": 5305, "loss": 0.2322, "lr": 4.209255905506847e-06, "epoch": 1.6729500471253536, "percentage": 33.46, "elapsed_time": "0:08:51", "remaining_time": "0:17:36", "throughput": 7752.03, "total_tokens": 4118624}
{"current_steps": 1780, "total_steps": 5305, "loss": 0.1804, "lr": 4.2032438055022925e-06, "epoch": 1.6776625824693685, "percentage": 33.55, "elapsed_time": "0:08:51", "remaining_time": "0:17:33", "throughput": 7764.19, "total_tokens": 4129184}
{"current_steps": 1785, "total_steps": 5305, "loss": 0.3414, "lr": 4.197213265903889e-06, "epoch": 1.6823751178133834, "percentage": 33.65, "elapsed_time": "0:08:52", "remaining_time": "0:17:29", "throughput": 7778.18, "total_tokens": 4141024}
{"current_steps": 1790, "total_steps": 5305, "loss": 0.3523, "lr": 4.191164351999236e-06, "epoch": 1.6870876531573988, "percentage": 33.74, "elapsed_time": "0:08:52", "remaining_time": "0:17:26", "throughput": 7790.59, "total_tokens": 4151840}
{"current_steps": 1795, "total_steps": 5305, "loss": 0.2797, "lr": 4.18509712927486e-06, "epoch": 1.6918001885014138, "percentage": 33.84, "elapsed_time": "0:08:53", "remaining_time": "0:17:23", "throughput": 7805.98, "total_tokens": 4164704}
{"current_steps": 1800, "total_steps": 5305, "loss": 0.2943, "lr": 4.179011663415494e-06, "epoch": 1.6965127238454287, "percentage": 33.93, "elapsed_time": "0:08:54", "remaining_time": "0:17:20", "throughput": 7820.91, "total_tokens": 4177184}
{"current_steps": 1805, "total_steps": 5305, "loss": 0.0589, "lr": 4.172908020303384e-06, "epoch": 1.701225259189444, "percentage": 34.02, "elapsed_time": "0:08:54", "remaining_time": "0:17:16", "throughput": 7834.44, "total_tokens": 4188768}
{"current_steps": 1810, "total_steps": 5305, "loss": 0.1865, "lr": 4.166786266017557e-06, "epoch": 1.705937794533459, "percentage": 34.12, "elapsed_time": "0:08:55", "remaining_time": "0:17:13", "throughput": 7848.17, "total_tokens": 4200480}
{"current_steps": 1815, "total_steps": 5305, "loss": 0.1045, "lr": 4.160646466833121e-06, "epoch": 1.710650329877474, "percentage": 34.21, "elapsed_time": "0:08:55", "remaining_time": "0:17:10", "throughput": 7861.66, "total_tokens": 4212064}
{"current_steps": 1820, "total_steps": 5305, "loss": 0.2373, "lr": 4.154488689220536e-06, "epoch": 1.7153628652214892, "percentage": 34.31, "elapsed_time": "0:08:56", "remaining_time": "0:17:06", "throughput": 7872.22, "total_tokens": 4221728}
{"current_steps": 1825, "total_steps": 5305, "loss": 0.216, "lr": 4.1483129998449035e-06, "epoch": 1.7200754005655043, "percentage": 34.4, "elapsed_time": "0:08:56", "remaining_time": "0:17:03", "throughput": 7886.45, "total_tokens": 4233888}
{"current_steps": 1830, "total_steps": 5305, "loss": 0.2308, "lr": 4.142119465565238e-06, "epoch": 1.7247879359095193, "percentage": 34.5, "elapsed_time": "0:08:57", "remaining_time": "0:17:00", "throughput": 7899.66, "total_tokens": 4245344}
{"current_steps": 1835, "total_steps": 5305, "loss": 0.0663, "lr": 4.135908153433748e-06, "epoch": 1.7295004712535345, "percentage": 34.59, "elapsed_time": "0:08:57", "remaining_time": "0:16:57", "throughput": 7913.13, "total_tokens": 4256992}
{"current_steps": 1840, "total_steps": 5305, "loss": 0.0795, "lr": 4.129679130695105e-06, "epoch": 1.7342130065975496, "percentage": 34.68, "elapsed_time": "0:08:58", "remaining_time": "0:16:54", "throughput": 7923.88, "total_tokens": 4266784}
{"current_steps": 1845, "total_steps": 5305, "loss": 0.0953, "lr": 4.123432464785721e-06, "epoch": 1.7389255419415646, "percentage": 34.78, "elapsed_time": "0:08:59", "remaining_time": "0:16:51", "throughput": 7941.71, "total_tokens": 4281504}
{"current_steps": 1850, "total_steps": 5305, "loss": 0.3657, "lr": 4.117168223333015e-06, "epoch": 1.7436380772855795, "percentage": 34.87, "elapsed_time": "0:08:59", "remaining_time": "0:16:48", "throughput": 7959.28, "total_tokens": 4296032}
{"current_steps": 1855, "total_steps": 5305, "loss": 0.0417, "lr": 4.1108864741546815e-06, "epoch": 1.7483506126295947, "percentage": 34.97, "elapsed_time": "0:09:00", "remaining_time": "0:16:44", "throughput": 7974.85, "total_tokens": 4309280}
{"current_steps": 1860, "total_steps": 5305, "loss": 0.1138, "lr": 4.1045872852579546e-06, "epoch": 1.7530631479736098, "percentage": 35.06, "elapsed_time": "0:09:00", "remaining_time": "0:16:41", "throughput": 7986.29, "total_tokens": 4319648}
{"current_steps": 1862, "total_steps": 5305, "eval_loss": 0.3500010073184967, "epoch": 1.7549481621112157, "percentage": 35.1, "elapsed_time": "0:09:03", "remaining_time": "0:16:45", "throughput": 7951.47, "total_tokens": 4324256}
{"current_steps": 1865, "total_steps": 5305, "loss": 0.0767, "lr": 4.098270724838879e-06, "epoch": 1.7577756833176248, "percentage": 35.16, "elapsed_time": "0:09:34", "remaining_time": "0:17:40", "throughput": 7533.11, "total_tokens": 4330144}
{"current_steps": 1870, "total_steps": 5305, "loss": 0.0415, "lr": 4.091936861281561e-06, "epoch": 1.76248821866164, "percentage": 35.25, "elapsed_time": "0:09:35", "remaining_time": "0:17:37", "throughput": 7548.63, "total_tokens": 4343712}
{"current_steps": 1875, "total_steps": 5305, "loss": 0.4214, "lr": 4.085585763157435e-06, "epoch": 1.7672007540056551, "percentage": 35.34, "elapsed_time": "0:09:35", "remaining_time": "0:17:33", "throughput": 7559.85, "total_tokens": 4354144}
{"current_steps": 1880, "total_steps": 5305, "loss": 0.013, "lr": 4.07921749922452e-06, "epoch": 1.77191328934967, "percentage": 35.44, "elapsed_time": "0:09:36", "remaining_time": "0:17:30", "throughput": 7571.51, "total_tokens": 4364896}
{"current_steps": 1885, "total_steps": 5305, "loss": 0.1879, "lr": 4.0728321384266764e-06, "epoch": 1.7766258246936852, "percentage": 35.53, "elapsed_time": "0:09:37", "remaining_time": "0:17:26", "throughput": 7585.19, "total_tokens": 4377120}
{"current_steps": 1890, "total_steps": 5305, "loss": 0.1512, "lr": 4.066429749892854e-06, "epoch": 1.7813383600377004, "percentage": 35.63, "elapsed_time": "0:09:37", "remaining_time": "0:17:23", "throughput": 7597.16, "total_tokens": 4388128}
{"current_steps": 1895, "total_steps": 5305, "loss": 0.1946, "lr": 4.060010402936353e-06, "epoch": 1.7860508953817154, "percentage": 35.72, "elapsed_time": "0:09:38", "remaining_time": "0:17:20", "throughput": 7613.32, "total_tokens": 4402272}
{"current_steps": 1900, "total_steps": 5305, "loss": 0.0513, "lr": 4.053574167054063e-06, "epoch": 1.7907634307257303, "percentage": 35.82, "elapsed_time": "0:09:38", "remaining_time": "0:17:17", "throughput": 7623.91, "total_tokens": 4412640}
{"current_steps": 1905, "total_steps": 5305, "loss": 0.2935, "lr": 4.047121111925718e-06, "epoch": 1.7954759660697457, "percentage": 35.91, "elapsed_time": "0:09:39", "remaining_time": "0:17:13", "throughput": 7636.41, "total_tokens": 4424096}
{"current_steps": 1910, "total_steps": 5305, "loss": 0.1499, "lr": 4.040651307413142e-06, "epoch": 1.8001885014137606, "percentage": 36.0, "elapsed_time": "0:09:39", "remaining_time": "0:17:10", "throughput": 7646.98, "total_tokens": 4434144}
{"current_steps": 1915, "total_steps": 5305, "loss": 0.1671, "lr": 4.034164823559487e-06, "epoch": 1.8049010367577756, "percentage": 36.1, "elapsed_time": "0:09:40", "remaining_time": "0:17:07", "throughput": 7660.29, "total_tokens": 4446240}
{"current_steps": 1920, "total_steps": 5305, "loss": 0.183, "lr": 4.02766173058848e-06, "epoch": 1.8096135721017907, "percentage": 36.19, "elapsed_time": "0:09:40", "remaining_time": "0:17:04", "throughput": 7670.01, "total_tokens": 4455712}
{"current_steps": 1925, "total_steps": 5305, "loss": 0.2619, "lr": 4.021142098903662e-06, "epoch": 1.814326107445806, "percentage": 36.29, "elapsed_time": "0:09:41", "remaining_time": "0:17:00", "throughput": 7681.07, "total_tokens": 4466144}
{"current_steps": 1930, "total_steps": 5305, "loss": 0.2168, "lr": 4.014605999087623e-06, "epoch": 1.8190386427898209, "percentage": 36.38, "elapsed_time": "0:09:41", "remaining_time": "0:16:57", "throughput": 7691.33, "total_tokens": 4476064}
{"current_steps": 1935, "total_steps": 5305, "loss": 0.1402, "lr": 4.008053501901239e-06, "epoch": 1.823751178133836, "percentage": 36.48, "elapsed_time": "0:09:42", "remaining_time": "0:16:54", "throughput": 7703.64, "total_tokens": 4487456}
{"current_steps": 1940, "total_steps": 5305, "loss": 0.2318, "lr": 4.001484678282911e-06, "epoch": 1.8284637134778512, "percentage": 36.57, "elapsed_time": "0:09:43", "remaining_time": "0:16:51", "throughput": 7715.3, "total_tokens": 4498400}
{"current_steps": 1945, "total_steps": 5305, "loss": 0.1527, "lr": 3.994899599347787e-06, "epoch": 1.8331762488218661, "percentage": 36.66, "elapsed_time": "0:09:43", "remaining_time": "0:16:48", "throughput": 7729.78, "total_tokens": 4511520}
{"current_steps": 1950, "total_steps": 5305, "loss": 0.151, "lr": 3.9882983363869995e-06, "epoch": 1.837888784165881, "percentage": 36.76, "elapsed_time": "0:09:44", "remaining_time": "0:16:45", "throughput": 7742.41, "total_tokens": 4523232}
{"current_steps": 1955, "total_steps": 5305, "loss": 0.084, "lr": 3.981680960866896e-06, "epoch": 1.8426013195098965, "percentage": 36.85, "elapsed_time": "0:09:44", "remaining_time": "0:16:42", "throughput": 7757.02, "total_tokens": 4536416}
{"current_steps": 1960, "total_steps": 5305, "loss": 0.1193, "lr": 3.9750475444282545e-06, "epoch": 1.8473138548539114, "percentage": 36.95, "elapsed_time": "0:09:45", "remaining_time": "0:16:38", "throughput": 7767.46, "total_tokens": 4546528}
{"current_steps": 1965, "total_steps": 5305, "loss": 0.0301, "lr": 3.968398158885519e-06, "epoch": 1.8520263901979264, "percentage": 37.04, "elapsed_time": "0:09:45", "remaining_time": "0:16:35", "throughput": 7781.07, "total_tokens": 4559008}
{"current_steps": 1970, "total_steps": 5305, "loss": 0.1272, "lr": 3.961732876226016e-06, "epoch": 1.8567389255419415, "percentage": 37.13, "elapsed_time": "0:09:46", "remaining_time": "0:16:32", "throughput": 7792.41, "total_tokens": 4569824}
{"current_steps": 1975, "total_steps": 5305, "loss": 0.0125, "lr": 3.955051768609179e-06, "epoch": 1.8614514608859567, "percentage": 37.23, "elapsed_time": "0:09:47", "remaining_time": "0:16:29", "throughput": 7805.06, "total_tokens": 4581664}
{"current_steps": 1980, "total_steps": 5305, "loss": 0.2273, "lr": 3.948354908365762e-06, "epoch": 1.8661639962299716, "percentage": 37.32, "elapsed_time": "0:09:47", "remaining_time": "0:16:26", "throughput": 7817.9, "total_tokens": 4593696}
{"current_steps": 1985, "total_steps": 5305, "loss": 0.3306, "lr": 3.941642367997062e-06, "epoch": 1.8708765315739868, "percentage": 37.42, "elapsed_time": "0:09:48", "remaining_time": "0:16:23", "throughput": 7828.61, "total_tokens": 4604064}
{"current_steps": 1990, "total_steps": 5305, "loss": 0.2246, "lr": 3.934914220174128e-06, "epoch": 1.875589066918002, "percentage": 37.51, "elapsed_time": "0:09:48", "remaining_time": "0:16:20", "throughput": 7838.43, "total_tokens": 4613856}
{"current_steps": 1995, "total_steps": 5305, "loss": 0.262, "lr": 3.9281705377369814e-06, "epoch": 1.880301602262017, "percentage": 37.61, "elapsed_time": "0:09:49", "remaining_time": "0:16:17", "throughput": 7849.45, "total_tokens": 4624480}
{"current_steps": 2000, "total_steps": 5305, "loss": 0.0359, "lr": 3.921411393693823e-06, "epoch": 1.885014137606032, "percentage": 37.7, "elapsed_time": "0:09:49", "remaining_time": "0:16:14", "throughput": 7859.79, "total_tokens": 4634720}
{"current_steps": 2005, "total_steps": 5305, "loss": 0.1522, "lr": 3.9146368612202425e-06, "epoch": 1.8897266729500473, "percentage": 37.79, "elapsed_time": "0:09:50", "remaining_time": "0:16:11", "throughput": 7869.34, "total_tokens": 4644320}
{"current_steps": 2010, "total_steps": 5305, "loss": 0.1144, "lr": 3.907847013658429e-06, "epoch": 1.8944392082940622, "percentage": 37.89, "elapsed_time": "0:09:50", "remaining_time": "0:16:08", "throughput": 7882.48, "total_tokens": 4656672}
{"current_steps": 2015, "total_steps": 5305, "loss": 0.152, "lr": 3.901041924516372e-06, "epoch": 1.8991517436380771, "percentage": 37.98, "elapsed_time": "0:09:51", "remaining_time": "0:16:05", "throughput": 7895.41, "total_tokens": 4668832}
{"current_steps": 2020, "total_steps": 5305, "loss": 0.0683, "lr": 3.894221667467074e-06, "epoch": 1.9038642789820923, "percentage": 38.08, "elapsed_time": "0:09:51", "remaining_time": "0:16:02", "throughput": 7907.15, "total_tokens": 4680096}
{"current_steps": 2025, "total_steps": 5305, "loss": 0.0966, "lr": 3.887386316347742e-06, "epoch": 1.9085768143261075, "percentage": 38.17, "elapsed_time": "0:09:52", "remaining_time": "0:15:59", "throughput": 7920.05, "total_tokens": 4692320}
{"current_steps": 2030, "total_steps": 5305, "loss": 0.1503, "lr": 3.880535945158997e-06, "epoch": 1.9132893496701224, "percentage": 38.27, "elapsed_time": "0:09:53", "remaining_time": "0:15:56", "throughput": 7939.16, "total_tokens": 4709344}
{"current_steps": 2035, "total_steps": 5305, "loss": 0.0726, "lr": 3.873670628064071e-06, "epoch": 1.9180018850141376, "percentage": 38.36, "elapsed_time": "0:09:53", "remaining_time": "0:15:54", "throughput": 7952.47, "total_tokens": 4721888}
{"current_steps": 2040, "total_steps": 5305, "loss": 0.117, "lr": 3.866790439387998e-06, "epoch": 1.9227144203581528, "percentage": 38.45, "elapsed_time": "0:09:54", "remaining_time": "0:15:51", "throughput": 7963.06, "total_tokens": 4732384}
{"current_steps": 2045, "total_steps": 5305, "loss": 0.2188, "lr": 3.85989545361682e-06, "epoch": 1.9274269557021677, "percentage": 38.55, "elapsed_time": "0:09:54", "remaining_time": "0:15:48", "throughput": 7974.13, "total_tokens": 4743264}
{"current_steps": 2050, "total_steps": 5305, "loss": 0.1091, "lr": 3.85298574539677e-06, "epoch": 1.9321394910461829, "percentage": 38.64, "elapsed_time": "0:09:55", "remaining_time": "0:15:45", "throughput": 7983.97, "total_tokens": 4753248}
{"current_steps": 2055, "total_steps": 5305, "loss": 0.0907, "lr": 3.846061389533472e-06, "epoch": 1.936852026390198, "percentage": 38.74, "elapsed_time": "0:09:55", "remaining_time": "0:15:42", "throughput": 7995.81, "total_tokens": 4764768}
{"current_steps": 2060, "total_steps": 5305, "loss": 0.2683, "lr": 3.839122460991124e-06, "epoch": 1.941564561734213, "percentage": 38.83, "elapsed_time": "0:09:56", "remaining_time": "0:15:39", "throughput": 8006.65, "total_tokens": 4775456}
{"current_steps": 2065, "total_steps": 5305, "loss": 0.3549, "lr": 3.832169034891695e-06, "epoch": 1.946277097078228, "percentage": 38.93, "elapsed_time": "0:09:57", "remaining_time": "0:15:36", "throughput": 8021.3, "total_tokens": 4789152}
{"current_steps": 2070, "total_steps": 5305, "loss": 0.0639, "lr": 3.825201186514103e-06, "epoch": 1.9509896324222433, "percentage": 39.02, "elapsed_time": "0:09:57", "remaining_time": "0:15:34", "throughput": 8036.72, "total_tokens": 4803488}
{"current_steps": 2075, "total_steps": 5305, "loss": 0.2019, "lr": 3.818218991293406e-06, "epoch": 1.9557021677662583, "percentage": 39.11, "elapsed_time": "0:09:58", "remaining_time": "0:15:31", "throughput": 8046.21, "total_tokens": 4813216}
{"current_steps": 2080, "total_steps": 5305, "loss": 0.1943, "lr": 3.811222524819983e-06, "epoch": 1.9604147031102732, "percentage": 39.21, "elapsed_time": "0:09:58", "remaining_time": "0:15:28", "throughput": 8056.6, "total_tokens": 4823584}
{"current_steps": 2085, "total_steps": 5305, "loss": 0.0531, "lr": 3.8042118628387138e-06, "epoch": 1.9651272384542884, "percentage": 39.3, "elapsed_time": "0:09:59", "remaining_time": "0:15:25", "throughput": 8072.83, "total_tokens": 4838624}
{"current_steps": 2090, "total_steps": 5305, "loss": 0.0121, "lr": 3.7971870812481636e-06, "epoch": 1.9698397737983036, "percentage": 39.4, "elapsed_time": "0:09:59", "remaining_time": "0:15:22", "throughput": 8086.32, "total_tokens": 4851552}
{"current_steps": 2095, "total_steps": 5305, "loss": 0.1929, "lr": 3.7901482560997577e-06, "epoch": 1.9745523091423185, "percentage": 39.49, "elapsed_time": "0:10:00", "remaining_time": "0:15:20", "throughput": 8099.68, "total_tokens": 4864352}
{"current_steps": 2100, "total_steps": 5305, "loss": 0.2053, "lr": 3.78309546359696e-06, "epoch": 1.9792648444863337, "percentage": 39.59, "elapsed_time": "0:10:01", "remaining_time": "0:15:17", "throughput": 8110.99, "total_tokens": 4875616}
{"current_steps": 2105, "total_steps": 5305, "loss": 0.0107, "lr": 3.776028780094446e-06, "epoch": 1.9839773798303488, "percentage": 39.68, "elapsed_time": "0:10:01", "remaining_time": "0:15:14", "throughput": 8121.94, "total_tokens": 4886560}
{"current_steps": 2110, "total_steps": 5305, "loss": 0.2379, "lr": 3.7689482820972797e-06, "epoch": 1.9886899151743638, "percentage": 39.77, "elapsed_time": "0:10:02", "remaining_time": "0:15:11", "throughput": 8134.21, "total_tokens": 4898592}
{"current_steps": 2115, "total_steps": 5305, "loss": 0.2504, "lr": 3.7618540462600792e-06, "epoch": 1.993402450518379, "percentage": 39.87, "elapsed_time": "0:10:02", "remaining_time": "0:15:09", "throughput": 8148.48, "total_tokens": 4912160}
{"current_steps": 2120, "total_steps": 5305, "loss": 0.1832, "lr": 3.7547461493861948e-06, "epoch": 1.998114985862394, "percentage": 39.96, "elapsed_time": "0:10:03", "remaining_time": "0:15:06", "throughput": 8159.79, "total_tokens": 4923424}
{"current_steps": 2125, "total_steps": 5305, "loss": 0.0762, "lr": 3.7476246684268703e-06, "epoch": 2.002827521206409, "percentage": 40.06, "elapsed_time": "0:10:04", "remaining_time": "0:15:05", "throughput": 8154.58, "total_tokens": 4932416}
{"current_steps": 2128, "total_steps": 5305, "eval_loss": 0.33445462584495544, "epoch": 2.005655042412818, "percentage": 40.11, "elapsed_time": "0:10:08", "remaining_time": "0:15:08", "throughput": 8118.67, "total_tokens": 4940992}
{"current_steps": 2130, "total_steps": 5305, "loss": 0.0528, "lr": 3.740489680480415e-06, "epoch": 2.007540056550424, "percentage": 40.15, "elapsed_time": "0:11:00", "remaining_time": "0:16:25", "throughput": 7487.96, "total_tokens": 4948288}
{"current_steps": 2135, "total_steps": 5305, "loss": 0.0067, "lr": 3.733341262791366e-06, "epoch": 2.0122525918944394, "percentage": 40.25, "elapsed_time": "0:11:01", "remaining_time": "0:16:22", "throughput": 7499.95, "total_tokens": 4960512}
{"current_steps": 2140, "total_steps": 5305, "loss": 0.0027, "lr": 3.7261794927496535e-06, "epoch": 2.0169651272384543, "percentage": 40.34, "elapsed_time": "0:11:01", "remaining_time": "0:16:19", "throughput": 7511.43, "total_tokens": 4972352}
{"current_steps": 2145, "total_steps": 5305, "loss": 0.0681, "lr": 3.719004447889762e-06, "epoch": 2.0216776625824693, "percentage": 40.43, "elapsed_time": "0:11:02", "remaining_time": "0:16:15", "throughput": 7520.63, "total_tokens": 4982272}
{"current_steps": 2150, "total_steps": 5305, "loss": 0.1795, "lr": 3.7118162058898915e-06, "epoch": 2.0263901979264842, "percentage": 40.53, "elapsed_time": "0:11:03", "remaining_time": "0:16:12", "throughput": 7530.9, "total_tokens": 4993088}
{"current_steps": 2155, "total_steps": 5305, "loss": 0.0124, "lr": 3.704614844571117e-06, "epoch": 2.0311027332704996, "percentage": 40.62, "elapsed_time": "0:11:03", "remaining_time": "0:16:09", "throughput": 7540.49, "total_tokens": 5003392}
{"current_steps": 2160, "total_steps": 5305, "loss": 0.0007, "lr": 3.6974004418965435e-06, "epoch": 2.0358152686145146, "percentage": 40.72, "elapsed_time": "0:11:04", "remaining_time": "0:16:06", "throughput": 7551.12, "total_tokens": 5014592}
{"current_steps": 2165, "total_steps": 5305, "loss": 0.1943, "lr": 3.6901730759704674e-06, "epoch": 2.0405278039585295, "percentage": 40.81, "elapsed_time": "0:11:04", "remaining_time": "0:16:04", "throughput": 7564.58, "total_tokens": 5028160}
{"current_steps": 2170, "total_steps": 5305, "loss": 0.1365, "lr": 3.682932825037523e-06, "epoch": 2.045240339302545, "percentage": 40.9, "elapsed_time": "0:11:05", "remaining_time": "0:16:01", "throughput": 7572.98, "total_tokens": 5037504}
{"current_steps": 2175, "total_steps": 5305, "loss": 0.0894, "lr": 3.675679767481842e-06, "epoch": 2.04995287464656, "percentage": 41.0, "elapsed_time": "0:11:05", "remaining_time": "0:15:58", "throughput": 7587.85, "total_tokens": 5052288}
{"current_steps": 2180, "total_steps": 5305, "loss": 0.1397, "lr": 3.6684139818262045e-06, "epoch": 2.054665409990575, "percentage": 41.09, "elapsed_time": "0:11:06", "remaining_time": "0:15:55", "throughput": 7599.5, "total_tokens": 5064384}
{"current_steps": 2185, "total_steps": 5305, "loss": 0.0268, "lr": 3.6611355467311825e-06, "epoch": 2.05937794533459, "percentage": 41.19, "elapsed_time": "0:11:06", "remaining_time": "0:15:52", "throughput": 7608.46, "total_tokens": 5074240}
{"current_steps": 2190, "total_steps": 5305, "loss": 0.0081, "lr": 3.653844540994298e-06, "epoch": 2.064090480678605, "percentage": 41.28, "elapsed_time": "0:11:07", "remaining_time": "0:15:49", "throughput": 7618.86, "total_tokens": 5085312}
{"current_steps": 2195, "total_steps": 5305, "loss": 0.0006, "lr": 3.6465410435491603e-06, "epoch": 2.06880301602262, "percentage": 41.38, "elapsed_time": "0:11:07", "remaining_time": "0:15:46", "throughput": 7627.12, "total_tokens": 5094592}
{"current_steps": 2200, "total_steps": 5305, "loss": 0.0012, "lr": 3.6392251334646194e-06, "epoch": 2.0735155513666355, "percentage": 41.47, "elapsed_time": "0:11:08", "remaining_time": "0:15:43", "throughput": 7640.88, "total_tokens": 5108544}
{"current_steps": 2205, "total_steps": 5305, "loss": 0.2164, "lr": 3.6318968899439042e-06, "epoch": 2.0782280867106504, "percentage": 41.56, "elapsed_time": "0:11:09", "remaining_time": "0:15:40", "throughput": 7648.84, "total_tokens": 5118976}
{"current_steps": 2210, "total_steps": 5305, "loss": 0.0004, "lr": 3.6245563923237692e-06, "epoch": 2.0829406220546653, "percentage": 41.66, "elapsed_time": "0:11:09", "remaining_time": "0:15:38", "throughput": 7664.08, "total_tokens": 5134272}
{"current_steps": 2215, "total_steps": 5305, "loss": 0.0463, "lr": 3.617203720073633e-06, "epoch": 2.0876531573986803, "percentage": 41.75, "elapsed_time": "0:11:10", "remaining_time": "0:15:35", "throughput": 7674.4, "total_tokens": 5145408}
{"current_steps": 2220, "total_steps": 5305, "loss": 0.1413, "lr": 3.6098389527947164e-06, "epoch": 2.0923656927426957, "percentage": 41.85, "elapsed_time": "0:11:11", "remaining_time": "0:15:32", "throughput": 7685.87, "total_tokens": 5157440}
{"current_steps": 2225, "total_steps": 5305, "loss": 0.0007, "lr": 3.6024621702191876e-06, "epoch": 2.0970782280867106, "percentage": 41.94, "elapsed_time": "0:11:11", "remaining_time": "0:15:29", "throughput": 7698.04, "total_tokens": 5170176}
{"current_steps": 2230, "total_steps": 5305, "loss": 0.2877, "lr": 3.5950734522092908e-06, "epoch": 2.1017907634307256, "percentage": 42.04, "elapsed_time": "0:11:12", "remaining_time": "0:15:26", "throughput": 7705.6, "total_tokens": 5178944}
{"current_steps": 2235, "total_steps": 5305, "loss": 0.0007, "lr": 3.587672878756487e-06, "epoch": 2.106503298774741, "percentage": 42.13, "elapsed_time": "0:11:12", "remaining_time": "0:15:23", "throughput": 7716.13, "total_tokens": 5190272}
{"current_steps": 2240, "total_steps": 5305, "loss": 0.0004, "lr": 3.5802605299805843e-06, "epoch": 2.111215834118756, "percentage": 42.22, "elapsed_time": "0:11:13", "remaining_time": "0:15:21", "throughput": 7727.47, "total_tokens": 5202304}
{"current_steps": 2245, "total_steps": 5305, "loss": 0.1757, "lr": 3.5728364861288743e-06, "epoch": 2.115928369462771, "percentage": 42.32, "elapsed_time": "0:11:13", "remaining_time": "0:15:18", "throughput": 7740.51, "total_tokens": 5215808}
{"current_steps": 2250, "total_steps": 5305, "loss": 0.0003, "lr": 3.5654008275752607e-06, "epoch": 2.1206409048067862, "percentage": 42.41, "elapsed_time": "0:11:14", "remaining_time": "0:15:15", "throughput": 7753.23, "total_tokens": 5229056}
{"current_steps": 2255, "total_steps": 5305, "loss": 0.0007, "lr": 3.557953634819389e-06, "epoch": 2.125353440150801, "percentage": 42.51, "elapsed_time": "0:11:14", "remaining_time": "0:15:12", "throughput": 7762.86, "total_tokens": 5239616}
{"current_steps": 2260, "total_steps": 5305, "loss": 0.1511, "lr": 3.550494988485777e-06, "epoch": 2.130065975494816, "percentage": 42.6, "elapsed_time": "0:11:15", "remaining_time": "0:15:10", "throughput": 7771.73, "total_tokens": 5249600}
{"current_steps": 2265, "total_steps": 5305, "loss": 0.2004, "lr": 3.5430249693229403e-06, "epoch": 2.1347785108388315, "percentage": 42.7, "elapsed_time": "0:11:16", "remaining_time": "0:15:07", "throughput": 7783.28, "total_tokens": 5261888}
{"current_steps": 2270, "total_steps": 5305, "loss": 0.0272, "lr": 3.5355436582025184e-06, "epoch": 2.1394910461828465, "percentage": 42.79, "elapsed_time": "0:11:16", "remaining_time": "0:15:04", "throughput": 7793.17, "total_tokens": 5272768}
{"current_steps": 2275, "total_steps": 5305, "loss": 0.142, "lr": 3.5280511361183995e-06, "epoch": 2.1442035815268614, "percentage": 42.88, "elapsed_time": "0:11:17", "remaining_time": "0:15:01", "throughput": 7802.88, "total_tokens": 5283520}
{"current_steps": 2280, "total_steps": 5305, "loss": 0.0003, "lr": 3.5205474841858444e-06, "epoch": 2.1489161168708764, "percentage": 42.98, "elapsed_time": "0:11:17", "remaining_time": "0:14:59", "throughput": 7812.59, "total_tokens": 5294336}
{"current_steps": 2285, "total_steps": 5305, "loss": 0.0445, "lr": 3.513032783640605e-06, "epoch": 2.1536286522148917, "percentage": 43.07, "elapsed_time": "0:11:18", "remaining_time": "0:14:56", "throughput": 7822.2, "total_tokens": 5304960}
{"current_steps": 2290, "total_steps": 5305, "loss": 0.0002, "lr": 3.5055071158380512e-06, "epoch": 2.1583411875589067, "percentage": 43.17, "elapsed_time": "0:11:18", "remaining_time": "0:14:53", "throughput": 7833.59, "total_tokens": 5317184}
{"current_steps": 2295, "total_steps": 5305, "loss": 0.0003, "lr": 3.497970562252282e-06, "epoch": 2.1630537229029216, "percentage": 43.26, "elapsed_time": "0:11:19", "remaining_time": "0:14:50", "throughput": 7844.67, "total_tokens": 5329152}
{"current_steps": 2300, "total_steps": 5305, "loss": 0.232, "lr": 3.4904232044752507e-06, "epoch": 2.167766258246937, "percentage": 43.36, "elapsed_time": "0:11:19", "remaining_time": "0:14:48", "throughput": 7856.77, "total_tokens": 5342016}
{"current_steps": 2305, "total_steps": 5305, "loss": 0.1157, "lr": 3.4828651242158764e-06, "epoch": 2.172478793590952, "percentage": 43.45, "elapsed_time": "0:11:20", "remaining_time": "0:14:45", "throughput": 7866.41, "total_tokens": 5352768}
{"current_steps": 2310, "total_steps": 5305, "loss": 0.1506, "lr": 3.4752964032991638e-06, "epoch": 2.177191328934967, "percentage": 43.54, "elapsed_time": "0:11:21", "remaining_time": "0:14:42", "throughput": 7876.76, "total_tokens": 5364160}
{"current_steps": 2315, "total_steps": 5305, "loss": 0.1442, "lr": 3.4677171236653133e-06, "epoch": 2.181903864278982, "percentage": 43.64, "elapsed_time": "0:11:21", "remaining_time": "0:14:40", "throughput": 7888.16, "total_tokens": 5376448}
{"current_steps": 2320, "total_steps": 5305, "loss": 0.0562, "lr": 3.460127367368836e-06, "epoch": 2.1866163996229973, "percentage": 43.73, "elapsed_time": "0:11:22", "remaining_time": "0:14:37", "throughput": 7897.01, "total_tokens": 5386560}
{"current_steps": 2325, "total_steps": 5305, "loss": 0.1956, "lr": 3.452527216577665e-06, "epoch": 2.191328934967012, "percentage": 43.83, "elapsed_time": "0:11:22", "remaining_time": "0:14:35", "throughput": 7908.79, "total_tokens": 5399296}
{"current_steps": 2330, "total_steps": 5305, "loss": 0.1061, "lr": 3.444916753572267e-06, "epoch": 2.196041470311027, "percentage": 43.92, "elapsed_time": "0:11:23", "remaining_time": "0:14:32", "throughput": 7919.33, "total_tokens": 5410944}
{"current_steps": 2335, "total_steps": 5305, "loss": 0.0012, "lr": 3.4372960607447493e-06, "epoch": 2.2007540056550425, "percentage": 44.02, "elapsed_time": "0:11:23", "remaining_time": "0:14:29", "throughput": 7930.56, "total_tokens": 5423168}
{"current_steps": 2340, "total_steps": 5305, "loss": 0.0111, "lr": 3.429665220597968e-06, "epoch": 2.2054665409990575, "percentage": 44.11, "elapsed_time": "0:11:24", "remaining_time": "0:14:27", "throughput": 7943.07, "total_tokens": 5436544}
{"current_steps": 2345, "total_steps": 5305, "loss": 0.0934, "lr": 3.4220243157446388e-06, "epoch": 2.2101790763430724, "percentage": 44.2, "elapsed_time": "0:11:25", "remaining_time": "0:14:24", "throughput": 7954.0, "total_tokens": 5448512}
{"current_steps": 2350, "total_steps": 5305, "loss": 0.0139, "lr": 3.4143734289064363e-06, "epoch": 2.214891611687088, "percentage": 44.3, "elapsed_time": "0:11:25", "remaining_time": "0:14:22", "throughput": 7964.37, "total_tokens": 5460032}
{"current_steps": 2355, "total_steps": 5305, "loss": 0.0004, "lr": 3.4067126429131035e-06, "epoch": 2.2196041470311028, "percentage": 44.39, "elapsed_time": "0:11:26", "remaining_time": "0:14:19", "throughput": 7976.18, "total_tokens": 5472896}
{"current_steps": 2360, "total_steps": 5305, "loss": 0.0005, "lr": 3.3990420407015534e-06, "epoch": 2.2243166823751177, "percentage": 44.49, "elapsed_time": "0:11:26", "remaining_time": "0:14:16", "throughput": 7984.8, "total_tokens": 5482944}
{"current_steps": 2365, "total_steps": 5305, "loss": 0.0536, "lr": 3.3913617053149694e-06, "epoch": 2.229029217719133, "percentage": 44.58, "elapsed_time": "0:11:27", "remaining_time": "0:14:14", "throughput": 7994.92, "total_tokens": 5494336}
{"current_steps": 2370, "total_steps": 5305, "loss": 0.0001, "lr": 3.3836717199019087e-06, "epoch": 2.233741753063148, "percentage": 44.67, "elapsed_time": "0:11:27", "remaining_time": "0:14:11", "throughput": 8005.03, "total_tokens": 5505728}
{"current_steps": 2375, "total_steps": 5305, "loss": 0.0861, "lr": 3.3759721677154022e-06, "epoch": 2.238454288407163, "percentage": 44.77, "elapsed_time": "0:11:28", "remaining_time": "0:14:09", "throughput": 8013.21, "total_tokens": 5515328}
{"current_steps": 2380, "total_steps": 5305, "loss": 0.0002, "lr": 3.3682631321120507e-06, "epoch": 2.243166823751178, "percentage": 44.86, "elapsed_time": "0:11:28", "remaining_time": "0:14:06", "throughput": 8022.23, "total_tokens": 5525760}
{"current_steps": 2385, "total_steps": 5305, "loss": 0.168, "lr": 3.3605446965511256e-06, "epoch": 2.2478793590951933, "percentage": 44.96, "elapsed_time": "0:11:29", "remaining_time": "0:14:03", "throughput": 8032.52, "total_tokens": 5537280}
{"current_steps": 2390, "total_steps": 5305, "loss": 0.0898, "lr": 3.3528169445936616e-06, "epoch": 2.2525918944392083, "percentage": 45.05, "elapsed_time": "0:11:29", "remaining_time": "0:14:01", "throughput": 8042.92, "total_tokens": 5548928}
{"current_steps": 2394, "total_steps": 5305, "eval_loss": 0.46465176343917847, "epoch": 2.2563619227144205, "percentage": 45.13, "elapsed_time": "0:11:33", "remaining_time": "0:14:02", "throughput": 8019.56, "total_tokens": 5558144}
{"current_steps": 2395, "total_steps": 5305, "loss": 0.1847, "lr": 3.3450799599015567e-06, "epoch": 2.257304429783223, "percentage": 45.15, "elapsed_time": "0:12:25", "remaining_time": "0:15:06", "throughput": 7453.31, "total_tokens": 5559872}
{"current_steps": 2400, "total_steps": 5305, "loss": 0.0234, "lr": 3.3373338262366617e-06, "epoch": 2.2620169651272386, "percentage": 45.24, "elapsed_time": "0:12:26", "remaining_time": "0:15:03", "throughput": 7463.04, "total_tokens": 5571264}
{"current_steps": 2405, "total_steps": 5305, "loss": 0.0881, "lr": 3.329578627459878e-06, "epoch": 2.2667295004712535, "percentage": 45.33, "elapsed_time": "0:12:27", "remaining_time": "0:15:00", "throughput": 7471.35, "total_tokens": 5581312}
{"current_steps": 2410, "total_steps": 5305, "loss": 0.0004, "lr": 3.3218144475302444e-06, "epoch": 2.2714420358152685, "percentage": 45.43, "elapsed_time": "0:12:27", "remaining_time": "0:14:58", "throughput": 7480.7, "total_tokens": 5592384}
{"current_steps": 2415, "total_steps": 5305, "loss": 0.1036, "lr": 3.314041370504034e-06, "epoch": 2.276154571159284, "percentage": 45.52, "elapsed_time": "0:12:28", "remaining_time": "0:14:55", "throughput": 7490.13, "total_tokens": 5603456}
{"current_steps": 2420, "total_steps": 5305, "loss": 0.0579, "lr": 3.30625948053384e-06, "epoch": 2.280867106503299, "percentage": 45.62, "elapsed_time": "0:12:28", "remaining_time": "0:14:52", "throughput": 7499.39, "total_tokens": 5614464}
{"current_steps": 2425, "total_steps": 5305, "loss": 0.089, "lr": 3.2984688618676665e-06, "epoch": 2.2855796418473138, "percentage": 45.71, "elapsed_time": "0:12:29", "remaining_time": "0:14:49", "throughput": 7509.32, "total_tokens": 5626112}
{"current_steps": 2430, "total_steps": 5305, "loss": 0.0886, "lr": 3.2906695988480144e-06, "epoch": 2.290292177191329, "percentage": 45.81, "elapsed_time": "0:12:29", "remaining_time": "0:14:47", "throughput": 7518.75, "total_tokens": 5637248}
{"current_steps": 2435, "total_steps": 5305, "loss": 0.0709, "lr": 3.2828617759109715e-06, "epoch": 2.295004712535344, "percentage": 45.9, "elapsed_time": "0:12:30", "remaining_time": "0:14:44", "throughput": 7527.22, "total_tokens": 5647552}
{"current_steps": 2440, "total_steps": 5305, "loss": 0.0006, "lr": 3.2750454775852956e-06, "epoch": 2.299717247879359, "percentage": 45.99, "elapsed_time": "0:12:30", "remaining_time": "0:14:41", "throughput": 7540.11, "total_tokens": 5662080}
{"current_steps": 2445, "total_steps": 5305, "loss": 0.0005, "lr": 3.2672207884915017e-06, "epoch": 2.304429783223374, "percentage": 46.09, "elapsed_time": "0:12:31", "remaining_time": "0:14:39", "throughput": 7550.19, "total_tokens": 5673856}
{"current_steps": 2450, "total_steps": 5305, "loss": 0.107, "lr": 3.2593877933409436e-06, "epoch": 2.3091423185673894, "percentage": 46.18, "elapsed_time": "0:12:32", "remaining_time": "0:14:36", "throughput": 7558.37, "total_tokens": 5683904}
{"current_steps": 2455, "total_steps": 5305, "loss": 0.0003, "lr": 3.251546576934897e-06, "epoch": 2.3138548539114043, "percentage": 46.28, "elapsed_time": "0:12:32", "remaining_time": "0:14:33", "throughput": 7567.03, "total_tokens": 5694400}
{"current_steps": 2460, "total_steps": 5305, "loss": 0.1635, "lr": 3.2436972241636443e-06, "epoch": 2.3185673892554193, "percentage": 46.37, "elapsed_time": "0:12:33", "remaining_time": "0:14:30", "throughput": 7576.42, "total_tokens": 5705664}
{"current_steps": 2465, "total_steps": 5305, "loss": 0.0001, "lr": 3.2358398200055515e-06, "epoch": 2.3232799245994347, "percentage": 46.47, "elapsed_time": "0:12:33", "remaining_time": "0:14:28", "throughput": 7587.87, "total_tokens": 5718848}
{"current_steps": 2470, "total_steps": 5305, "loss": 0.0504, "lr": 3.227974449526152e-06, "epoch": 2.3279924599434496, "percentage": 46.56, "elapsed_time": "0:12:34", "remaining_time": "0:14:25", "throughput": 7599.34, "total_tokens": 5732096}
{"current_steps": 2475, "total_steps": 5305, "loss": 0.09, "lr": 3.2201011978772224e-06, "epoch": 2.3327049952874646, "percentage": 46.65, "elapsed_time": "0:12:34", "remaining_time": "0:14:23", "throughput": 7607.44, "total_tokens": 5742144}
{"current_steps": 2480, "total_steps": 5305, "loss": 0.0647, "lr": 3.2122201502958635e-06, "epoch": 2.3374175306314795, "percentage": 46.75, "elapsed_time": "0:12:35", "remaining_time": "0:14:20", "throughput": 7617.6, "total_tokens": 5754176}
{"current_steps": 2485, "total_steps": 5305, "loss": 0.0155, "lr": 3.2043313921035747e-06, "epoch": 2.342130065975495, "percentage": 46.84, "elapsed_time": "0:12:35", "remaining_time": "0:14:17", "throughput": 7628.61, "total_tokens": 5767104}
{"current_steps": 2490, "total_steps": 5305, "loss": 0.3015, "lr": 3.1964350087053323e-06, "epoch": 2.34684260131951, "percentage": 46.94, "elapsed_time": "0:12:36", "remaining_time": "0:14:15", "throughput": 7639.16, "total_tokens": 5779520}
{"current_steps": 2495, "total_steps": 5305, "loss": 0.0284, "lr": 3.1885310855886655e-06, "epoch": 2.3515551366635252, "percentage": 47.03, "elapsed_time": "0:12:37", "remaining_time": "0:14:12", "throughput": 7650.39, "total_tokens": 5792640}
{"current_steps": 2500, "total_steps": 5305, "loss": 0.0001, "lr": 3.1806197083227276e-06, "epoch": 2.35626767200754, "percentage": 47.13, "elapsed_time": "0:12:37", "remaining_time": "0:14:10", "throughput": 7661.54, "total_tokens": 5805696}
{"current_steps": 2505, "total_steps": 5305, "loss": 0.168, "lr": 3.172700962557373e-06, "epoch": 2.360980207351555, "percentage": 47.22, "elapsed_time": "0:12:38", "remaining_time": "0:14:07", "throughput": 7673.81, "total_tokens": 5819840}
{"current_steps": 2510, "total_steps": 5305, "loss": 0.1209, "lr": 3.1647749340222288e-06, "epoch": 2.36569274269557, "percentage": 47.31, "elapsed_time": "0:12:38", "remaining_time": "0:14:05", "throughput": 7681.99, "total_tokens": 5830016}
{"current_steps": 2515, "total_steps": 5305, "loss": 0.0744, "lr": 3.1568417085257653e-06, "epoch": 2.3704052780395855, "percentage": 47.41, "elapsed_time": "0:12:39", "remaining_time": "0:14:02", "throughput": 7689.91, "total_tokens": 5840000}
{"current_steps": 2520, "total_steps": 5305, "loss": 0.0681, "lr": 3.1489013719543703e-06, "epoch": 2.3751178133836004, "percentage": 47.5, "elapsed_time": "0:12:39", "remaining_time": "0:13:59", "throughput": 7697.8, "total_tokens": 5849920}
{"current_steps": 2525, "total_steps": 5305, "loss": 0.2567, "lr": 3.140954010271416e-06, "epoch": 2.3798303487276153, "percentage": 47.6, "elapsed_time": "0:12:40", "remaining_time": "0:13:57", "throughput": 7706.35, "total_tokens": 5860480}
{"current_steps": 2530, "total_steps": 5305, "loss": 0.0055, "lr": 3.132999709516329e-06, "epoch": 2.3845428840716307, "percentage": 47.69, "elapsed_time": "0:12:41", "remaining_time": "0:13:54", "throughput": 7717.24, "total_tokens": 5873408}
{"current_steps": 2535, "total_steps": 5305, "loss": 0.0887, "lr": 3.1250385558036606e-06, "epoch": 2.3892554194156457, "percentage": 47.79, "elapsed_time": "0:12:41", "remaining_time": "0:13:52", "throughput": 7726.37, "total_tokens": 5884608}
{"current_steps": 2540, "total_steps": 5305, "loss": 0.2362, "lr": 3.1170706353221525e-06, "epoch": 2.3939679547596606, "percentage": 47.88, "elapsed_time": "0:12:42", "remaining_time": "0:13:49", "throughput": 7735.81, "total_tokens": 5896064}
{"current_steps": 2545, "total_steps": 5305, "loss": 0.0014, "lr": 3.109096034333805e-06, "epoch": 2.3986804901036756, "percentage": 47.97, "elapsed_time": "0:12:42", "remaining_time": "0:13:47", "throughput": 7745.46, "total_tokens": 5907776}
{"current_steps": 2550, "total_steps": 5305, "loss": 0.0292, "lr": 3.1011148391729434e-06, "epoch": 2.403393025447691, "percentage": 48.07, "elapsed_time": "0:12:43", "remaining_time": "0:13:44", "throughput": 7755.38, "total_tokens": 5919744}
{"current_steps": 2555, "total_steps": 5305, "loss": 0.18, "lr": 3.0931271362452803e-06, "epoch": 2.408105560791706, "percentage": 48.16, "elapsed_time": "0:12:43", "remaining_time": "0:13:42", "throughput": 7765.8, "total_tokens": 5932224}
{"current_steps": 2560, "total_steps": 5305, "loss": 0.001, "lr": 3.085133012026985e-06, "epoch": 2.412818096135721, "percentage": 48.26, "elapsed_time": "0:12:44", "remaining_time": "0:13:39", "throughput": 7774.88, "total_tokens": 5943424}
{"current_steps": 2565, "total_steps": 5305, "loss": 0.1243, "lr": 3.0771325530637434e-06, "epoch": 2.4175306314797362, "percentage": 48.35, "elapsed_time": "0:12:45", "remaining_time": "0:13:37", "throughput": 7785.29, "total_tokens": 5955904}
{"current_steps": 2570, "total_steps": 5305, "loss": 0.0789, "lr": 3.0691258459698227e-06, "epoch": 2.422243166823751, "percentage": 48.44, "elapsed_time": "0:12:45", "remaining_time": "0:13:34", "throughput": 7794.63, "total_tokens": 5967360}
{"current_steps": 2575, "total_steps": 5305, "loss": 0.1948, "lr": 3.0611129774271318e-06, "epoch": 2.426955702167766, "percentage": 48.54, "elapsed_time": "0:12:46", "remaining_time": "0:13:32", "throughput": 7805.78, "total_tokens": 5980608}
{"current_steps": 2580, "total_steps": 5305, "loss": 0.0003, "lr": 3.0530940341842883e-06, "epoch": 2.4316682375117815, "percentage": 48.63, "elapsed_time": "0:12:46", "remaining_time": "0:13:29", "throughput": 7816.51, "total_tokens": 5993472}
{"current_steps": 2585, "total_steps": 5305, "loss": 0.0005, "lr": 3.045069103055672e-06, "epoch": 2.4363807728557965, "percentage": 48.73, "elapsed_time": "0:12:47", "remaining_time": "0:13:27", "throughput": 7824.35, "total_tokens": 6003520}
{"current_steps": 2590, "total_steps": 5305, "loss": 0.0118, "lr": 3.037038270920489e-06, "epoch": 2.4410933081998114, "percentage": 48.82, "elapsed_time": "0:12:47", "remaining_time": "0:13:24", "throughput": 7833.37, "total_tokens": 6014720}
{"current_steps": 2595, "total_steps": 5305, "loss": 0.0956, "lr": 3.0290016247218323e-06, "epoch": 2.445805843543827, "percentage": 48.92, "elapsed_time": "0:12:48", "remaining_time": "0:13:22", "throughput": 7848.64, "total_tokens": 6032192}
{"current_steps": 2600, "total_steps": 5305, "loss": 0.2412, "lr": 3.0209592514657365e-06, "epoch": 2.4505183788878417, "percentage": 49.01, "elapsed_time": "0:12:49", "remaining_time": "0:13:20", "throughput": 7857.57, "total_tokens": 6043328}
{"current_steps": 2605, "total_steps": 5305, "loss": 0.0061, "lr": 3.012911238220241e-06, "epoch": 2.4552309142318567, "percentage": 49.1, "elapsed_time": "0:12:49", "remaining_time": "0:13:17", "throughput": 7867.39, "total_tokens": 6055424}
{"current_steps": 2610, "total_steps": 5305, "loss": 0.2284, "lr": 3.004857672114443e-06, "epoch": 2.4599434495758716, "percentage": 49.2, "elapsed_time": "0:12:50", "remaining_time": "0:13:15", "throughput": 7875.23, "total_tokens": 6065472}
{"current_steps": 2615, "total_steps": 5305, "loss": 0.0007, "lr": 2.996798640337556e-06, "epoch": 2.464655984919887, "percentage": 49.29, "elapsed_time": "0:12:50", "remaining_time": "0:13:12", "throughput": 7885.5, "total_tokens": 6078016}
{"current_steps": 2620, "total_steps": 5305, "loss": 0.0974, "lr": 2.9887342301379653e-06, "epoch": 2.469368520263902, "percentage": 49.39, "elapsed_time": "0:12:51", "remaining_time": "0:13:10", "throughput": 7894.69, "total_tokens": 6089472}
{"current_steps": 2625, "total_steps": 5305, "loss": 0.1484, "lr": 2.9806645288222854e-06, "epoch": 2.474081055607917, "percentage": 49.48, "elapsed_time": "0:12:51", "remaining_time": "0:13:08", "throughput": 7903.94, "total_tokens": 6100992}
{"current_steps": 2630, "total_steps": 5305, "loss": 0.0821, "lr": 2.9725896237544115e-06, "epoch": 2.4787935909519323, "percentage": 49.58, "elapsed_time": "0:12:52", "remaining_time": "0:13:05", "throughput": 7913.43, "total_tokens": 6112768}
{"current_steps": 2635, "total_steps": 5305, "loss": 0.0017, "lr": 2.9645096023545774e-06, "epoch": 2.4835061262959472, "percentage": 49.67, "elapsed_time": "0:12:52", "remaining_time": "0:13:03", "throughput": 7921.11, "total_tokens": 6122752}
{"current_steps": 2640, "total_steps": 5305, "loss": 0.05, "lr": 2.956424552098405e-06, "epoch": 2.488218661639962, "percentage": 49.76, "elapsed_time": "0:12:53", "remaining_time": "0:13:00", "throughput": 7932.31, "total_tokens": 6136256}
{"current_steps": 2645, "total_steps": 5305, "loss": 0.0714, "lr": 2.94833456051596e-06, "epoch": 2.492931196983977, "percentage": 49.86, "elapsed_time": "0:12:54", "remaining_time": "0:12:58", "throughput": 7940.99, "total_tokens": 6147264}
{"current_steps": 2650, "total_steps": 5305, "loss": 0.0012, "lr": 2.9402397151908056e-06, "epoch": 2.4976437323279925, "percentage": 49.95, "elapsed_time": "0:12:54", "remaining_time": "0:12:56", "throughput": 7952.49, "total_tokens": 6161088}
{"current_steps": 2655, "total_steps": 5305, "loss": 0.0567, "lr": 2.93214010375905e-06, "epoch": 2.5023562676720075, "percentage": 50.05, "elapsed_time": "0:12:55", "remaining_time": "0:12:53", "throughput": 7962.62, "total_tokens": 6173568}
{"current_steps": 2660, "total_steps": 5305, "loss": 0.0692, "lr": 2.924035813908402e-06, "epoch": 2.507068803016023, "percentage": 50.14, "elapsed_time": "0:12:55", "remaining_time": "0:12:51", "throughput": 7970.61, "total_tokens": 6183872}
{"current_steps": 2660, "total_steps": 5305, "eval_loss": 0.40977799892425537, "epoch": 2.507068803016023, "percentage": 50.14, "elapsed_time": "0:12:58", "remaining_time": "0:12:54", "throughput": 7942.58, "total_tokens": 6183872}
{"current_steps": 2665, "total_steps": 5305, "loss": 0.0693, "lr": 2.9159269333772173e-06, "epoch": 2.511781338360038, "percentage": 50.24, "elapsed_time": "0:13:40", "remaining_time": "0:13:32", "throughput": 7554.65, "total_tokens": 6195648}
{"current_steps": 2670, "total_steps": 5305, "loss": 0.0003, "lr": 2.9078135499535535e-06, "epoch": 2.5164938737040528, "percentage": 50.33, "elapsed_time": "0:13:40", "remaining_time": "0:13:29", "throughput": 7562.14, "total_tokens": 6205696}
{"current_steps": 2675, "total_steps": 5305, "loss": 0.0993, "lr": 2.8996957514742164e-06, "epoch": 2.5212064090480677, "percentage": 50.42, "elapsed_time": "0:13:41", "remaining_time": "0:13:27", "throughput": 7573.41, "total_tokens": 6219648}
{"current_steps": 2680, "total_steps": 5305, "loss": 0.0016, "lr": 2.891573625823808e-06, "epoch": 2.525918944392083, "percentage": 50.52, "elapsed_time": "0:13:41", "remaining_time": "0:13:25", "throughput": 7584.65, "total_tokens": 6233664}
{"current_steps": 2685, "total_steps": 5305, "loss": 0.0002, "lr": 2.883447260933781e-06, "epoch": 2.530631479736098, "percentage": 50.61, "elapsed_time": "0:13:42", "remaining_time": "0:13:22", "throughput": 7594.69, "total_tokens": 6246400}
{"current_steps": 2690, "total_steps": 5305, "loss": 0.0776, "lr": 2.875316744781479e-06, "epoch": 2.535344015080113, "percentage": 50.71, "elapsed_time": "0:13:42", "remaining_time": "0:13:20", "throughput": 7602.21, "total_tokens": 6256576}
{"current_steps": 2695, "total_steps": 5305, "loss": 0.0909, "lr": 2.8671821653891903e-06, "epoch": 2.5400565504241284, "percentage": 50.8, "elapsed_time": "0:13:43", "remaining_time": "0:13:17", "throughput": 7609.28, "total_tokens": 6266240}
{"current_steps": 2700, "total_steps": 5305, "loss": 0.1384, "lr": 2.85904361082319e-06, "epoch": 2.5447690857681433, "percentage": 50.9, "elapsed_time": "0:13:44", "remaining_time": "0:13:15", "throughput": 7620.12, "total_tokens": 6279872}
{"current_steps": 2705, "total_steps": 5305, "loss": 0.0001, "lr": 2.8509011691927923e-06, "epoch": 2.5494816211121583, "percentage": 50.99, "elapsed_time": "0:13:44", "remaining_time": "0:13:12", "throughput": 7627.66, "total_tokens": 6290048}
{"current_steps": 2710, "total_steps": 5305, "loss": 0.0368, "lr": 2.8427549286493906e-06, "epoch": 2.554194156456173, "percentage": 51.08, "elapsed_time": "0:13:45", "remaining_time": "0:13:10", "throughput": 7636.02, "total_tokens": 6301120}
{"current_steps": 2715, "total_steps": 5305, "loss": 0.1002, "lr": 2.8346049773855077e-06, "epoch": 2.5589066918001886, "percentage": 51.18, "elapsed_time": "0:13:45", "remaining_time": "0:13:07", "throughput": 7644.74, "total_tokens": 6312512}
{"current_steps": 2720, "total_steps": 5305, "loss": 0.0002, "lr": 2.8264514036338385e-06, "epoch": 2.5636192271442035, "percentage": 51.27, "elapsed_time": "0:13:46", "remaining_time": "0:13:05", "throughput": 7653.23, "total_tokens": 6323776}
{"current_steps": 2725, "total_steps": 5305, "loss": 0.0003, "lr": 2.818294295666295e-06, "epoch": 2.568331762488219, "percentage": 51.37, "elapsed_time": "0:13:46", "remaining_time": "0:13:02", "throughput": 7661.0, "total_tokens": 6334208}
{"current_steps": 2730, "total_steps": 5305, "loss": 0.0952, "lr": 2.8101337417930523e-06, "epoch": 2.573044297832234, "percentage": 51.46, "elapsed_time": "0:13:47", "remaining_time": "0:13:00", "throughput": 7669.29, "total_tokens": 6345216}
{"current_steps": 2735, "total_steps": 5305, "loss": 0.2239, "lr": 2.8019698303615912e-06, "epoch": 2.577756833176249, "percentage": 51.56, "elapsed_time": "0:13:47", "remaining_time": "0:12:57", "throughput": 7675.69, "total_tokens": 6354304}
{"current_steps": 2740, "total_steps": 5305, "loss": 0.0628, "lr": 2.7938026497557414e-06, "epoch": 2.5824693685202638, "percentage": 51.65, "elapsed_time": "0:13:48", "remaining_time": "0:12:55", "throughput": 7686.64, "total_tokens": 6368192}
{"current_steps": 2745, "total_steps": 5305, "loss": 0.0454, "lr": 2.7856322883947253e-06, "epoch": 2.5871819038642787, "percentage": 51.74, "elapsed_time": "0:13:49", "remaining_time": "0:12:53", "throughput": 7697.92, "total_tokens": 6382400}
{"current_steps": 2750, "total_steps": 5305, "loss": 0.0836, "lr": 2.7774588347322016e-06, "epoch": 2.591894439208294, "percentage": 51.84, "elapsed_time": "0:13:49", "remaining_time": "0:12:50", "throughput": 7708.21, "total_tokens": 6395584}
{"current_steps": 2755, "total_steps": 5305, "loss": 0.1468, "lr": 2.7692823772553057e-06, "epoch": 2.596606974552309, "percentage": 51.93, "elapsed_time": "0:13:50", "remaining_time": "0:12:48", "throughput": 7716.54, "total_tokens": 6406720}
{"current_steps": 2760, "total_steps": 5305, "loss": 0.1705, "lr": 2.7611030044836927e-06, "epoch": 2.6013195098963244, "percentage": 52.03, "elapsed_time": "0:13:50", "remaining_time": "0:12:46", "throughput": 7725.16, "total_tokens": 6418112}
{"current_steps": 2765, "total_steps": 5305, "loss": 0.0602, "lr": 2.752920804968581e-06, "epoch": 2.6060320452403394, "percentage": 52.12, "elapsed_time": "0:13:51", "remaining_time": "0:12:43", "throughput": 7735.26, "total_tokens": 6431104}
{"current_steps": 2770, "total_steps": 5305, "loss": 0.0038, "lr": 2.744735867291789e-06, "epoch": 2.6107445805843543, "percentage": 52.21, "elapsed_time": "0:13:51", "remaining_time": "0:12:41", "throughput": 7743.16, "total_tokens": 6441792}
{"current_steps": 2775, "total_steps": 5305, "loss": 0.167, "lr": 2.736548280064781e-06, "epoch": 2.6154571159283693, "percentage": 52.31, "elapsed_time": "0:13:52", "remaining_time": "0:12:38", "throughput": 7751.17, "total_tokens": 6452672}
{"current_steps": 2780, "total_steps": 5305, "loss": 0.1083, "lr": 2.728358131927704e-06, "epoch": 2.6201696512723847, "percentage": 52.4, "elapsed_time": "0:13:53", "remaining_time": "0:12:36", "throughput": 7761.12, "total_tokens": 6465600}
{"current_steps": 2785, "total_steps": 5305, "loss": 0.0731, "lr": 2.720165511548433e-06, "epoch": 2.6248821866163996, "percentage": 52.5, "elapsed_time": "0:13:53", "remaining_time": "0:12:34", "throughput": 7769.94, "total_tokens": 6477312}
{"current_steps": 2790, "total_steps": 5305, "loss": 0.179, "lr": 2.711970507621603e-06, "epoch": 2.6295947219604145, "percentage": 52.59, "elapsed_time": "0:13:54", "remaining_time": "0:12:31", "throughput": 7776.52, "total_tokens": 6486592}
{"current_steps": 2795, "total_steps": 5305, "loss": 0.0011, "lr": 2.7037732088676583e-06, "epoch": 2.63430725730443, "percentage": 52.69, "elapsed_time": "0:13:54", "remaining_time": "0:12:29", "throughput": 7784.15, "total_tokens": 6497088}
{"current_steps": 2800, "total_steps": 5305, "loss": 0.0035, "lr": 2.6955737040318853e-06, "epoch": 2.639019792648445, "percentage": 52.78, "elapsed_time": "0:13:55", "remaining_time": "0:12:27", "throughput": 7790.27, "total_tokens": 6505984}
{"current_steps": 2805, "total_steps": 5305, "loss": 0.0009, "lr": 2.687372081883454e-06, "epoch": 2.64373232799246, "percentage": 52.87, "elapsed_time": "0:13:55", "remaining_time": "0:12:24", "throughput": 7798.31, "total_tokens": 6516928}
{"current_steps": 2810, "total_steps": 5305, "loss": 0.0096, "lr": 2.6791684312144565e-06, "epoch": 2.6484448633364748, "percentage": 52.97, "elapsed_time": "0:13:56", "remaining_time": "0:12:22", "throughput": 7805.94, "total_tokens": 6527424}
{"current_steps": 2815, "total_steps": 5305, "loss": 0.0955, "lr": 2.670962840838946e-06, "epoch": 2.65315739868049, "percentage": 53.06, "elapsed_time": "0:13:56", "remaining_time": "0:12:20", "throughput": 7813.99, "total_tokens": 6538432}
{"current_steps": 2820, "total_steps": 5305, "loss": 0.0341, "lr": 2.6627553995919763e-06, "epoch": 2.657869934024505, "percentage": 53.16, "elapsed_time": "0:13:57", "remaining_time": "0:12:17", "throughput": 7824.08, "total_tokens": 6551552}
{"current_steps": 2825, "total_steps": 5305, "loss": 0.0005, "lr": 2.6545461963286374e-06, "epoch": 2.6625824693685205, "percentage": 53.25, "elapsed_time": "0:13:58", "remaining_time": "0:12:15", "throughput": 7835.49, "total_tokens": 6566208}
{"current_steps": 2830, "total_steps": 5305, "loss": 0.1887, "lr": 2.646335319923097e-06, "epoch": 2.6672950047125354, "percentage": 53.35, "elapsed_time": "0:13:58", "remaining_time": "0:12:13", "throughput": 7843.77, "total_tokens": 6577472}
{"current_steps": 2835, "total_steps": 5305, "loss": 0.1243, "lr": 2.6381228592676343e-06, "epoch": 2.6720075400565504, "percentage": 53.44, "elapsed_time": "0:13:59", "remaining_time": "0:12:11", "throughput": 7851.92, "total_tokens": 6588608}
{"current_steps": 2840, "total_steps": 5305, "loss": 0.1048, "lr": 2.629908903271683e-06, "epoch": 2.6767200754005653, "percentage": 53.53, "elapsed_time": "0:13:59", "remaining_time": "0:12:08", "throughput": 7861.37, "total_tokens": 6601088}
{"current_steps": 2845, "total_steps": 5305, "loss": 0.0005, "lr": 2.6216935408608617e-06, "epoch": 2.6814326107445807, "percentage": 53.63, "elapsed_time": "0:14:00", "remaining_time": "0:12:06", "throughput": 7868.76, "total_tokens": 6611392}
{"current_steps": 2850, "total_steps": 5305, "loss": 0.001, "lr": 2.6134768609760187e-06, "epoch": 2.6861451460885957, "percentage": 53.72, "elapsed_time": "0:14:00", "remaining_time": "0:12:04", "throughput": 7877.0, "total_tokens": 6622656}
{"current_steps": 2855, "total_steps": 5305, "loss": 0.0916, "lr": 2.605258952572263e-06, "epoch": 2.6908576814326106, "percentage": 53.82, "elapsed_time": "0:14:01", "remaining_time": "0:12:01", "throughput": 7886.48, "total_tokens": 6635264}
{"current_steps": 2860, "total_steps": 5305, "loss": 0.0028, "lr": 2.5970399046180043e-06, "epoch": 2.695570216776626, "percentage": 53.91, "elapsed_time": "0:14:01", "remaining_time": "0:11:59", "throughput": 7895.73, "total_tokens": 6647680}
{"current_steps": 2865, "total_steps": 5305, "loss": 0.0001, "lr": 2.588819806093991e-06, "epoch": 2.700282752120641, "percentage": 54.01, "elapsed_time": "0:14:02", "remaining_time": "0:11:57", "throughput": 7906.79, "total_tokens": 6662016}
{"current_steps": 2870, "total_steps": 5305, "loss": 0.1805, "lr": 2.580598745992342e-06, "epoch": 2.704995287464656, "percentage": 54.1, "elapsed_time": "0:14:03", "remaining_time": "0:11:55", "throughput": 7914.71, "total_tokens": 6673024}
{"current_steps": 2875, "total_steps": 5305, "loss": 0.0001, "lr": 2.5723768133155894e-06, "epoch": 2.709707822808671, "percentage": 54.19, "elapsed_time": "0:14:03", "remaining_time": "0:11:53", "throughput": 7923.04, "total_tokens": 6684416}
{"current_steps": 2880, "total_steps": 5305, "loss": 0.0783, "lr": 2.5641540970757105e-06, "epoch": 2.7144203581526862, "percentage": 54.29, "elapsed_time": "0:14:04", "remaining_time": "0:11:50", "throughput": 7931.95, "total_tokens": 6696448}
{"current_steps": 2885, "total_steps": 5305, "loss": 0.0002, "lr": 2.555930686293165e-06, "epoch": 2.719132893496701, "percentage": 54.38, "elapsed_time": "0:14:04", "remaining_time": "0:11:48", "throughput": 7942.64, "total_tokens": 6710528}
{"current_steps": 2890, "total_steps": 5305, "loss": 0.0004, "lr": 2.547706669995933e-06, "epoch": 2.7238454288407166, "percentage": 54.48, "elapsed_time": "0:14:05", "remaining_time": "0:11:46", "throughput": 7951.15, "total_tokens": 6722176}
{"current_steps": 2895, "total_steps": 5305, "loss": 0.1775, "lr": 2.53948213721855e-06, "epoch": 2.7285579641847315, "percentage": 54.57, "elapsed_time": "0:14:05", "remaining_time": "0:11:44", "throughput": 7958.31, "total_tokens": 6732416}
{"current_steps": 2900, "total_steps": 5305, "loss": 0.1137, "lr": 2.531257177001141e-06, "epoch": 2.7332704995287465, "percentage": 54.67, "elapsed_time": "0:14:06", "remaining_time": "0:11:42", "throughput": 7968.39, "total_tokens": 6745728}
{"current_steps": 2905, "total_steps": 5305, "loss": 0.0956, "lr": 2.523031878388463e-06, "epoch": 2.7379830348727614, "percentage": 54.76, "elapsed_time": "0:14:07", "remaining_time": "0:11:39", "throughput": 7975.66, "total_tokens": 6756096}
{"current_steps": 2910, "total_steps": 5305, "loss": 0.063, "lr": 2.5148063304289306e-06, "epoch": 2.742695570216777, "percentage": 54.85, "elapsed_time": "0:14:07", "remaining_time": "0:11:37", "throughput": 7983.39, "total_tokens": 6766976}
{"current_steps": 2915, "total_steps": 5305, "loss": 0.1039, "lr": 2.5065806221736617e-06, "epoch": 2.7474081055607917, "percentage": 54.95, "elapsed_time": "0:14:08", "remaining_time": "0:11:35", "throughput": 7991.06, "total_tokens": 6777792}
{"current_steps": 2920, "total_steps": 5305, "loss": 0.0003, "lr": 2.4983548426755104e-06, "epoch": 2.7521206409048067, "percentage": 55.04, "elapsed_time": "0:14:08", "remaining_time": "0:11:33", "throughput": 7999.6, "total_tokens": 6789568}
{"current_steps": 2925, "total_steps": 5305, "loss": 0.227, "lr": 2.4901290809880984e-06, "epoch": 2.756833176248822, "percentage": 55.14, "elapsed_time": "0:14:09", "remaining_time": "0:11:31", "throughput": 8010.05, "total_tokens": 6803392}
{"current_steps": 2926, "total_steps": 5305, "eval_loss": 0.43027257919311523, "epoch": 2.757775683317625, "percentage": 55.16, "elapsed_time": "0:14:12", "remaining_time": "0:11:32", "throughput": 7986.35, "total_tokens": 6806208}
{"current_steps": 2930, "total_steps": 5305, "loss": 0.0645, "lr": 2.4819034261648574e-06, "epoch": 2.761545711592837, "percentage": 55.23, "elapsed_time": "0:16:08", "remaining_time": "0:13:04", "throughput": 7044.22, "total_tokens": 6821760}
{"current_steps": 2935, "total_steps": 5305, "loss": 0.2084, "lr": 2.4736779672580625e-06, "epoch": 2.766258246936852, "percentage": 55.33, "elapsed_time": "0:16:09", "remaining_time": "0:13:02", "throughput": 7053.22, "total_tokens": 6834688}
{"current_steps": 2940, "total_steps": 5305, "loss": 0.0731, "lr": 2.465452793317865e-06, "epoch": 2.770970782280867, "percentage": 55.42, "elapsed_time": "0:16:09", "remaining_time": "0:12:59", "throughput": 7061.52, "total_tokens": 6846784}
{"current_steps": 2945, "total_steps": 5305, "loss": 0.0866, "lr": 2.457227993391333e-06, "epoch": 2.7756833176248823, "percentage": 55.51, "elapsed_time": "0:16:10", "remaining_time": "0:12:57", "throughput": 7070.3, "total_tokens": 6859520}
{"current_steps": 2950, "total_steps": 5305, "loss": 0.0008, "lr": 2.4490036565214876e-06, "epoch": 2.7803958529688972, "percentage": 55.61, "elapsed_time": "0:16:10", "remaining_time": "0:12:54", "throughput": 7078.31, "total_tokens": 6871296}
{"current_steps": 2955, "total_steps": 5305, "loss": 0.0151, "lr": 2.440779871746331e-06, "epoch": 2.785108388312912, "percentage": 55.7, "elapsed_time": "0:16:11", "remaining_time": "0:12:52", "throughput": 7085.83, "total_tokens": 6882496}
{"current_steps": 2960, "total_steps": 5305, "loss": 0.0708, "lr": 2.4325567280978937e-06, "epoch": 2.7898209236569276, "percentage": 55.8, "elapsed_time": "0:16:11", "remaining_time": "0:12:49", "throughput": 7094.03, "total_tokens": 6894528}
{"current_steps": 2965, "total_steps": 5305, "loss": 0.1738, "lr": 2.424334314601263e-06, "epoch": 2.7945334590009425, "percentage": 55.89, "elapsed_time": "0:16:12", "remaining_time": "0:12:47", "throughput": 7100.92, "total_tokens": 6904960}
{"current_steps": 2970, "total_steps": 5305, "loss": 0.155, "lr": 2.416112720273623e-06, "epoch": 2.7992459943449575, "percentage": 55.98, "elapsed_time": "0:16:12", "remaining_time": "0:12:44", "throughput": 7107.45, "total_tokens": 6914944}
{"current_steps": 2975, "total_steps": 5305, "loss": 0.0006, "lr": 2.4078920341232856e-06, "epoch": 2.8039585296889724, "percentage": 56.08, "elapsed_time": "0:16:13", "remaining_time": "0:12:42", "throughput": 7114.88, "total_tokens": 6926080}
{"current_steps": 2980, "total_steps": 5305, "loss": 0.0028, "lr": 2.3996723451487344e-06, "epoch": 2.808671065032988, "percentage": 56.17, "elapsed_time": "0:16:13", "remaining_time": "0:12:39", "throughput": 7122.04, "total_tokens": 6936832}
{"current_steps": 2985, "total_steps": 5305, "loss": 0.2284, "lr": 2.391453742337657e-06, "epoch": 2.8133836003770027, "percentage": 56.27, "elapsed_time": "0:16:14", "remaining_time": "0:12:37", "throughput": 7129.64, "total_tokens": 6948160}
{"current_steps": 2990, "total_steps": 5305, "loss": 0.0003, "lr": 2.3832363146659806e-06, "epoch": 2.818096135721018, "percentage": 56.36, "elapsed_time": "0:16:15", "remaining_time": "0:12:34", "throughput": 7136.71, "total_tokens": 6958848}
{"current_steps": 2995, "total_steps": 5305, "loss": 0.1133, "lr": 2.37502015109691e-06, "epoch": 2.822808671065033, "percentage": 56.46, "elapsed_time": "0:16:15", "remaining_time": "0:12:32", "throughput": 7144.5, "total_tokens": 6970432}
{"current_steps": 3000, "total_steps": 5305, "loss": 0.0691, "lr": 2.3668053405799667e-06, "epoch": 2.827521206409048, "percentage": 56.55, "elapsed_time": "0:16:16", "remaining_time": "0:12:30", "throughput": 7151.03, "total_tokens": 6980480}
{"current_steps": 3005, "total_steps": 5305, "loss": 0.0368, "lr": 2.3585919720500214e-06, "epoch": 2.832233741753063, "percentage": 56.64, "elapsed_time": "0:16:16", "remaining_time": "0:12:27", "throughput": 7156.87, "total_tokens": 6989760}
{"current_steps": 3010, "total_steps": 5305, "loss": 0.093, "lr": 2.3503801344263347e-06, "epoch": 2.8369462770970784, "percentage": 56.74, "elapsed_time": "0:16:17", "remaining_time": "0:12:25", "throughput": 7162.88, "total_tokens": 6999232}
{"current_steps": 3015, "total_steps": 5305, "loss": 0.2148, "lr": 2.3421699166115946e-06, "epoch": 2.8416588124410933, "percentage": 56.83, "elapsed_time": "0:16:17", "remaining_time": "0:12:22", "throughput": 7170.74, "total_tokens": 7010944}
{"current_steps": 3020, "total_steps": 5305, "loss": 0.1475, "lr": 2.3339614074909495e-06, "epoch": 2.8463713477851083, "percentage": 56.93, "elapsed_time": "0:16:18", "remaining_time": "0:12:20", "throughput": 7177.94, "total_tokens": 7021824}
{"current_steps": 3025, "total_steps": 5305, "loss": 0.1085, "lr": 2.325754695931054e-06, "epoch": 2.8510838831291236, "percentage": 57.02, "elapsed_time": "0:16:18", "remaining_time": "0:12:17", "throughput": 7184.11, "total_tokens": 7031488}
{"current_steps": 3030, "total_steps": 5305, "loss": 0.0536, "lr": 2.3175498707790964e-06, "epoch": 2.8557964184731386, "percentage": 57.12, "elapsed_time": "0:16:19", "remaining_time": "0:12:15", "throughput": 7190.22, "total_tokens": 7041088}
{"current_steps": 3035, "total_steps": 5305, "loss": 0.1759, "lr": 2.3093470208618467e-06, "epoch": 2.8605089538171535, "percentage": 57.21, "elapsed_time": "0:16:19", "remaining_time": "0:12:12", "throughput": 7197.31, "total_tokens": 7051840}
{"current_steps": 3040, "total_steps": 5305, "loss": 0.0005, "lr": 2.3011462349846907e-06, "epoch": 2.8652214891611685, "percentage": 57.3, "elapsed_time": "0:16:20", "remaining_time": "0:12:10", "throughput": 7204.56, "total_tokens": 7062848}
{"current_steps": 3045, "total_steps": 5305, "loss": 0.0006, "lr": 2.292947601930664e-06, "epoch": 2.869934024505184, "percentage": 57.4, "elapsed_time": "0:16:21", "remaining_time": "0:12:08", "throughput": 7216.11, "total_tokens": 7079296}
{"current_steps": 3050, "total_steps": 5305, "loss": 0.1614, "lr": 2.2847512104595005e-06, "epoch": 2.874646559849199, "percentage": 57.49, "elapsed_time": "0:16:21", "remaining_time": "0:12:05", "throughput": 7223.66, "total_tokens": 7090752}
{"current_steps": 3055, "total_steps": 5305, "loss": 0.0003, "lr": 2.2765571493066647e-06, "epoch": 2.879359095193214, "percentage": 57.59, "elapsed_time": "0:16:22", "remaining_time": "0:12:03", "throughput": 7231.43, "total_tokens": 7102464}
{"current_steps": 3060, "total_steps": 5305, "loss": 0.038, "lr": 2.2683655071823925e-06, "epoch": 2.884071630537229, "percentage": 57.68, "elapsed_time": "0:16:22", "remaining_time": "0:12:01", "throughput": 7241.77, "total_tokens": 7117376}
{"current_steps": 3065, "total_steps": 5305, "loss": 0.0809, "lr": 2.2601763727707295e-06, "epoch": 2.888784165881244, "percentage": 57.78, "elapsed_time": "0:16:23", "remaining_time": "0:11:58", "throughput": 7251.54, "total_tokens": 7131584}
{"current_steps": 3070, "total_steps": 5305, "loss": 0.1831, "lr": 2.2519898347285745e-06, "epoch": 2.893496701225259, "percentage": 57.87, "elapsed_time": "0:16:24", "remaining_time": "0:11:56", "throughput": 7258.85, "total_tokens": 7142720}
{"current_steps": 3075, "total_steps": 5305, "loss": 0.1239, "lr": 2.2438059816847165e-06, "epoch": 2.8982092365692744, "percentage": 57.96, "elapsed_time": "0:16:24", "remaining_time": "0:11:54", "throughput": 7267.48, "total_tokens": 7155520}
{"current_steps": 3080, "total_steps": 5305, "loss": 0.0753, "lr": 2.235624902238879e-06, "epoch": 2.9029217719132894, "percentage": 58.06, "elapsed_time": "0:16:25", "remaining_time": "0:11:51", "throughput": 7273.81, "total_tokens": 7165504}
{"current_steps": 3085, "total_steps": 5305, "loss": 0.118, "lr": 2.2274466849607526e-06, "epoch": 2.9076343072573043, "percentage": 58.15, "elapsed_time": "0:16:25", "remaining_time": "0:11:49", "throughput": 7280.91, "total_tokens": 7176384}
{"current_steps": 3090, "total_steps": 5305, "loss": 0.0012, "lr": 2.219271418389046e-06, "epoch": 2.9123468426013197, "percentage": 58.25, "elapsed_time": "0:16:26", "remaining_time": "0:11:46", "throughput": 7288.83, "total_tokens": 7188288}
{"current_steps": 3095, "total_steps": 5305, "loss": 0.1523, "lr": 2.2110991910305233e-06, "epoch": 2.9170593779453347, "percentage": 58.34, "elapsed_time": "0:16:26", "remaining_time": "0:11:44", "throughput": 7296.29, "total_tokens": 7199680}
{"current_steps": 3100, "total_steps": 5305, "loss": 0.0548, "lr": 2.2029300913590413e-06, "epoch": 2.9217719132893496, "percentage": 58.44, "elapsed_time": "0:16:27", "remaining_time": "0:11:42", "throughput": 7304.11, "total_tokens": 7211520}
{"current_steps": 3105, "total_steps": 5305, "loss": 0.0932, "lr": 2.1947642078146005e-06, "epoch": 2.9264844486333645, "percentage": 58.53, "elapsed_time": "0:16:27", "remaining_time": "0:11:39", "throughput": 7310.39, "total_tokens": 7221440}
{"current_steps": 3110, "total_steps": 5305, "loss": 0.0528, "lr": 2.1866016288023815e-06, "epoch": 2.93119698397738, "percentage": 58.62, "elapsed_time": "0:16:28", "remaining_time": "0:11:37", "throughput": 7317.31, "total_tokens": 7232128}
{"current_steps": 3115, "total_steps": 5305, "loss": 0.1414, "lr": 2.178442442691789e-06, "epoch": 2.935909519321395, "percentage": 58.72, "elapsed_time": "0:16:28", "remaining_time": "0:11:35", "throughput": 7323.52, "total_tokens": 7241984}
{"current_steps": 3120, "total_steps": 5305, "loss": 0.0745, "lr": 2.170286737815495e-06, "epoch": 2.9406220546654103, "percentage": 58.81, "elapsed_time": "0:16:29", "remaining_time": "0:11:32", "throughput": 7330.41, "total_tokens": 7252672}
{"current_steps": 3125, "total_steps": 5305, "loss": 0.0453, "lr": 2.1621346024684854e-06, "epoch": 2.945334590009425, "percentage": 58.91, "elapsed_time": "0:16:29", "remaining_time": "0:11:30", "throughput": 7337.82, "total_tokens": 7264064}
{"current_steps": 3130, "total_steps": 5305, "loss": 0.0268, "lr": 2.1539861249071004e-06, "epoch": 2.95004712535344, "percentage": 59.0, "elapsed_time": "0:16:30", "remaining_time": "0:11:28", "throughput": 7345.51, "total_tokens": 7275776}
{"current_steps": 3135, "total_steps": 5305, "loss": 0.0361, "lr": 2.145841393348079e-06, "epoch": 2.954759660697455, "percentage": 59.1, "elapsed_time": "0:16:31", "remaining_time": "0:11:26", "throughput": 7353.35, "total_tokens": 7287680}
{"current_steps": 3140, "total_steps": 5305, "loss": 0.001, "lr": 2.1377004959676086e-06, "epoch": 2.95947219604147, "percentage": 59.19, "elapsed_time": "0:16:31", "remaining_time": "0:11:23", "throughput": 7361.52, "total_tokens": 7300032}
{"current_steps": 3145, "total_steps": 5305, "loss": 0.0632, "lr": 2.129563520900364e-06, "epoch": 2.9641847313854854, "percentage": 59.28, "elapsed_time": "0:16:32", "remaining_time": "0:11:21", "throughput": 7369.08, "total_tokens": 7311616}
{"current_steps": 3150, "total_steps": 5305, "loss": 0.1604, "lr": 2.1214305562385592e-06, "epoch": 2.9688972667295004, "percentage": 59.38, "elapsed_time": "0:16:32", "remaining_time": "0:11:19", "throughput": 7375.36, "total_tokens": 7321600}
{"current_steps": 3155, "total_steps": 5305, "loss": 0.0003, "lr": 2.1133016900309876e-06, "epoch": 2.9736098020735158, "percentage": 59.47, "elapsed_time": "0:16:33", "remaining_time": "0:11:16", "throughput": 7383.0, "total_tokens": 7333376}
{"current_steps": 3160, "total_steps": 5305, "loss": 0.0002, "lr": 2.1051770102820755e-06, "epoch": 2.9783223374175307, "percentage": 59.57, "elapsed_time": "0:16:33", "remaining_time": "0:11:14", "throughput": 7390.04, "total_tokens": 7344384}
{"current_steps": 3165, "total_steps": 5305, "loss": 0.0799, "lr": 2.0970566049509236e-06, "epoch": 2.9830348727615457, "percentage": 59.66, "elapsed_time": "0:16:34", "remaining_time": "0:11:12", "throughput": 7397.42, "total_tokens": 7355840}
{"current_steps": 3170, "total_steps": 5305, "loss": 0.0002, "lr": 2.088940561950359e-06, "epoch": 2.9877474081055606, "percentage": 59.75, "elapsed_time": "0:16:34", "remaining_time": "0:11:10", "throughput": 7405.5, "total_tokens": 7368128}
{"current_steps": 3175, "total_steps": 5305, "loss": 0.1426, "lr": 2.080828969145979e-06, "epoch": 2.992459943449576, "percentage": 59.85, "elapsed_time": "0:16:35", "remaining_time": "0:11:07", "throughput": 7414.02, "total_tokens": 7381056}
{"current_steps": 3180, "total_steps": 5305, "loss": 0.094, "lr": 2.0727219143552034e-06, "epoch": 2.997172478793591, "percentage": 59.94, "elapsed_time": "0:16:36", "remaining_time": "0:11:05", "throughput": 7422.19, "total_tokens": 7393536}
{"current_steps": 3185, "total_steps": 5305, "loss": 0.0923, "lr": 2.0646194853463255e-06, "epoch": 3.001885014137606, "percentage": 60.04, "elapsed_time": "0:16:36", "remaining_time": "0:11:03", "throughput": 7426.83, "total_tokens": 7402656}
{"current_steps": 3190, "total_steps": 5305, "loss": 0.0004, "lr": 2.056521769837553e-06, "epoch": 3.0065975494816213, "percentage": 60.13, "elapsed_time": "0:16:37", "remaining_time": "0:11:01", "throughput": 7436.05, "total_tokens": 7416480}
{"current_steps": 3192, "total_steps": 5305, "eval_loss": 0.3936729431152344, "epoch": 3.008482563619227, "percentage": 60.17, "elapsed_time": "0:16:40", "remaining_time": "0:11:02", "throughput": 7419.33, "total_tokens": 7421856}
{"current_steps": 3195, "total_steps": 5305, "loss": 0.0003, "lr": 2.0484288554960707e-06, "epoch": 3.0113100848256362, "percentage": 60.23, "elapsed_time": "0:17:06", "remaining_time": "0:11:17", "throughput": 7237.87, "total_tokens": 7430304}
{"current_steps": 3200, "total_steps": 5305, "loss": 0.052, "lr": 2.040340829937082e-06, "epoch": 3.016022620169651, "percentage": 60.32, "elapsed_time": "0:17:07", "remaining_time": "0:11:15", "throughput": 7244.93, "total_tokens": 7441568}
{"current_steps": 3205, "total_steps": 5305, "loss": 0.0003, "lr": 2.032257780722865e-06, "epoch": 3.0207351555136666, "percentage": 60.41, "elapsed_time": "0:17:07", "remaining_time": "0:11:13", "throughput": 7251.14, "total_tokens": 7451744}
{"current_steps": 3210, "total_steps": 5305, "loss": 0.0002, "lr": 2.0241797953618204e-06, "epoch": 3.0254476908576815, "percentage": 60.51, "elapsed_time": "0:17:08", "remaining_time": "0:11:11", "throughput": 7258.22, "total_tokens": 7463008}
{"current_steps": 3215, "total_steps": 5305, "loss": 0.0001, "lr": 2.0161069613075295e-06, "epoch": 3.0301602262016964, "percentage": 60.6, "elapsed_time": "0:17:08", "remaining_time": "0:11:08", "throughput": 7266.15, "total_tokens": 7475424}
{"current_steps": 3220, "total_steps": 5305, "loss": 0.0002, "lr": 2.008039365957804e-06, "epoch": 3.0348727615457114, "percentage": 60.7, "elapsed_time": "0:17:09", "remaining_time": "0:11:06", "throughput": 7273.0, "total_tokens": 7486368}
{"current_steps": 3225, "total_steps": 5305, "loss": 0.0005, "lr": 1.9999770966537416e-06, "epoch": 3.039585296889727, "percentage": 60.79, "elapsed_time": "0:17:09", "remaining_time": "0:11:04", "throughput": 7279.83, "total_tokens": 7497312}
{"current_steps": 3230, "total_steps": 5305, "loss": 0.0457, "lr": 1.991920240678776e-06, "epoch": 3.0442978322337417, "percentage": 60.89, "elapsed_time": "0:17:10", "remaining_time": "0:11:01", "throughput": 7286.09, "total_tokens": 7507552}
{"current_steps": 3235, "total_steps": 5305, "loss": 0.0001, "lr": 1.983868885257739e-06, "epoch": 3.0490103675777567, "percentage": 60.98, "elapsed_time": "0:17:10", "remaining_time": "0:10:59", "throughput": 7293.28, "total_tokens": 7519008}
{"current_steps": 3240, "total_steps": 5305, "loss": 0.0908, "lr": 1.97582311755591e-06, "epoch": 3.053722902921772, "percentage": 61.07, "elapsed_time": "0:17:11", "remaining_time": "0:10:57", "throughput": 7300.4, "total_tokens": 7530400}
{"current_steps": 3245, "total_steps": 5305, "loss": 0.0002, "lr": 1.9677830246780764e-06, "epoch": 3.058435438265787, "percentage": 61.17, "elapsed_time": "0:17:12", "remaining_time": "0:10:55", "throughput": 7309.26, "total_tokens": 7544096}
{"current_steps": 3250, "total_steps": 5305, "loss": 0.0044, "lr": 1.9597486936675886e-06, "epoch": 3.063147973609802, "percentage": 61.26, "elapsed_time": "0:17:12", "remaining_time": "0:10:52", "throughput": 7315.79, "total_tokens": 7554784}
{"current_steps": 3255, "total_steps": 5305, "loss": 0.0001, "lr": 1.9517202115054174e-06, "epoch": 3.0678605089538173, "percentage": 61.36, "elapsed_time": "0:17:13", "remaining_time": "0:10:50", "throughput": 7323.84, "total_tokens": 7567392}
{"current_steps": 3260, "total_steps": 5305, "loss": 0.0001, "lr": 1.9436976651092143e-06, "epoch": 3.0725730442978323, "percentage": 61.45, "elapsed_time": "0:17:13", "remaining_time": "0:10:48", "throughput": 7330.26, "total_tokens": 7578016}
{"current_steps": 3265, "total_steps": 5305, "loss": 0.0689, "lr": 1.9356811413323686e-06, "epoch": 3.0772855796418472, "percentage": 61.55, "elapsed_time": "0:17:14", "remaining_time": "0:10:46", "throughput": 7337.62, "total_tokens": 7589728}
{"current_steps": 3270, "total_steps": 5305, "loss": 0.0006, "lr": 1.9276707269630664e-06, "epoch": 3.081998114985862, "percentage": 61.64, "elapsed_time": "0:17:14", "remaining_time": "0:10:44", "throughput": 7344.77, "total_tokens": 7601184}
{"current_steps": 3275, "total_steps": 5305, "loss": 0.0001, "lr": 1.9196665087233548e-06, "epoch": 3.0867106503298776, "percentage": 61.73, "elapsed_time": "0:17:15", "remaining_time": "0:10:41", "throughput": 7351.49, "total_tokens": 7612128}
{"current_steps": 3280, "total_steps": 5305, "loss": 0.0004, "lr": 1.9116685732681995e-06, "epoch": 3.0914231856738925, "percentage": 61.83, "elapsed_time": "0:17:16", "remaining_time": "0:10:39", "throughput": 7358.78, "total_tokens": 7623776}
{"current_steps": 3285, "total_steps": 5305, "loss": 0.0001, "lr": 1.9036770071845467e-06, "epoch": 3.0961357210179075, "percentage": 61.92, "elapsed_time": "0:17:16", "remaining_time": "0:10:37", "throughput": 7366.58, "total_tokens": 7636128}
{"current_steps": 3290, "total_steps": 5305, "loss": 0.0002, "lr": 1.8956918969903881e-06, "epoch": 3.100848256361923, "percentage": 62.02, "elapsed_time": "0:17:17", "remaining_time": "0:10:35", "throughput": 7372.83, "total_tokens": 7646432}
{"current_steps": 3295, "total_steps": 5305, "loss": 0.0, "lr": 1.887713329133824e-06, "epoch": 3.105560791705938, "percentage": 62.11, "elapsed_time": "0:17:17", "remaining_time": "0:10:32", "throughput": 7379.86, "total_tokens": 7657824}
{"current_steps": 3300, "total_steps": 5305, "loss": 0.0829, "lr": 1.8797413899921224e-06, "epoch": 3.1102733270499527, "percentage": 62.21, "elapsed_time": "0:17:18", "remaining_time": "0:10:30", "throughput": 7387.42, "total_tokens": 7669920}
{"current_steps": 3305, "total_steps": 5305, "loss": 0.0054, "lr": 1.8717761658707916e-06, "epoch": 3.114985862393968, "percentage": 62.3, "elapsed_time": "0:17:18", "remaining_time": "0:10:28", "throughput": 7394.95, "total_tokens": 7681952}
{"current_steps": 3310, "total_steps": 5305, "loss": 0.0, "lr": 1.86381774300264e-06, "epoch": 3.119698397737983, "percentage": 62.39, "elapsed_time": "0:17:19", "remaining_time": "0:10:26", "throughput": 7401.6, "total_tokens": 7692832}
{"current_steps": 3315, "total_steps": 5305, "loss": 0.1029, "lr": 1.8558662075468468e-06, "epoch": 3.124410933081998, "percentage": 62.49, "elapsed_time": "0:17:19", "remaining_time": "0:10:24", "throughput": 7407.78, "total_tokens": 7703072}
{"current_steps": 3320, "total_steps": 5305, "loss": 0.0, "lr": 1.8479216455880225e-06, "epoch": 3.1291234684260134, "percentage": 62.58, "elapsed_time": "0:17:20", "remaining_time": "0:10:22", "throughput": 7414.44, "total_tokens": 7714016}
{"current_steps": 3325, "total_steps": 5305, "loss": 0.0002, "lr": 1.8399841431352855e-06, "epoch": 3.1338360037700284, "percentage": 62.68, "elapsed_time": "0:17:20", "remaining_time": "0:10:19", "throughput": 7422.4, "total_tokens": 7726688}
{"current_steps": 3330, "total_steps": 5305, "loss": 0.0001, "lr": 1.8320537861213267e-06, "epoch": 3.1385485391140433, "percentage": 62.77, "elapsed_time": "0:17:21", "remaining_time": "0:10:17", "throughput": 7430.57, "total_tokens": 7739680}
{"current_steps": 3335, "total_steps": 5305, "loss": 0.0001, "lr": 1.8241306604014761e-06, "epoch": 3.1432610744580582, "percentage": 62.87, "elapsed_time": "0:17:22", "remaining_time": "0:10:15", "throughput": 7435.98, "total_tokens": 7749024}
{"current_steps": 3340, "total_steps": 5305, "loss": 0.0008, "lr": 1.816214851752779e-06, "epoch": 3.1479736098020736, "percentage": 62.96, "elapsed_time": "0:17:22", "remaining_time": "0:10:13", "throughput": 7443.81, "total_tokens": 7761568}
{"current_steps": 3345, "total_steps": 5305, "loss": 0.0001, "lr": 1.8083064458730651e-06, "epoch": 3.1526861451460886, "percentage": 63.05, "elapsed_time": "0:17:23", "remaining_time": "0:10:11", "throughput": 7450.5, "total_tokens": 7772640}
{"current_steps": 3350, "total_steps": 5305, "loss": 0.0004, "lr": 1.8004055283800204e-06, "epoch": 3.1573986804901035, "percentage": 63.15, "elapsed_time": "0:17:23", "remaining_time": "0:10:09", "throughput": 7457.88, "total_tokens": 7784672}
{"current_steps": 3355, "total_steps": 5305, "loss": 0.0, "lr": 1.7925121848102583e-06, "epoch": 3.162111215834119, "percentage": 63.24, "elapsed_time": "0:17:24", "remaining_time": "0:10:07", "throughput": 7464.66, "total_tokens": 7795872}
{"current_steps": 3360, "total_steps": 5305, "loss": 0.0, "lr": 1.7846265006183976e-06, "epoch": 3.166823751178134, "percentage": 63.34, "elapsed_time": "0:17:24", "remaining_time": "0:10:04", "throughput": 7472.42, "total_tokens": 7808416}
{"current_steps": 3365, "total_steps": 5305, "loss": 0.0, "lr": 1.776748561176137e-06, "epoch": 3.171536286522149, "percentage": 63.43, "elapsed_time": "0:17:25", "remaining_time": "0:10:02", "throughput": 7479.95, "total_tokens": 7820640}
{"current_steps": 3370, "total_steps": 5305, "loss": 0.0, "lr": 1.7688784517713247e-06, "epoch": 3.176248821866164, "percentage": 63.52, "elapsed_time": "0:17:26", "remaining_time": "0:10:00", "throughput": 7486.12, "total_tokens": 7831072}
{"current_steps": 3375, "total_steps": 5305, "loss": 0.0969, "lr": 1.761016257607044e-06, "epoch": 3.180961357210179, "percentage": 63.62, "elapsed_time": "0:17:26", "remaining_time": "0:09:58", "throughput": 7492.59, "total_tokens": 7841888}
{"current_steps": 3380, "total_steps": 5305, "loss": 0.0488, "lr": 1.7531620638006834e-06, "epoch": 3.185673892554194, "percentage": 63.71, "elapsed_time": "0:17:27", "remaining_time": "0:09:56", "throughput": 7499.2, "total_tokens": 7852896}
{"current_steps": 3385, "total_steps": 5305, "loss": 0.0013, "lr": 1.7453159553830217e-06, "epoch": 3.190386427898209, "percentage": 63.81, "elapsed_time": "0:17:27", "remaining_time": "0:09:54", "throughput": 7509.09, "total_tokens": 7868384}
{"current_steps": 3390, "total_steps": 5305, "loss": 0.0001, "lr": 1.7374780172973004e-06, "epoch": 3.1950989632422244, "percentage": 63.9, "elapsed_time": "0:17:28", "remaining_time": "0:09:52", "throughput": 7517.1, "total_tokens": 7881312}
{"current_steps": 3395, "total_steps": 5305, "loss": 0.0564, "lr": 1.7296483343983095e-06, "epoch": 3.1998114985862394, "percentage": 64.0, "elapsed_time": "0:17:28", "remaining_time": "0:09:50", "throughput": 7523.53, "total_tokens": 7892128}
{"current_steps": 3400, "total_steps": 5305, "loss": 0.0002, "lr": 1.7218269914514668e-06, "epoch": 3.2045240339302543, "percentage": 64.09, "elapsed_time": "0:17:29", "remaining_time": "0:09:48", "throughput": 7529.74, "total_tokens": 7902624}
{"current_steps": 3405, "total_steps": 5305, "loss": 0.0001, "lr": 1.714014073131901e-06, "epoch": 3.2092365692742697, "percentage": 64.18, "elapsed_time": "0:17:30", "remaining_time": "0:09:45", "throughput": 7537.44, "total_tokens": 7915168}
{"current_steps": 3410, "total_steps": 5305, "loss": 0.0002, "lr": 1.7062096640235327e-06, "epoch": 3.2139491046182846, "percentage": 64.28, "elapsed_time": "0:17:30", "remaining_time": "0:09:43", "throughput": 7543.41, "total_tokens": 7925472}
{"current_steps": 3415, "total_steps": 5305, "loss": 0.0001, "lr": 1.6984138486181612e-06, "epoch": 3.2186616399622996, "percentage": 64.37, "elapsed_time": "0:17:31", "remaining_time": "0:09:41", "throughput": 7553.01, "total_tokens": 7940576}
{"current_steps": 3420, "total_steps": 5305, "loss": 0.0323, "lr": 1.6906267113145514e-06, "epoch": 3.223374175306315, "percentage": 64.47, "elapsed_time": "0:17:32", "remaining_time": "0:09:39", "throughput": 7562.75, "total_tokens": 7956064}
{"current_steps": 3425, "total_steps": 5305, "loss": 0.0, "lr": 1.6828483364175127e-06, "epoch": 3.22808671065033, "percentage": 64.56, "elapsed_time": "0:17:32", "remaining_time": "0:09:37", "throughput": 7569.42, "total_tokens": 7967264}
{"current_steps": 3430, "total_steps": 5305, "loss": 0.0003, "lr": 1.6750788081369951e-06, "epoch": 3.232799245994345, "percentage": 64.66, "elapsed_time": "0:17:33", "remaining_time": "0:09:35", "throughput": 7575.86, "total_tokens": 7978144}
{"current_steps": 3435, "total_steps": 5305, "loss": 0.0443, "lr": 1.6673182105871733e-06, "epoch": 3.23751178133836, "percentage": 64.75, "elapsed_time": "0:17:33", "remaining_time": "0:09:33", "throughput": 7582.39, "total_tokens": 7989152}
{"current_steps": 3440, "total_steps": 5305, "loss": 0.0, "lr": 1.659566627785536e-06, "epoch": 3.242224316682375, "percentage": 64.84, "elapsed_time": "0:17:34", "remaining_time": "0:09:31", "throughput": 7589.41, "total_tokens": 8000800}
{"current_steps": 3445, "total_steps": 5305, "loss": 0.0004, "lr": 1.651824143651975e-06, "epoch": 3.24693685202639, "percentage": 64.94, "elapsed_time": "0:17:34", "remaining_time": "0:09:29", "throughput": 7598.18, "total_tokens": 8014816}
{"current_steps": 3450, "total_steps": 5305, "loss": 0.0, "lr": 1.644090842007881e-06, "epoch": 3.251649387370405, "percentage": 65.03, "elapsed_time": "0:17:35", "remaining_time": "0:09:27", "throughput": 7604.19, "total_tokens": 8025120}
{"current_steps": 3455, "total_steps": 5305, "loss": 0.0, "lr": 1.6363668065752336e-06, "epoch": 3.2563619227144205, "percentage": 65.13, "elapsed_time": "0:17:35", "remaining_time": "0:09:25", "throughput": 7611.61, "total_tokens": 8037344}
{"current_steps": 3458, "total_steps": 5305, "eval_loss": 0.5191035270690918, "epoch": 3.2591894439208295, "percentage": 65.18, "elapsed_time": "0:17:38", "remaining_time": "0:09:25", "throughput": 7595.76, "total_tokens": 8043744}
{"current_steps": 3460, "total_steps": 5305, "loss": 0.0875, "lr": 1.6286521209756917e-06, "epoch": 3.2610744580584354, "percentage": 65.22, "elapsed_time": "0:18:25", "remaining_time": "0:09:49", "throughput": 7280.9, "total_tokens": 8048096}
{"current_steps": 3465, "total_steps": 5305, "loss": 0.0, "lr": 1.6209468687296947e-06, "epoch": 3.2657869934024504, "percentage": 65.32, "elapsed_time": "0:18:25", "remaining_time": "0:09:47", "throughput": 7288.84, "total_tokens": 8061344}
{"current_steps": 3470, "total_steps": 5305, "loss": 0.0, "lr": 1.613251133255554e-06, "epoch": 3.2704995287464658, "percentage": 65.41, "elapsed_time": "0:18:26", "remaining_time": "0:09:45", "throughput": 7295.85, "total_tokens": 8073184}
{"current_steps": 3475, "total_steps": 5305, "loss": 0.0, "lr": 1.6055649978685517e-06, "epoch": 3.2752120640904807, "percentage": 65.5, "elapsed_time": "0:18:27", "remaining_time": "0:09:42", "throughput": 7301.32, "total_tokens": 8082976}
{"current_steps": 3480, "total_steps": 5305, "loss": 0.0, "lr": 1.5978885457800348e-06, "epoch": 3.2799245994344957, "percentage": 65.6, "elapsed_time": "0:18:27", "remaining_time": "0:09:40", "throughput": 7308.16, "total_tokens": 8094624}
{"current_steps": 3485, "total_steps": 5305, "loss": 0.0843, "lr": 1.59022186009652e-06, "epoch": 3.284637134778511, "percentage": 65.69, "elapsed_time": "0:18:28", "remaining_time": "0:09:38", "throughput": 7314.03, "total_tokens": 8104928}
{"current_steps": 3490, "total_steps": 5305, "loss": 0.0, "lr": 1.5825650238187918e-06, "epoch": 3.289349670122526, "percentage": 65.79, "elapsed_time": "0:18:28", "remaining_time": "0:09:36", "throughput": 7321.09, "total_tokens": 8116896}
{"current_steps": 3495, "total_steps": 5305, "loss": 0.0875, "lr": 1.5749181198410014e-06, "epoch": 3.294062205466541, "percentage": 65.88, "elapsed_time": "0:18:29", "remaining_time": "0:09:34", "throughput": 7327.48, "total_tokens": 8127968}
{"current_steps": 3500, "total_steps": 5305, "loss": 0.0326, "lr": 1.5672812309497722e-06, "epoch": 3.298774740810556, "percentage": 65.98, "elapsed_time": "0:18:29", "remaining_time": "0:09:32", "throughput": 7334.51, "total_tokens": 8139936}
{"current_steps": 3505, "total_steps": 5305, "loss": 0.0001, "lr": 1.5596544398233028e-06, "epoch": 3.3034872761545713, "percentage": 66.07, "elapsed_time": "0:18:30", "remaining_time": "0:09:30", "throughput": 7341.16, "total_tokens": 8151392}
{"current_steps": 3510, "total_steps": 5305, "loss": 0.0, "lr": 1.5520378290304723e-06, "epoch": 3.308199811498586, "percentage": 66.16, "elapsed_time": "0:18:30", "remaining_time": "0:09:28", "throughput": 7349.52, "total_tokens": 8165280}
{"current_steps": 3515, "total_steps": 5305, "loss": 0.0, "lr": 1.544431481029944e-06, "epoch": 3.312912346842601, "percentage": 66.26, "elapsed_time": "0:18:31", "remaining_time": "0:09:26", "throughput": 7356.91, "total_tokens": 8177696}
{"current_steps": 3520, "total_steps": 5305, "loss": 0.0, "lr": 1.5368354781692764e-06, "epoch": 3.3176248821866166, "percentage": 66.35, "elapsed_time": "0:18:32", "remaining_time": "0:09:23", "throughput": 7363.62, "total_tokens": 8189280}
{"current_steps": 3525, "total_steps": 5305, "loss": 0.0001, "lr": 1.5292499026840292e-06, "epoch": 3.3223374175306315, "percentage": 66.45, "elapsed_time": "0:18:32", "remaining_time": "0:09:21", "throughput": 7371.7, "total_tokens": 8202784}
{"current_steps": 3530, "total_steps": 5305, "loss": 0.1032, "lr": 1.5216748366968743e-06, "epoch": 3.3270499528746464, "percentage": 66.54, "elapsed_time": "0:18:33", "remaining_time": "0:09:19", "throughput": 7379.58, "total_tokens": 8216032}
{"current_steps": 3535, "total_steps": 5305, "loss": 0.0001, "lr": 1.5141103622167042e-06, "epoch": 3.331762488218662, "percentage": 66.64, "elapsed_time": "0:18:33", "remaining_time": "0:09:17", "throughput": 7386.79, "total_tokens": 8228320}
{"current_steps": 3540, "total_steps": 5305, "loss": 0.0487, "lr": 1.5065565611377472e-06, "epoch": 3.336475023562677, "percentage": 66.73, "elapsed_time": "0:18:34", "remaining_time": "0:09:15", "throughput": 7393.88, "total_tokens": 8240416}
{"current_steps": 3545, "total_steps": 5305, "loss": 0.0, "lr": 1.4990135152386814e-06, "epoch": 3.3411875589066917, "percentage": 66.82, "elapsed_time": "0:18:35", "remaining_time": "0:09:13", "throughput": 7401.04, "total_tokens": 8252640}
{"current_steps": 3550, "total_steps": 5305, "loss": 0.0001, "lr": 1.4914813061817434e-06, "epoch": 3.345900094250707, "percentage": 66.92, "elapsed_time": "0:18:35", "remaining_time": "0:09:11", "throughput": 7406.13, "total_tokens": 8261984}
{"current_steps": 3555, "total_steps": 5305, "loss": 0.0036, "lr": 1.4839600155118525e-06, "epoch": 3.350612629594722, "percentage": 67.01, "elapsed_time": "0:18:36", "remaining_time": "0:09:09", "throughput": 7412.83, "total_tokens": 8273568}
{"current_steps": 3560, "total_steps": 5305, "loss": 0.0001, "lr": 1.4764497246557214e-06, "epoch": 3.355325164938737, "percentage": 67.11, "elapsed_time": "0:18:36", "remaining_time": "0:09:07", "throughput": 7419.76, "total_tokens": 8285472}
{"current_steps": 3565, "total_steps": 5305, "loss": 0.0008, "lr": 1.4689505149209788e-06, "epoch": 3.360037700282752, "percentage": 67.2, "elapsed_time": "0:18:37", "remaining_time": "0:09:05", "throughput": 7424.86, "total_tokens": 8294816}
{"current_steps": 3570, "total_steps": 5305, "loss": 0.0, "lr": 1.4614624674952843e-06, "epoch": 3.3647502356267673, "percentage": 67.3, "elapsed_time": "0:18:37", "remaining_time": "0:09:03", "throughput": 7430.89, "total_tokens": 8305504}
{"current_steps": 3575, "total_steps": 5305, "loss": 0.0518, "lr": 1.4539856634454558e-06, "epoch": 3.3694627709707823, "percentage": 67.39, "elapsed_time": "0:18:38", "remaining_time": "0:09:01", "throughput": 7436.97, "total_tokens": 8316320}
{"current_steps": 3580, "total_steps": 5305, "loss": 0.0384, "lr": 1.4465201837165876e-06, "epoch": 3.3741753063147972, "percentage": 67.48, "elapsed_time": "0:18:38", "remaining_time": "0:08:59", "throughput": 7443.04, "total_tokens": 8327200}
{"current_steps": 3585, "total_steps": 5305, "loss": 0.0, "lr": 1.4390661091311742e-06, "epoch": 3.3788878416588126, "percentage": 67.58, "elapsed_time": "0:18:39", "remaining_time": "0:08:57", "throughput": 7450.19, "total_tokens": 8339488}
{"current_steps": 3590, "total_steps": 5305, "loss": 0.0642, "lr": 1.4316235203882373e-06, "epoch": 3.3836003770028276, "percentage": 67.67, "elapsed_time": "0:18:39", "remaining_time": "0:08:55", "throughput": 7458.25, "total_tokens": 8353120}
{"current_steps": 3595, "total_steps": 5305, "loss": 0.0, "lr": 1.4241924980624485e-06, "epoch": 3.3883129123468425, "percentage": 67.77, "elapsed_time": "0:18:40", "remaining_time": "0:08:52", "throughput": 7464.93, "total_tokens": 8364768}
{"current_steps": 3600, "total_steps": 5305, "loss": 0.0029, "lr": 1.4167731226032656e-06, "epoch": 3.3930254476908575, "percentage": 67.86, "elapsed_time": "0:18:41", "remaining_time": "0:08:50", "throughput": 7471.66, "total_tokens": 8376480}
{"current_steps": 3605, "total_steps": 5305, "loss": 0.0122, "lr": 1.4093654743340462e-06, "epoch": 3.397737983034873, "percentage": 67.95, "elapsed_time": "0:18:41", "remaining_time": "0:08:48", "throughput": 7477.38, "total_tokens": 8386784}
{"current_steps": 3610, "total_steps": 5305, "loss": 0.0, "lr": 1.4019696334511962e-06, "epoch": 3.402450518378888, "percentage": 68.05, "elapsed_time": "0:18:42", "remaining_time": "0:08:46", "throughput": 7483.7, "total_tokens": 8397984}
{"current_steps": 3615, "total_steps": 5305, "loss": 0.0, "lr": 1.3945856800232874e-06, "epoch": 3.4071630537229027, "percentage": 68.14, "elapsed_time": "0:18:42", "remaining_time": "0:08:44", "throughput": 7489.57, "total_tokens": 8408544}
{"current_steps": 3620, "total_steps": 5305, "loss": 0.0, "lr": 1.3872136939902004e-06, "epoch": 3.411875589066918, "percentage": 68.24, "elapsed_time": "0:18:43", "remaining_time": "0:08:42", "throughput": 7495.77, "total_tokens": 8419552}
{"current_steps": 3625, "total_steps": 5305, "loss": 0.0001, "lr": 1.379853755162249e-06, "epoch": 3.416588124410933, "percentage": 68.33, "elapsed_time": "0:18:43", "remaining_time": "0:08:40", "throughput": 7501.32, "total_tokens": 8429664}
{"current_steps": 3630, "total_steps": 5305, "loss": 0.0, "lr": 1.3725059432193278e-06, "epoch": 3.421300659754948, "percentage": 68.43, "elapsed_time": "0:18:44", "remaining_time": "0:08:38", "throughput": 7508.01, "total_tokens": 8441376}
{"current_steps": 3635, "total_steps": 5305, "loss": 0.0, "lr": 1.3651703377100406e-06, "epoch": 3.4260131950989634, "percentage": 68.52, "elapsed_time": "0:18:44", "remaining_time": "0:08:36", "throughput": 7514.55, "total_tokens": 8452896}
{"current_steps": 3640, "total_steps": 5305, "loss": 0.0, "lr": 1.3578470180508432e-06, "epoch": 3.4307257304429783, "percentage": 68.61, "elapsed_time": "0:18:45", "remaining_time": "0:08:34", "throughput": 7520.3, "total_tokens": 8463328}
{"current_steps": 3645, "total_steps": 5305, "loss": 0.0, "lr": 1.3505360635251813e-06, "epoch": 3.4354382657869933, "percentage": 68.71, "elapsed_time": "0:18:45", "remaining_time": "0:08:32", "throughput": 7527.49, "total_tokens": 8475808}
{"current_steps": 3650, "total_steps": 5305, "loss": 0.0122, "lr": 1.3432375532826374e-06, "epoch": 3.4401508011310087, "percentage": 68.8, "elapsed_time": "0:18:46", "remaining_time": "0:08:30", "throughput": 7534.11, "total_tokens": 8487456}
{"current_steps": 3655, "total_steps": 5305, "loss": 0.0, "lr": 1.3359515663380668e-06, "epoch": 3.4448633364750236, "percentage": 68.9, "elapsed_time": "0:18:47", "remaining_time": "0:08:28", "throughput": 7543.81, "total_tokens": 8503712}
{"current_steps": 3660, "total_steps": 5305, "loss": 0.2188, "lr": 1.3286781815707465e-06, "epoch": 3.4495758718190386, "percentage": 68.99, "elapsed_time": "0:18:47", "remaining_time": "0:08:26", "throughput": 7550.05, "total_tokens": 8514848}
{"current_steps": 3665, "total_steps": 5305, "loss": 0.0985, "lr": 1.3214174777235192e-06, "epoch": 3.4542884071630535, "percentage": 69.09, "elapsed_time": "0:18:48", "remaining_time": "0:08:24", "throughput": 7555.57, "total_tokens": 8524960}
{"current_steps": 3670, "total_steps": 5305, "loss": 0.0001, "lr": 1.3141695334019453e-06, "epoch": 3.459000942507069, "percentage": 69.18, "elapsed_time": "0:18:48", "remaining_time": "0:08:22", "throughput": 7561.34, "total_tokens": 8535520}
{"current_steps": 3675, "total_steps": 5305, "loss": 0.0023, "lr": 1.3069344270734452e-06, "epoch": 3.463713477851084, "percentage": 69.27, "elapsed_time": "0:18:49", "remaining_time": "0:08:20", "throughput": 7566.27, "total_tokens": 8544864}
{"current_steps": 3680, "total_steps": 5305, "loss": 0.0001, "lr": 1.2997122370664538e-06, "epoch": 3.468426013195099, "percentage": 69.37, "elapsed_time": "0:18:49", "remaining_time": "0:08:18", "throughput": 7573.14, "total_tokens": 8556960}
{"current_steps": 3685, "total_steps": 5305, "loss": 0.0001, "lr": 1.2925030415695727e-06, "epoch": 3.473138548539114, "percentage": 69.46, "elapsed_time": "0:18:50", "remaining_time": "0:08:16", "throughput": 7579.18, "total_tokens": 8567968}
{"current_steps": 3690, "total_steps": 5305, "loss": 0.0595, "lr": 1.285306918630722e-06, "epoch": 3.477851083883129, "percentage": 69.56, "elapsed_time": "0:18:51", "remaining_time": "0:08:15", "throughput": 7587.33, "total_tokens": 8581920}
{"current_steps": 3695, "total_steps": 5305, "loss": 0.0442, "lr": 1.2781239461562966e-06, "epoch": 3.482563619227144, "percentage": 69.65, "elapsed_time": "0:18:51", "remaining_time": "0:08:13", "throughput": 7594.7, "total_tokens": 8594720}
{"current_steps": 3700, "total_steps": 5305, "loss": 0.0001, "lr": 1.2709542019103211e-06, "epoch": 3.4872761545711595, "percentage": 69.75, "elapsed_time": "0:18:52", "remaining_time": "0:08:11", "throughput": 7601.36, "total_tokens": 8606560}
{"current_steps": 3705, "total_steps": 5305, "loss": 0.0017, "lr": 1.2637977635136123e-06, "epoch": 3.4919886899151744, "percentage": 69.84, "elapsed_time": "0:18:52", "remaining_time": "0:08:09", "throughput": 7607.86, "total_tokens": 8618208}
{"current_steps": 3710, "total_steps": 5305, "loss": 0.0089, "lr": 1.2566547084429326e-06, "epoch": 3.4967012252591894, "percentage": 69.93, "elapsed_time": "0:18:53", "remaining_time": "0:08:07", "throughput": 7615.56, "total_tokens": 8631584}
{"current_steps": 3715, "total_steps": 5305, "loss": 0.0338, "lr": 1.2495251140301553e-06, "epoch": 3.5014137606032048, "percentage": 70.03, "elapsed_time": "0:18:53", "remaining_time": "0:08:05", "throughput": 7621.88, "total_tokens": 8642912}
{"current_steps": 3720, "total_steps": 5305, "loss": 0.0002, "lr": 1.2424090574614262e-06, "epoch": 3.5061262959472197, "percentage": 70.12, "elapsed_time": "0:18:54", "remaining_time": "0:08:03", "throughput": 7626.83, "total_tokens": 8652384}
{"current_steps": 3724, "total_steps": 5305, "eval_loss": 0.4635506868362427, "epoch": 3.5098963242224315, "percentage": 70.2, "elapsed_time": "0:18:57", "remaining_time": "0:08:02", "throughput": 7612.81, "total_tokens": 8660768}
{"current_steps": 3725, "total_steps": 5305, "loss": 0.0008, "lr": 1.2353066157763305e-06, "epoch": 3.5108388312912346, "percentage": 70.22, "elapsed_time": "0:19:39", "remaining_time": "0:08:20", "throughput": 7343.26, "total_tokens": 8662624}
{"current_steps": 3730, "total_steps": 5305, "loss": 0.0001, "lr": 1.2282178658670514e-06, "epoch": 3.5155513666352496, "percentage": 70.31, "elapsed_time": "0:19:40", "remaining_time": "0:08:18", "throughput": 7348.71, "total_tokens": 8672864}
{"current_steps": 3735, "total_steps": 5305, "loss": 0.0001, "lr": 1.221142884477548e-06, "epoch": 3.520263901979265, "percentage": 70.41, "elapsed_time": "0:19:40", "remaining_time": "0:08:16", "throughput": 7355.02, "total_tokens": 8684448}
{"current_steps": 3740, "total_steps": 5305, "loss": 0.0001, "lr": 1.2140817482027155e-06, "epoch": 3.52497643732328, "percentage": 70.5, "elapsed_time": "0:19:41", "remaining_time": "0:08:14", "throughput": 7362.93, "total_tokens": 8698336}
{"current_steps": 3745, "total_steps": 5305, "loss": 0.0, "lr": 1.207034533487564e-06, "epoch": 3.529688972667295, "percentage": 70.59, "elapsed_time": "0:19:41", "remaining_time": "0:08:12", "throughput": 7370.05, "total_tokens": 8711072}
{"current_steps": 3750, "total_steps": 5305, "loss": 0.0001, "lr": 1.2000013166263803e-06, "epoch": 3.5344015080113103, "percentage": 70.69, "elapsed_time": "0:19:42", "remaining_time": "0:08:10", "throughput": 7377.18, "total_tokens": 8723872}
{"current_steps": 3755, "total_steps": 5305, "loss": 0.0013, "lr": 1.1929821737619132e-06, "epoch": 3.539114043355325, "percentage": 70.78, "elapsed_time": "0:19:43", "remaining_time": "0:08:08", "throughput": 7383.71, "total_tokens": 8735776}
{"current_steps": 3760, "total_steps": 5305, "loss": 0.0, "lr": 1.1859771808845417e-06, "epoch": 3.54382657869934, "percentage": 70.88, "elapsed_time": "0:19:43", "remaining_time": "0:08:06", "throughput": 7393.57, "total_tokens": 8752736}
{"current_steps": 3765, "total_steps": 5305, "loss": 0.0001, "lr": 1.1789864138314577e-06, "epoch": 3.548539114043355, "percentage": 70.97, "elapsed_time": "0:19:44", "remaining_time": "0:08:04", "throughput": 7401.44, "total_tokens": 8766688}
{"current_steps": 3770, "total_steps": 5305, "loss": 0.0, "lr": 1.1720099482858364e-06, "epoch": 3.5532516493873705, "percentage": 71.07, "elapsed_time": "0:19:45", "remaining_time": "0:08:02", "throughput": 7409.9, "total_tokens": 8781536}
{"current_steps": 3775, "total_steps": 5305, "loss": 0.0001, "lr": 1.1650478597760284e-06, "epoch": 3.5579641847313854, "percentage": 71.16, "elapsed_time": "0:19:45", "remaining_time": "0:08:00", "throughput": 7415.58, "total_tokens": 8792224}
{"current_steps": 3780, "total_steps": 5305, "loss": 0.0704, "lr": 1.158100223674733e-06, "epoch": 3.562676720075401, "percentage": 71.25, "elapsed_time": "0:19:46", "remaining_time": "0:07:58", "throughput": 7421.42, "total_tokens": 8803168}
{"current_steps": 3785, "total_steps": 5305, "loss": 0.0001, "lr": 1.1511671151981861e-06, "epoch": 3.5673892554194158, "percentage": 71.35, "elapsed_time": "0:19:46", "remaining_time": "0:07:56", "throughput": 7426.89, "total_tokens": 8813536}
{"current_steps": 3790, "total_steps": 5305, "loss": 0.0, "lr": 1.1442486094053445e-06, "epoch": 3.5721017907634307, "percentage": 71.44, "elapsed_time": "0:19:47", "remaining_time": "0:07:54", "throughput": 7432.32, "total_tokens": 8823840}
{"current_steps": 3795, "total_steps": 5305, "loss": 0.0, "lr": 1.1373447811970762e-06, "epoch": 3.5768143261074457, "percentage": 71.54, "elapsed_time": "0:19:47", "remaining_time": "0:07:52", "throughput": 7439.34, "total_tokens": 8836576}
{"current_steps": 3800, "total_steps": 5305, "loss": 0.0, "lr": 1.130455705315345e-06, "epoch": 3.581526861451461, "percentage": 71.63, "elapsed_time": "0:19:48", "remaining_time": "0:07:50", "throughput": 7446.68, "total_tokens": 8849824}
{"current_steps": 3805, "total_steps": 5305, "loss": 0.1829, "lr": 1.1235814563424046e-06, "epoch": 3.586239396795476, "percentage": 71.72, "elapsed_time": "0:19:48", "remaining_time": "0:07:48", "throughput": 7452.29, "total_tokens": 8860448}
{"current_steps": 3810, "total_steps": 5305, "loss": 0.0001, "lr": 1.1167221086999897e-06, "epoch": 3.590951932139491, "percentage": 71.82, "elapsed_time": "0:19:49", "remaining_time": "0:07:46", "throughput": 7458.38, "total_tokens": 8871776}
{"current_steps": 3815, "total_steps": 5305, "loss": 0.0006, "lr": 1.10987773664851e-06, "epoch": 3.5956644674835063, "percentage": 71.91, "elapsed_time": "0:19:50", "remaining_time": "0:07:44", "throughput": 7466.21, "total_tokens": 8885728}
{"current_steps": 3820, "total_steps": 5305, "loss": 0.0, "lr": 1.1030484142862511e-06, "epoch": 3.6003770028275213, "percentage": 72.01, "elapsed_time": "0:19:50", "remaining_time": "0:07:42", "throughput": 7471.5, "total_tokens": 8895904}
{"current_steps": 3825, "total_steps": 5305, "loss": 0.0006, "lr": 1.0962342155485613e-06, "epoch": 3.605089538171536, "percentage": 72.1, "elapsed_time": "0:19:51", "remaining_time": "0:07:40", "throughput": 7477.93, "total_tokens": 8907808}
{"current_steps": 3830, "total_steps": 5305, "loss": 0.0, "lr": 1.0894352142070652e-06, "epoch": 3.609802073515551, "percentage": 72.2, "elapsed_time": "0:19:51", "remaining_time": "0:07:38", "throughput": 7483.51, "total_tokens": 8918432}
{"current_steps": 3835, "total_steps": 5305, "loss": 0.072, "lr": 1.0826514838688533e-06, "epoch": 3.6145146088595665, "percentage": 72.29, "elapsed_time": "0:19:52", "remaining_time": "0:07:37", "throughput": 7489.19, "total_tokens": 8929248}
{"current_steps": 3840, "total_steps": 5305, "loss": 0.0001, "lr": 1.075883097975691e-06, "epoch": 3.6192271442035815, "percentage": 72.38, "elapsed_time": "0:19:52", "remaining_time": "0:07:35", "throughput": 7495.1, "total_tokens": 8940384}
{"current_steps": 3845, "total_steps": 5305, "loss": 0.0, "lr": 1.0691301298032218e-06, "epoch": 3.623939679547597, "percentage": 72.48, "elapsed_time": "0:19:53", "remaining_time": "0:07:33", "throughput": 7500.51, "total_tokens": 8950816}
{"current_steps": 3850, "total_steps": 5305, "loss": 0.0001, "lr": 1.0623926524601771e-06, "epoch": 3.628652214891612, "percentage": 72.57, "elapsed_time": "0:19:53", "remaining_time": "0:07:31", "throughput": 7507.3, "total_tokens": 8963296}
{"current_steps": 3855, "total_steps": 5305, "loss": 0.0, "lr": 1.0556707388875786e-06, "epoch": 3.6333647502356268, "percentage": 72.67, "elapsed_time": "0:19:54", "remaining_time": "0:07:29", "throughput": 7513.32, "total_tokens": 8974624}
{"current_steps": 3860, "total_steps": 5305, "loss": 0.0596, "lr": 1.048964461857954e-06, "epoch": 3.6380772855796417, "percentage": 72.76, "elapsed_time": "0:19:55", "remaining_time": "0:07:27", "throughput": 7519.36, "total_tokens": 8985952}
{"current_steps": 3865, "total_steps": 5305, "loss": 0.0002, "lr": 1.0422738939745453e-06, "epoch": 3.6427898209236567, "percentage": 72.86, "elapsed_time": "0:19:55", "remaining_time": "0:07:25", "throughput": 7524.57, "total_tokens": 8996064}
{"current_steps": 3870, "total_steps": 5305, "loss": 0.0002, "lr": 1.035599107670529e-06, "epoch": 3.647502356267672, "percentage": 72.95, "elapsed_time": "0:19:56", "remaining_time": "0:07:23", "throughput": 7529.88, "total_tokens": 9006368}
{"current_steps": 3875, "total_steps": 5305, "loss": 0.0001, "lr": 1.0289401752082214e-06, "epoch": 3.652214891611687, "percentage": 73.04, "elapsed_time": "0:19:56", "remaining_time": "0:07:21", "throughput": 7536.25, "total_tokens": 9018272}
{"current_steps": 3880, "total_steps": 5305, "loss": 0.1112, "lr": 1.0222971686783089e-06, "epoch": 3.6569274269557024, "percentage": 73.14, "elapsed_time": "0:19:57", "remaining_time": "0:07:19", "throughput": 7542.14, "total_tokens": 9029472}
{"current_steps": 3885, "total_steps": 5305, "loss": 0.0001, "lr": 1.0156701599990562e-06, "epoch": 3.6616399622997173, "percentage": 73.23, "elapsed_time": "0:19:57", "remaining_time": "0:07:17", "throughput": 7548.81, "total_tokens": 9041824}
{"current_steps": 3890, "total_steps": 5305, "loss": 0.0381, "lr": 1.0090592209155373e-06, "epoch": 3.6663524976437323, "percentage": 73.33, "elapsed_time": "0:19:58", "remaining_time": "0:07:15", "throughput": 7555.81, "total_tokens": 9054752}
{"current_steps": 3895, "total_steps": 5305, "loss": 0.002, "lr": 1.0024644229988484e-06, "epoch": 3.6710650329877472, "percentage": 73.42, "elapsed_time": "0:19:58", "remaining_time": "0:07:14", "throughput": 7561.02, "total_tokens": 9064928}
{"current_steps": 3900, "total_steps": 5305, "loss": 0.0001, "lr": 9.95885837645344e-07, "epoch": 3.6757775683317626, "percentage": 73.52, "elapsed_time": "0:19:59", "remaining_time": "0:07:12", "throughput": 7567.0, "total_tokens": 9076256}
{"current_steps": 3905, "total_steps": 5305, "loss": 0.0954, "lr": 9.893235360758565e-07, "epoch": 3.6804901036757776, "percentage": 73.61, "elapsed_time": "0:19:59", "remaining_time": "0:07:10", "throughput": 7572.35, "total_tokens": 9086624}
{"current_steps": 3910, "total_steps": 5305, "loss": 0.0001, "lr": 9.827775893349273e-07, "epoch": 3.6852026390197925, "percentage": 73.7, "elapsed_time": "0:20:00", "remaining_time": "0:07:08", "throughput": 7578.21, "total_tokens": 9097824}
{"current_steps": 3915, "total_steps": 5305, "loss": 0.0323, "lr": 9.762480682900374e-07, "epoch": 3.689915174363808, "percentage": 73.8, "elapsed_time": "0:20:01", "remaining_time": "0:07:06", "throughput": 7582.95, "total_tokens": 9107296}
{"current_steps": 3920, "total_steps": 5305, "loss": 0.0039, "lr": 9.697350436308428e-07, "epoch": 3.694627709707823, "percentage": 73.89, "elapsed_time": "0:20:01", "remaining_time": "0:07:04", "throughput": 7589.19, "total_tokens": 9119008}
{"current_steps": 3925, "total_steps": 5305, "loss": 0.0001, "lr": 9.63238585868405e-07, "epoch": 3.699340245051838, "percentage": 73.99, "elapsed_time": "0:20:02", "remaining_time": "0:07:02", "throughput": 7595.77, "total_tokens": 9131296}
{"current_steps": 3930, "total_steps": 5305, "loss": 0.0001, "lr": 9.567587653344295e-07, "epoch": 3.7040527803958527, "percentage": 74.08, "elapsed_time": "0:20:02", "remaining_time": "0:07:00", "throughput": 7601.05, "total_tokens": 9141664}
{"current_steps": 3935, "total_steps": 5305, "loss": 0.0001, "lr": 9.502956521805054e-07, "epoch": 3.708765315739868, "percentage": 74.18, "elapsed_time": "0:20:03", "remaining_time": "0:06:58", "throughput": 7605.89, "total_tokens": 9151328}
{"current_steps": 3940, "total_steps": 5305, "loss": 0.0002, "lr": 9.438493163773433e-07, "epoch": 3.713477851083883, "percentage": 74.27, "elapsed_time": "0:20:03", "remaining_time": "0:06:57", "throughput": 7612.82, "total_tokens": 9164192}
{"current_steps": 3945, "total_steps": 5305, "loss": 0.0003, "lr": 9.374198277140237e-07, "epoch": 3.7181903864278985, "percentage": 74.36, "elapsed_time": "0:20:04", "remaining_time": "0:06:55", "throughput": 7619.38, "total_tokens": 9176544}
{"current_steps": 3950, "total_steps": 5305, "loss": 0.0162, "lr": 9.310072557972305e-07, "epoch": 3.7229029217719134, "percentage": 74.46, "elapsed_time": "0:20:04", "remaining_time": "0:06:53", "throughput": 7625.72, "total_tokens": 9188512}
{"current_steps": 3955, "total_steps": 5305, "loss": 0.0001, "lr": 9.246116700505109e-07, "epoch": 3.7276154571159283, "percentage": 74.55, "elapsed_time": "0:20:05", "remaining_time": "0:06:51", "throughput": 7632.36, "total_tokens": 9200992}
{"current_steps": 3960, "total_steps": 5305, "loss": 0.0001, "lr": 9.18233139713513e-07, "epoch": 3.7323279924599433, "percentage": 74.65, "elapsed_time": "0:20:06", "remaining_time": "0:06:49", "throughput": 7637.48, "total_tokens": 9211168}
{"current_steps": 3965, "total_steps": 5305, "loss": 0.0, "lr": 9.118717338412414e-07, "epoch": 3.7370405278039587, "percentage": 74.74, "elapsed_time": "0:20:06", "remaining_time": "0:06:47", "throughput": 7644.0, "total_tokens": 9223456}
{"current_steps": 3970, "total_steps": 5305, "loss": 0.0002, "lr": 9.055275213033077e-07, "epoch": 3.7417530631479736, "percentage": 74.84, "elapsed_time": "0:20:07", "remaining_time": "0:06:45", "throughput": 7649.12, "total_tokens": 9233632}
{"current_steps": 3975, "total_steps": 5305, "loss": 0.0751, "lr": 8.992005707831877e-07, "epoch": 3.7464655984919886, "percentage": 74.93, "elapsed_time": "0:20:07", "remaining_time": "0:06:44", "throughput": 7653.89, "total_tokens": 9243296}
{"current_steps": 3980, "total_steps": 5305, "loss": 0.0002, "lr": 8.928909507774741e-07, "epoch": 3.751178133836004, "percentage": 75.02, "elapsed_time": "0:20:08", "remaining_time": "0:06:42", "throughput": 7662.84, "total_tokens": 9259424}
{"current_steps": 3985, "total_steps": 5305, "loss": 0.0, "lr": 8.86598729595137e-07, "epoch": 3.755890669180019, "percentage": 75.12, "elapsed_time": "0:20:08", "remaining_time": "0:06:40", "throughput": 7669.39, "total_tokens": 9271840}
{"current_steps": 3990, "total_steps": 5305, "loss": 0.0, "lr": 8.80323975356783e-07, "epoch": 3.760603204524034, "percentage": 75.21, "elapsed_time": "0:20:09", "remaining_time": "0:06:38", "throughput": 7677.24, "total_tokens": 9286304}
{"current_steps": 3990, "total_steps": 5305, "eval_loss": 0.5201095938682556, "epoch": 3.760603204524034, "percentage": 75.21, "elapsed_time": "0:20:12", "remaining_time": "0:06:39", "throughput": 7659.55, "total_tokens": 9286304}
{"current_steps": 3995, "total_steps": 5305, "loss": 0.0004, "lr": 8.740667559939217e-07, "epoch": 3.765315739868049, "percentage": 75.31, "elapsed_time": "0:20:36", "remaining_time": "0:06:45", "throughput": 7516.18, "total_tokens": 9297056}
{"current_steps": 4000, "total_steps": 5305, "loss": 0.0, "lr": 8.678271392482243e-07, "epoch": 3.770028275212064, "percentage": 75.4, "elapsed_time": "0:20:37", "remaining_time": "0:06:43", "throughput": 7521.66, "total_tokens": 9307872}
{"current_steps": 4005, "total_steps": 5305, "loss": 0.0, "lr": 8.616051926707941e-07, "epoch": 3.774740810556079, "percentage": 75.49, "elapsed_time": "0:20:38", "remaining_time": "0:06:41", "throughput": 7527.22, "total_tokens": 9318816}
{"current_steps": 4010, "total_steps": 5305, "loss": 0.0308, "lr": 8.554009836214345e-07, "epoch": 3.7794533459000945, "percentage": 75.59, "elapsed_time": "0:20:38", "remaining_time": "0:06:39", "throughput": 7533.74, "total_tokens": 9331232}
{"current_steps": 4015, "total_steps": 5305, "loss": 0.0657, "lr": 8.49214579267921e-07, "epoch": 3.7841658812441095, "percentage": 75.68, "elapsed_time": "0:20:39", "remaining_time": "0:06:38", "throughput": 7539.17, "total_tokens": 9342112}
{"current_steps": 4020, "total_steps": 5305, "loss": 0.0, "lr": 8.430460465852683e-07, "epoch": 3.7888784165881244, "percentage": 75.78, "elapsed_time": "0:20:39", "remaining_time": "0:06:36", "throughput": 7546.5, "total_tokens": 9355872}
{"current_steps": 4025, "total_steps": 5305, "loss": 0.0, "lr": 8.368954523550146e-07, "epoch": 3.7935909519321394, "percentage": 75.87, "elapsed_time": "0:20:40", "remaining_time": "0:06:34", "throughput": 7552.15, "total_tokens": 9367008}
{"current_steps": 4030, "total_steps": 5305, "loss": 0.0001, "lr": 8.307628631644904e-07, "epoch": 3.7983034872761543, "percentage": 75.97, "elapsed_time": "0:20:40", "remaining_time": "0:06:32", "throughput": 7559.01, "total_tokens": 9380000}
{"current_steps": 4035, "total_steps": 5305, "loss": 0.0, "lr": 8.246483454061016e-07, "epoch": 3.8030160226201697, "percentage": 76.06, "elapsed_time": "0:20:41", "remaining_time": "0:06:30", "throughput": 7564.19, "total_tokens": 9390368}
{"current_steps": 4040, "total_steps": 5305, "loss": 0.0829, "lr": 8.185519652766091e-07, "epoch": 3.8077285579641846, "percentage": 76.15, "elapsed_time": "0:20:41", "remaining_time": "0:06:28", "throughput": 7570.09, "total_tokens": 9401952}
{"current_steps": 4045, "total_steps": 5305, "loss": 0.0, "lr": 8.124737887764148e-07, "epoch": 3.8124410933082, "percentage": 76.25, "elapsed_time": "0:20:42", "remaining_time": "0:06:27", "throughput": 7576.02, "total_tokens": 9413536}
{"current_steps": 4050, "total_steps": 5305, "loss": 0.09, "lr": 8.064138817088429e-07, "epoch": 3.817153628652215, "percentage": 76.34, "elapsed_time": "0:20:43", "remaining_time": "0:06:25", "throughput": 7581.77, "total_tokens": 9424864}
{"current_steps": 4055, "total_steps": 5305, "loss": 0.0, "lr": 8.003723096794314e-07, "epoch": 3.82186616399623, "percentage": 76.44, "elapsed_time": "0:20:43", "remaining_time": "0:06:23", "throughput": 7588.12, "total_tokens": 9437152}
{"current_steps": 4060, "total_steps": 5305, "loss": 0.0001, "lr": 7.94349138095219e-07, "epoch": 3.826578699340245, "percentage": 76.53, "elapsed_time": "0:20:44", "remaining_time": "0:06:21", "throughput": 7593.61, "total_tokens": 9448032}
{"current_steps": 4065, "total_steps": 5305, "loss": 0.0001, "lr": 7.883444321640383e-07, "epoch": 3.8312912346842602, "percentage": 76.63, "elapsed_time": "0:20:44", "remaining_time": "0:06:19", "throughput": 7599.38, "total_tokens": 9459424}
{"current_steps": 4070, "total_steps": 5305, "loss": 0.0001, "lr": 7.82358256893812e-07, "epoch": 3.836003770028275, "percentage": 76.72, "elapsed_time": "0:20:45", "remaining_time": "0:06:17", "throughput": 7604.33, "total_tokens": 9469536}
{"current_steps": 4075, "total_steps": 5305, "loss": 0.0, "lr": 7.763906770918428e-07, "epoch": 3.84071630537229, "percentage": 76.81, "elapsed_time": "0:20:45", "remaining_time": "0:06:16", "throughput": 7611.36, "total_tokens": 9482976}
{"current_steps": 4080, "total_steps": 5305, "loss": 0.0001, "lr": 7.704417573641196e-07, "epoch": 3.8454288407163055, "percentage": 76.91, "elapsed_time": "0:20:46", "remaining_time": "0:06:14", "throughput": 7616.08, "total_tokens": 9492704}
{"current_steps": 4085, "total_steps": 5305, "loss": 0.0, "lr": 7.645115621146116e-07, "epoch": 3.8501413760603205, "percentage": 77.0, "elapsed_time": "0:20:46", "remaining_time": "0:06:12", "throughput": 7622.31, "total_tokens": 9504864}
{"current_steps": 4090, "total_steps": 5305, "loss": 0.1079, "lr": 7.586001555445773e-07, "epoch": 3.8548539114043354, "percentage": 77.1, "elapsed_time": "0:20:47", "remaining_time": "0:06:10", "throughput": 7627.54, "total_tokens": 9515424}
{"current_steps": 4095, "total_steps": 5305, "loss": 0.0001, "lr": 7.527076016518603e-07, "epoch": 3.8595664467483504, "percentage": 77.19, "elapsed_time": "0:20:48", "remaining_time": "0:06:08", "throughput": 7632.67, "total_tokens": 9525792}
{"current_steps": 4100, "total_steps": 5305, "loss": 0.0001, "lr": 7.468339642302077e-07, "epoch": 3.8642789820923658, "percentage": 77.29, "elapsed_time": "0:20:48", "remaining_time": "0:06:06", "throughput": 7637.96, "total_tokens": 9536416}
{"current_steps": 4105, "total_steps": 5305, "loss": 0.0722, "lr": 7.409793068685709e-07, "epoch": 3.8689915174363807, "percentage": 77.38, "elapsed_time": "0:20:49", "remaining_time": "0:06:05", "throughput": 7645.63, "total_tokens": 9550880}
{"current_steps": 4110, "total_steps": 5305, "loss": 0.0, "lr": 7.351436929504203e-07, "epoch": 3.873704052780396, "percentage": 77.47, "elapsed_time": "0:20:49", "remaining_time": "0:06:03", "throughput": 7652.94, "total_tokens": 9564768}
{"current_steps": 4115, "total_steps": 5305, "loss": 0.0001, "lr": 7.293271856530585e-07, "epoch": 3.878416588124411, "percentage": 77.57, "elapsed_time": "0:20:50", "remaining_time": "0:06:01", "throughput": 7658.45, "total_tokens": 9575776}
{"current_steps": 4120, "total_steps": 5305, "loss": 0.0323, "lr": 7.235298479469391e-07, "epoch": 3.883129123468426, "percentage": 77.66, "elapsed_time": "0:20:50", "remaining_time": "0:05:59", "throughput": 7664.81, "total_tokens": 9588192}
{"current_steps": 4125, "total_steps": 5305, "loss": 0.0, "lr": 7.177517425949801e-07, "epoch": 3.887841658812441, "percentage": 77.76, "elapsed_time": "0:20:51", "remaining_time": "0:05:57", "throughput": 7669.81, "total_tokens": 9598432}
{"current_steps": 4130, "total_steps": 5305, "loss": 0.0001, "lr": 7.119929321518876e-07, "epoch": 3.8925541941564563, "percentage": 77.85, "elapsed_time": "0:20:52", "remaining_time": "0:05:56", "throughput": 7678.02, "total_tokens": 9613920}
{"current_steps": 4135, "total_steps": 5305, "loss": 0.0001, "lr": 7.062534789634772e-07, "epoch": 3.8972667295004713, "percentage": 77.95, "elapsed_time": "0:20:52", "remaining_time": "0:05:54", "throughput": 7683.41, "total_tokens": 9624864}
{"current_steps": 4140, "total_steps": 5305, "loss": 0.0004, "lr": 7.005334451660034e-07, "epoch": 3.901979264844486, "percentage": 78.04, "elapsed_time": "0:20:53", "remaining_time": "0:05:52", "throughput": 7688.47, "total_tokens": 9635232}
{"current_steps": 4145, "total_steps": 5305, "loss": 0.0, "lr": 6.948328926854767e-07, "epoch": 3.9066918001885016, "percentage": 78.13, "elapsed_time": "0:20:53", "remaining_time": "0:05:50", "throughput": 7695.38, "total_tokens": 9648544}
{"current_steps": 4150, "total_steps": 5305, "loss": 0.0074, "lr": 6.891518832370059e-07, "epoch": 3.9114043355325165, "percentage": 78.23, "elapsed_time": "0:20:54", "remaining_time": "0:05:49", "throughput": 7700.75, "total_tokens": 9659424}
{"current_steps": 4155, "total_steps": 5305, "loss": 0.0, "lr": 6.834904783241198e-07, "epoch": 3.9161168708765315, "percentage": 78.32, "elapsed_time": "0:20:54", "remaining_time": "0:05:47", "throughput": 7705.88, "total_tokens": 9669920}
{"current_steps": 4160, "total_steps": 5305, "loss": 0.0002, "lr": 6.778487392381089e-07, "epoch": 3.9208294062205464, "percentage": 78.42, "elapsed_time": "0:20:55", "remaining_time": "0:05:45", "throughput": 7711.61, "total_tokens": 9681376}
{"current_steps": 4165, "total_steps": 5305, "loss": 0.0, "lr": 6.722267270573529e-07, "epoch": 3.925541941564562, "percentage": 78.51, "elapsed_time": "0:20:55", "remaining_time": "0:05:43", "throughput": 7716.53, "total_tokens": 9691552}
{"current_steps": 4170, "total_steps": 5305, "loss": 0.0001, "lr": 6.666245026466708e-07, "epoch": 3.9302544769085768, "percentage": 78.61, "elapsed_time": "0:20:56", "remaining_time": "0:05:42", "throughput": 7723.09, "total_tokens": 9704288}
{"current_steps": 4175, "total_steps": 5305, "loss": 0.0595, "lr": 6.61042126656652e-07, "epoch": 3.934967012252592, "percentage": 78.7, "elapsed_time": "0:20:57", "remaining_time": "0:05:40", "throughput": 7727.69, "total_tokens": 9713952}
{"current_steps": 4180, "total_steps": 5305, "loss": 0.0642, "lr": 6.554796595230051e-07, "epoch": 3.939679547596607, "percentage": 78.79, "elapsed_time": "0:20:57", "remaining_time": "0:05:38", "throughput": 7732.9, "total_tokens": 9724576}
{"current_steps": 4185, "total_steps": 5305, "loss": 0.0002, "lr": 6.499371614659019e-07, "epoch": 3.944392082940622, "percentage": 78.89, "elapsed_time": "0:20:58", "remaining_time": "0:05:36", "throughput": 7738.24, "total_tokens": 9735392}
{"current_steps": 4190, "total_steps": 5305, "loss": 0.0766, "lr": 6.444146924893252e-07, "epoch": 3.949104618284637, "percentage": 78.98, "elapsed_time": "0:20:58", "remaining_time": "0:05:34", "throughput": 7743.35, "total_tokens": 9745888}
{"current_steps": 4195, "total_steps": 5305, "loss": 0.111, "lr": 6.389123123804217e-07, "epoch": 3.9538171536286524, "percentage": 79.08, "elapsed_time": "0:20:59", "remaining_time": "0:05:33", "throughput": 7747.62, "total_tokens": 9755104}
{"current_steps": 4200, "total_steps": 5305, "loss": 0.0003, "lr": 6.334300807088509e-07, "epoch": 3.9585296889726673, "percentage": 79.17, "elapsed_time": "0:20:59", "remaining_time": "0:05:31", "throughput": 7753.59, "total_tokens": 9766944}
{"current_steps": 4205, "total_steps": 5305, "loss": 0.0782, "lr": 6.279680568261423e-07, "epoch": 3.9632422243166823, "percentage": 79.26, "elapsed_time": "0:21:00", "remaining_time": "0:05:29", "throughput": 7759.23, "total_tokens": 9778336}
{"current_steps": 4210, "total_steps": 5305, "loss": 0.0004, "lr": 6.225262998650525e-07, "epoch": 3.9679547596606977, "percentage": 79.36, "elapsed_time": "0:21:00", "remaining_time": "0:05:27", "throughput": 7764.51, "total_tokens": 9789088}
{"current_steps": 4215, "total_steps": 5305, "loss": 0.0003, "lr": 6.171048687389273e-07, "epoch": 3.9726672950047126, "percentage": 79.45, "elapsed_time": "0:21:01", "remaining_time": "0:05:26", "throughput": 7769.44, "total_tokens": 9799392}
{"current_steps": 4220, "total_steps": 5305, "loss": 0.0, "lr": 6.117038221410568e-07, "epoch": 3.9773798303487276, "percentage": 79.55, "elapsed_time": "0:21:01", "remaining_time": "0:05:24", "throughput": 7775.43, "total_tokens": 9811360}
{"current_steps": 4225, "total_steps": 5305, "loss": 0.1016, "lr": 6.063232185440507e-07, "epoch": 3.9820923656927425, "percentage": 79.64, "elapsed_time": "0:21:02", "remaining_time": "0:05:22", "throughput": 7781.89, "total_tokens": 9824160}
{"current_steps": 4230, "total_steps": 5305, "loss": 0.0007, "lr": 6.009631161991958e-07, "epoch": 3.986804901036758, "percentage": 79.74, "elapsed_time": "0:21:02", "remaining_time": "0:05:20", "throughput": 7787.07, "total_tokens": 9834784}
{"current_steps": 4235, "total_steps": 5305, "loss": 0.0, "lr": 5.956235731358298e-07, "epoch": 3.991517436380773, "percentage": 79.83, "elapsed_time": "0:21:03", "remaining_time": "0:05:19", "throughput": 7792.54, "total_tokens": 9845920}
{"current_steps": 4240, "total_steps": 5305, "loss": 0.0, "lr": 5.903046471607121e-07, "epoch": 3.9962299717247878, "percentage": 79.92, "elapsed_time": "0:21:04", "remaining_time": "0:05:17", "throughput": 7798.71, "total_tokens": 9858208}
{"current_steps": 4245, "total_steps": 5305, "loss": 0.032, "lr": 5.850063958573993e-07, "epoch": 4.000942507068803, "percentage": 80.02, "elapsed_time": "0:21:04", "remaining_time": "0:05:15", "throughput": 7802.8, "total_tokens": 9868192}
{"current_steps": 4250, "total_steps": 5305, "loss": 0.0, "lr": 5.797288765856196e-07, "epoch": 4.005655042412818, "percentage": 80.11, "elapsed_time": "0:21:05", "remaining_time": "0:05:14", "throughput": 7810.28, "total_tokens": 9882784}
{"current_steps": 4255, "total_steps": 5305, "loss": 0.0001, "lr": 5.74472146480653e-07, "epoch": 4.010367577756833, "percentage": 80.21, "elapsed_time": "0:21:05", "remaining_time": "0:05:12", "throughput": 7814.81, "total_tokens": 9892448}
{"current_steps": 4256, "total_steps": 5305, "eval_loss": 0.5145591497421265, "epoch": 4.011310084825636, "percentage": 80.23, "elapsed_time": "0:21:08", "remaining_time": "0:05:12", "throughput": 7799.12, "total_tokens": 9894624}
{"current_steps": 4260, "total_steps": 5305, "loss": 0.0, "lr": 5.692362624527117e-07, "epoch": 4.015080113100848, "percentage": 80.3, "elapsed_time": "0:21:46", "remaining_time": "0:05:20", "throughput": 7581.51, "total_tokens": 9905376}
{"current_steps": 4265, "total_steps": 5305, "loss": 0.0, "lr": 5.640212811863277e-07, "epoch": 4.019792648444863, "percentage": 80.4, "elapsed_time": "0:21:47", "remaining_time": "0:05:18", "throughput": 7586.34, "total_tokens": 9915616}
{"current_steps": 4270, "total_steps": 5305, "loss": 0.0, "lr": 5.588272591397337e-07, "epoch": 4.024505183788879, "percentage": 80.49, "elapsed_time": "0:21:47", "remaining_time": "0:05:16", "throughput": 7592.6, "total_tokens": 9928288}
{"current_steps": 4275, "total_steps": 5305, "loss": 0.0001, "lr": 5.536542525442554e-07, "epoch": 4.029217719132894, "percentage": 80.58, "elapsed_time": "0:21:48", "remaining_time": "0:05:15", "throughput": 7597.81, "total_tokens": 9939232}
{"current_steps": 4280, "total_steps": 5305, "loss": 0.0, "lr": 5.485023174037005e-07, "epoch": 4.033930254476909, "percentage": 80.68, "elapsed_time": "0:21:48", "remaining_time": "0:05:13", "throughput": 7603.34, "total_tokens": 9950688}
{"current_steps": 4285, "total_steps": 5305, "loss": 0.0, "lr": 5.433715094937575e-07, "epoch": 4.038642789820924, "percentage": 80.77, "elapsed_time": "0:21:49", "remaining_time": "0:05:11", "throughput": 7608.7, "total_tokens": 9961824}
{"current_steps": 4290, "total_steps": 5305, "loss": 0.0, "lr": 5.382618843613827e-07, "epoch": 4.043355325164939, "percentage": 80.87, "elapsed_time": "0:21:49", "remaining_time": "0:05:09", "throughput": 7614.99, "total_tokens": 9974560}
{"current_steps": 4295, "total_steps": 5305, "loss": 0.0, "lr": 5.331734973242089e-07, "epoch": 4.0480678605089535, "percentage": 80.96, "elapsed_time": "0:21:50", "remaining_time": "0:05:08", "throughput": 7621.12, "total_tokens": 9987040}
{"current_steps": 4300, "total_steps": 5305, "loss": 0.0, "lr": 5.28106403469939e-07, "epoch": 4.0527803958529685, "percentage": 81.06, "elapsed_time": "0:21:51", "remaining_time": "0:05:06", "throughput": 7628.93, "total_tokens": 10002400}
{"current_steps": 4305, "total_steps": 5305, "loss": 0.0, "lr": 5.23060657655754e-07, "epoch": 4.057492931196984, "percentage": 81.15, "elapsed_time": "0:21:51", "remaining_time": "0:05:04", "throughput": 7633.6, "total_tokens": 10012448}
{"current_steps": 4310, "total_steps": 5305, "loss": 0.0001, "lr": 5.180363145077164e-07, "epoch": 4.062205466540999, "percentage": 81.24, "elapsed_time": "0:21:52", "remaining_time": "0:05:02", "throughput": 7638.79, "total_tokens": 10023392}
{"current_steps": 4315, "total_steps": 5305, "loss": 0.0002, "lr": 5.130334284201799e-07, "epoch": 4.066918001885014, "percentage": 81.34, "elapsed_time": "0:21:52", "remaining_time": "0:05:01", "throughput": 7644.1, "total_tokens": 10034528}
{"current_steps": 4320, "total_steps": 5305, "loss": 0.0, "lr": 5.080520535552028e-07, "epoch": 4.071630537229029, "percentage": 81.43, "elapsed_time": "0:21:53", "remaining_time": "0:04:59", "throughput": 7649.04, "total_tokens": 10045024}
{"current_steps": 4325, "total_steps": 5305, "loss": 0.0, "lr": 5.030922438419569e-07, "epoch": 4.076343072573044, "percentage": 81.53, "elapsed_time": "0:21:53", "remaining_time": "0:04:57", "throughput": 7653.82, "total_tokens": 10055328}
{"current_steps": 4330, "total_steps": 5305, "loss": 0.0, "lr": 4.981540529761473e-07, "epoch": 4.081055607917059, "percentage": 81.62, "elapsed_time": "0:21:54", "remaining_time": "0:04:55", "throughput": 7658.35, "total_tokens": 10065184}
{"current_steps": 4335, "total_steps": 5305, "loss": 0.0, "lr": 4.932375344194285e-07, "epoch": 4.085768143261075, "percentage": 81.72, "elapsed_time": "0:21:54", "remaining_time": "0:04:54", "throughput": 7664.11, "total_tokens": 10077088}
{"current_steps": 4340, "total_steps": 5305, "loss": 0.0, "lr": 4.88342741398831e-07, "epoch": 4.09048067860509, "percentage": 81.81, "elapsed_time": "0:21:55", "remaining_time": "0:04:52", "throughput": 7669.17, "total_tokens": 10087840}
{"current_steps": 4345, "total_steps": 5305, "loss": 0.0, "lr": 4.83469726906175e-07, "epoch": 4.095193213949105, "percentage": 81.9, "elapsed_time": "0:21:55", "remaining_time": "0:04:50", "throughput": 7674.26, "total_tokens": 10098656}
{"current_steps": 4350, "total_steps": 5305, "loss": 0.0, "lr": 4.786185436975085e-07, "epoch": 4.09990574929312, "percentage": 82.0, "elapsed_time": "0:21:56", "remaining_time": "0:04:49", "throughput": 7680.54, "total_tokens": 10111456}
{"current_steps": 4355, "total_steps": 5305, "loss": 0.0, "lr": 4.7378924429252735e-07, "epoch": 4.104618284637135, "percentage": 82.09, "elapsed_time": "0:21:57", "remaining_time": "0:04:47", "throughput": 7686.0, "total_tokens": 10122912}
{"current_steps": 4360, "total_steps": 5305, "loss": 0.0003, "lr": 4.689818809740118e-07, "epoch": 4.10933081998115, "percentage": 82.19, "elapsed_time": "0:21:57", "remaining_time": "0:04:45", "throughput": 7691.87, "total_tokens": 10135072}
{"current_steps": 4365, "total_steps": 5305, "loss": 0.0001, "lr": 4.641965057872552e-07, "epoch": 4.1140433553251645, "percentage": 82.28, "elapsed_time": "0:21:58", "remaining_time": "0:04:43", "throughput": 7696.87, "total_tokens": 10145760}
{"current_steps": 4370, "total_steps": 5305, "loss": 0.0001, "lr": 4.594331705395078e-07, "epoch": 4.11875589066918, "percentage": 82.38, "elapsed_time": "0:21:58", "remaining_time": "0:04:42", "throughput": 7701.61, "total_tokens": 10156000}
{"current_steps": 4375, "total_steps": 5305, "loss": 0.0, "lr": 4.5469192679940905e-07, "epoch": 4.123468426013195, "percentage": 82.47, "elapsed_time": "0:21:59", "remaining_time": "0:04:40", "throughput": 7707.81, "total_tokens": 10168736}
{"current_steps": 4380, "total_steps": 5305, "loss": 0.0, "lr": 4.4997282589643363e-07, "epoch": 4.12818096135721, "percentage": 82.56, "elapsed_time": "0:21:59", "remaining_time": "0:04:38", "throughput": 7713.99, "total_tokens": 10181408}
{"current_steps": 4385, "total_steps": 5305, "loss": 0.0, "lr": 4.4527591892033263e-07, "epoch": 4.132893496701225, "percentage": 82.66, "elapsed_time": "0:22:00", "remaining_time": "0:04:37", "throughput": 7718.85, "total_tokens": 10191904}
{"current_steps": 4390, "total_steps": 5305, "loss": 0.0, "lr": 4.406012567205847e-07, "epoch": 4.13760603204524, "percentage": 82.75, "elapsed_time": "0:22:00", "remaining_time": "0:04:35", "throughput": 7723.53, "total_tokens": 10202080}
{"current_steps": 4395, "total_steps": 5305, "loss": 0.0, "lr": 4.359488899058409e-07, "epoch": 4.142318567389255, "percentage": 82.85, "elapsed_time": "0:22:01", "remaining_time": "0:04:33", "throughput": 7728.09, "total_tokens": 10212064}
{"current_steps": 4400, "total_steps": 5305, "loss": 0.0, "lr": 4.313188688433792e-07, "epoch": 4.147031102733271, "percentage": 82.94, "elapsed_time": "0:22:01", "remaining_time": "0:04:31", "throughput": 7733.26, "total_tokens": 10223136}
{"current_steps": 4405, "total_steps": 5305, "loss": 0.0, "lr": 4.2671124365855853e-07, "epoch": 4.151743638077286, "percentage": 83.03, "elapsed_time": "0:22:02", "remaining_time": "0:04:30", "throughput": 7740.91, "total_tokens": 10238432}
{"current_steps": 4410, "total_steps": 5305, "loss": 0.0252, "lr": 4.2212606423427867e-07, "epoch": 4.156456173421301, "percentage": 83.13, "elapsed_time": "0:22:03", "remaining_time": "0:04:28", "throughput": 7746.82, "total_tokens": 10250784}
{"current_steps": 4415, "total_steps": 5305, "loss": 0.0, "lr": 4.175633802104337e-07, "epoch": 4.161168708765316, "percentage": 83.22, "elapsed_time": "0:22:03", "remaining_time": "0:04:26", "throughput": 7754.14, "total_tokens": 10265440}
{"current_steps": 4420, "total_steps": 5305, "loss": 0.0, "lr": 4.1302324098338315e-07, "epoch": 4.165881244109331, "percentage": 83.32, "elapsed_time": "0:22:04", "remaining_time": "0:04:25", "throughput": 7759.43, "total_tokens": 10276704}
{"current_steps": 4425, "total_steps": 5305, "loss": 0.0, "lr": 4.0850569570541036e-07, "epoch": 4.170593779453346, "percentage": 83.41, "elapsed_time": "0:22:04", "remaining_time": "0:04:23", "throughput": 7763.83, "total_tokens": 10286496}
{"current_steps": 4430, "total_steps": 5305, "loss": 0.0, "lr": 4.0401079328419384e-07, "epoch": 4.175306314797361, "percentage": 83.51, "elapsed_time": "0:22:05", "remaining_time": "0:04:21", "throughput": 7768.93, "total_tokens": 10297376}
{"current_steps": 4435, "total_steps": 5305, "loss": 0.0, "lr": 3.995385823822767e-07, "epoch": 4.180018850141376, "percentage": 83.6, "elapsed_time": "0:22:05", "remaining_time": "0:04:20", "throughput": 7773.21, "total_tokens": 10306976}
{"current_steps": 4440, "total_steps": 5305, "loss": 0.0, "lr": 3.9508911141653896e-07, "epoch": 4.184731385485391, "percentage": 83.69, "elapsed_time": "0:22:06", "remaining_time": "0:04:18", "throughput": 7778.88, "total_tokens": 10318880}
{"current_steps": 4445, "total_steps": 5305, "loss": 0.0001, "lr": 3.906624285576771e-07, "epoch": 4.189443920829406, "percentage": 83.79, "elapsed_time": "0:22:07", "remaining_time": "0:04:16", "throughput": 7784.5, "total_tokens": 10330784}
{"current_steps": 4450, "total_steps": 5305, "loss": 0.0, "lr": 3.862585817296771e-07, "epoch": 4.194156456173421, "percentage": 83.88, "elapsed_time": "0:22:07", "remaining_time": "0:04:15", "throughput": 7789.22, "total_tokens": 10341088}
{"current_steps": 4455, "total_steps": 5305, "loss": 0.0, "lr": 3.8187761860929956e-07, "epoch": 4.198868991517436, "percentage": 83.98, "elapsed_time": "0:22:08", "remaining_time": "0:04:13", "throughput": 7794.33, "total_tokens": 10352096}
{"current_steps": 4460, "total_steps": 5305, "loss": 0.0, "lr": 3.775195866255618e-07, "epoch": 4.203581526861451, "percentage": 84.07, "elapsed_time": "0:22:08", "remaining_time": "0:04:11", "throughput": 7800.24, "total_tokens": 10364448}
{"current_steps": 4465, "total_steps": 5305, "loss": 0.0, "lr": 3.731845329592268e-07, "epoch": 4.208294062205466, "percentage": 84.17, "elapsed_time": "0:22:09", "remaining_time": "0:04:10", "throughput": 7806.17, "total_tokens": 10376928}
{"current_steps": 4470, "total_steps": 5305, "loss": 0.0, "lr": 3.6887250454228666e-07, "epoch": 4.213006597549482, "percentage": 84.26, "elapsed_time": "0:22:09", "remaining_time": "0:04:08", "throughput": 7812.01, "total_tokens": 10389216}
{"current_steps": 4475, "total_steps": 5305, "loss": 0.0, "lr": 3.6458354805746304e-07, "epoch": 4.217719132893497, "percentage": 84.35, "elapsed_time": "0:22:10", "remaining_time": "0:04:06", "throughput": 7820.96, "total_tokens": 10406944}
{"current_steps": 4480, "total_steps": 5305, "loss": 0.0, "lr": 3.603177099376931e-07, "epoch": 4.222431668237512, "percentage": 84.45, "elapsed_time": "0:22:11", "remaining_time": "0:04:05", "throughput": 7825.9, "total_tokens": 10417760}
{"current_steps": 4485, "total_steps": 5305, "loss": 0.0, "lr": 3.5607503636563484e-07, "epoch": 4.227144203581527, "percentage": 84.54, "elapsed_time": "0:22:11", "remaining_time": "0:04:03", "throughput": 7831.24, "total_tokens": 10429216}
{"current_steps": 4490, "total_steps": 5305, "loss": 0.0, "lr": 3.5185557327315797e-07, "epoch": 4.231856738925542, "percentage": 84.64, "elapsed_time": "0:22:12", "remaining_time": "0:04:01", "throughput": 7837.81, "total_tokens": 10442784}
{"current_steps": 4495, "total_steps": 5305, "loss": 0.0, "lr": 3.47659366340857e-07, "epoch": 4.236569274269557, "percentage": 84.73, "elapsed_time": "0:22:12", "remaining_time": "0:04:00", "throughput": 7843.26, "total_tokens": 10454496}
{"current_steps": 4500, "total_steps": 5305, "loss": 0.0, "lr": 3.43486460997548e-07, "epoch": 4.2412818096135725, "percentage": 84.83, "elapsed_time": "0:22:13", "remaining_time": "0:03:58", "throughput": 7848.93, "total_tokens": 10466464}
{"current_steps": 4505, "total_steps": 5305, "loss": 0.0, "lr": 3.393369024197826e-07, "epoch": 4.245994344957587, "percentage": 84.92, "elapsed_time": "0:22:14", "remaining_time": "0:03:56", "throughput": 7853.57, "total_tokens": 10476768}
{"current_steps": 4510, "total_steps": 5305, "loss": 0.0, "lr": 3.352107355313536e-07, "epoch": 4.250706880301602, "percentage": 85.01, "elapsed_time": "0:22:14", "remaining_time": "0:03:55", "throughput": 7858.37, "total_tokens": 10487392}
{"current_steps": 4515, "total_steps": 5305, "loss": 0.0, "lr": 3.311080050028148e-07, "epoch": 4.255419415645617, "percentage": 85.11, "elapsed_time": "0:22:15", "remaining_time": "0:03:53", "throughput": 7863.26, "total_tokens": 10498144}
{"current_steps": 4520, "total_steps": 5305, "loss": 0.0782, "lr": 3.2702875525099235e-07, "epoch": 4.260131950989632, "percentage": 85.2, "elapsed_time": "0:22:15", "remaining_time": "0:03:51", "throughput": 7867.51, "total_tokens": 10507808}
{"current_steps": 4522, "total_steps": 5305, "eval_loss": 0.5548250675201416, "epoch": 4.262016965127239, "percentage": 85.24, "elapsed_time": "0:22:18", "remaining_time": "0:03:51", "throughput": 7853.49, "total_tokens": 10512416}
{"current_steps": 4525, "total_steps": 5305, "loss": 0.0, "lr": 3.2297303043850564e-07, "epoch": 4.264844486333647, "percentage": 85.3, "elapsed_time": "0:22:45", "remaining_time": "0:03:55", "throughput": 7700.29, "total_tokens": 10517408}
{"current_steps": 4530, "total_steps": 5305, "loss": 0.0, "lr": 3.189408744732897e-07, "epoch": 4.269557021677663, "percentage": 85.39, "elapsed_time": "0:22:46", "remaining_time": "0:03:53", "throughput": 7705.3, "total_tokens": 10528416}
{"current_steps": 4535, "total_steps": 5305, "loss": 0.0, "lr": 3.149323310081201e-07, "epoch": 4.274269557021678, "percentage": 85.49, "elapsed_time": "0:22:46", "remaining_time": "0:03:52", "throughput": 7711.3, "total_tokens": 10541216}
{"current_steps": 4540, "total_steps": 5305, "loss": 0.0, "lr": 3.1094744344013855e-07, "epoch": 4.278982092365693, "percentage": 85.58, "elapsed_time": "0:22:47", "remaining_time": "0:03:50", "throughput": 7717.32, "total_tokens": 10554016}
{"current_steps": 4545, "total_steps": 5305, "loss": 0.0, "lr": 3.069862549103841e-07, "epoch": 4.283694627709708, "percentage": 85.67, "elapsed_time": "0:22:48", "remaining_time": "0:03:48", "throughput": 7721.44, "total_tokens": 10563552}
{"current_steps": 4550, "total_steps": 5305, "loss": 0.0, "lr": 3.030488083033273e-07, "epoch": 4.288407163053723, "percentage": 85.77, "elapsed_time": "0:22:48", "remaining_time": "0:03:47", "throughput": 7727.44, "total_tokens": 10576288}
{"current_steps": 4555, "total_steps": 5305, "loss": 0.0, "lr": 2.991351462464037e-07, "epoch": 4.293119698397738, "percentage": 85.86, "elapsed_time": "0:22:49", "remaining_time": "0:03:45", "throughput": 7732.14, "total_tokens": 10586784}
{"current_steps": 4560, "total_steps": 5305, "loss": 0.0, "lr": 2.9524531110955406e-07, "epoch": 4.297832233741753, "percentage": 85.96, "elapsed_time": "0:22:49", "remaining_time": "0:03:43", "throughput": 7737.14, "total_tokens": 10597792}
{"current_steps": 4565, "total_steps": 5305, "loss": 0.0, "lr": 2.913793450047639e-07, "epoch": 4.3025447690857686, "percentage": 86.05, "elapsed_time": "0:22:50", "remaining_time": "0:03:42", "throughput": 7743.27, "total_tokens": 10610720}
{"current_steps": 4570, "total_steps": 5305, "loss": 0.0, "lr": 2.875372897856113e-07, "epoch": 4.3072573044297835, "percentage": 86.15, "elapsed_time": "0:22:50", "remaining_time": "0:03:40", "throughput": 7748.52, "total_tokens": 10622176}
{"current_steps": 4575, "total_steps": 5305, "loss": 0.0, "lr": 2.837191870468084e-07, "epoch": 4.311969839773798, "percentage": 86.24, "elapsed_time": "0:22:51", "remaining_time": "0:03:38", "throughput": 7753.31, "total_tokens": 10632864}
{"current_steps": 4580, "total_steps": 5305, "loss": 0.0039, "lr": 2.7992507812375557e-07, "epoch": 4.316682375117813, "percentage": 86.33, "elapsed_time": "0:22:51", "remaining_time": "0:03:37", "throughput": 7757.65, "total_tokens": 10642784}
{"current_steps": 4585, "total_steps": 5305, "loss": 0.0153, "lr": 2.76155004092091e-07, "epoch": 4.321394910461828, "percentage": 86.43, "elapsed_time": "0:22:52", "remaining_time": "0:03:35", "throughput": 7762.1, "total_tokens": 10652896}
{"current_steps": 4590, "total_steps": 5305, "loss": 0.1078, "lr": 2.7240900576724904e-07, "epoch": 4.326107445805843, "percentage": 86.52, "elapsed_time": "0:22:52", "remaining_time": "0:03:33", "throughput": 7767.85, "total_tokens": 10665248}
{"current_steps": 4595, "total_steps": 5305, "loss": 0.0001, "lr": 2.686871237040151e-07, "epoch": 4.330819981149858, "percentage": 86.62, "elapsed_time": "0:22:53", "remaining_time": "0:03:32", "throughput": 7772.88, "total_tokens": 10676384}
{"current_steps": 4600, "total_steps": 5305, "loss": 0.0, "lr": 2.6498939819608827e-07, "epoch": 4.335532516493874, "percentage": 86.71, "elapsed_time": "0:22:54", "remaining_time": "0:03:30", "throughput": 7778.41, "total_tokens": 10688352}
{"current_steps": 4605, "total_steps": 5305, "loss": 0.0, "lr": 2.613158692756443e-07, "epoch": 4.340245051837889, "percentage": 86.8, "elapsed_time": "0:22:54", "remaining_time": "0:03:28", "throughput": 7782.63, "total_tokens": 10698080}
{"current_steps": 4610, "total_steps": 5305, "loss": 0.0, "lr": 2.576665767129055e-07, "epoch": 4.344957587181904, "percentage": 86.9, "elapsed_time": "0:22:55", "remaining_time": "0:03:27", "throughput": 7788.52, "total_tokens": 10710816}
{"current_steps": 4615, "total_steps": 5305, "loss": 0.0, "lr": 2.5404156001570257e-07, "epoch": 4.349670122525919, "percentage": 86.99, "elapsed_time": "0:22:55", "remaining_time": "0:03:25", "throughput": 7793.88, "total_tokens": 10722592}
{"current_steps": 4620, "total_steps": 5305, "loss": 0.0, "lr": 2.5044085842905686e-07, "epoch": 4.354382657869934, "percentage": 87.09, "elapsed_time": "0:22:56", "remaining_time": "0:03:24", "throughput": 7799.44, "total_tokens": 10734752}
{"current_steps": 4625, "total_steps": 5305, "loss": 0.0001, "lr": 2.4686451093474673e-07, "epoch": 4.359095193213949, "percentage": 87.18, "elapsed_time": "0:22:56", "remaining_time": "0:03:22", "throughput": 7804.77, "total_tokens": 10746464}
{"current_steps": 4630, "total_steps": 5305, "loss": 0.0, "lr": 2.433125562508917e-07, "epoch": 4.363807728557964, "percentage": 87.28, "elapsed_time": "0:22:57", "remaining_time": "0:03:20", "throughput": 7809.7, "total_tokens": 10757472}
{"current_steps": 4635, "total_steps": 5305, "loss": 0.1078, "lr": 2.3978503283152847e-07, "epoch": 4.36852026390198, "percentage": 87.37, "elapsed_time": "0:22:58", "remaining_time": "0:03:19", "throughput": 7814.96, "total_tokens": 10769056}
{"current_steps": 4640, "total_steps": 5305, "loss": 0.0, "lr": 2.3628197886619852e-07, "epoch": 4.3732327992459945, "percentage": 87.46, "elapsed_time": "0:22:58", "remaining_time": "0:03:17", "throughput": 7820.06, "total_tokens": 10780384}
{"current_steps": 4645, "total_steps": 5305, "loss": 0.0, "lr": 2.3280343227953305e-07, "epoch": 4.3779453345900095, "percentage": 87.56, "elapsed_time": "0:22:59", "remaining_time": "0:03:15", "throughput": 7825.81, "total_tokens": 10792928}
{"current_steps": 4650, "total_steps": 5305, "loss": 0.0, "lr": 2.293494307308411e-07, "epoch": 4.382657869934024, "percentage": 87.65, "elapsed_time": "0:22:59", "remaining_time": "0:03:14", "throughput": 7830.65, "total_tokens": 10803808}
{"current_steps": 4655, "total_steps": 5305, "loss": 0.0, "lr": 2.2592001161370392e-07, "epoch": 4.387370405278039, "percentage": 87.75, "elapsed_time": "0:23:00", "remaining_time": "0:03:12", "throughput": 7835.39, "total_tokens": 10814496}
{"current_steps": 4660, "total_steps": 5305, "loss": 0.0, "lr": 2.2251521205557042e-07, "epoch": 4.392082940622054, "percentage": 87.84, "elapsed_time": "0:23:00", "remaining_time": "0:03:11", "throughput": 7841.25, "total_tokens": 10827168}
{"current_steps": 4665, "total_steps": 5305, "loss": 0.0, "lr": 2.1913506891735242e-07, "epoch": 4.39679547596607, "percentage": 87.94, "elapsed_time": "0:23:01", "remaining_time": "0:03:09", "throughput": 7846.85, "total_tokens": 10839392}
{"current_steps": 4670, "total_steps": 5305, "loss": 0.0, "lr": 2.1577961879302807e-07, "epoch": 4.401508011310085, "percentage": 88.03, "elapsed_time": "0:23:01", "remaining_time": "0:03:07", "throughput": 7852.5, "total_tokens": 10851744}
{"current_steps": 4675, "total_steps": 5305, "loss": 0.0, "lr": 2.124488980092454e-07, "epoch": 4.4062205466541, "percentage": 88.12, "elapsed_time": "0:23:02", "remaining_time": "0:03:06", "throughput": 7858.44, "total_tokens": 10864608}
{"current_steps": 4680, "total_steps": 5305, "loss": 0.0, "lr": 2.0914294262492723e-07, "epoch": 4.410933081998115, "percentage": 88.22, "elapsed_time": "0:23:03", "remaining_time": "0:03:04", "throughput": 7864.59, "total_tokens": 10877856}
{"current_steps": 4685, "total_steps": 5305, "loss": 0.0044, "lr": 2.0586178843088473e-07, "epoch": 4.41564561734213, "percentage": 88.31, "elapsed_time": "0:23:03", "remaining_time": "0:03:03", "throughput": 7871.04, "total_tokens": 10891616}
{"current_steps": 4690, "total_steps": 5305, "loss": 0.0, "lr": 2.026054709494235e-07, "epoch": 4.420358152686145, "percentage": 88.41, "elapsed_time": "0:23:04", "remaining_time": "0:03:01", "throughput": 7874.97, "total_tokens": 10901024}
{"current_steps": 4695, "total_steps": 5305, "loss": 0.0, "lr": 1.9937402543396683e-07, "epoch": 4.425070688030161, "percentage": 88.5, "elapsed_time": "0:23:04", "remaining_time": "0:02:59", "throughput": 7879.0, "total_tokens": 10910560}
{"current_steps": 4700, "total_steps": 5305, "loss": 0.0, "lr": 1.961674868686675e-07, "epoch": 4.429783223374176, "percentage": 88.6, "elapsed_time": "0:23:05", "remaining_time": "0:02:58", "throughput": 7884.01, "total_tokens": 10921824}
{"current_steps": 4705, "total_steps": 5305, "loss": 0.0, "lr": 1.929858899680323e-07, "epoch": 4.434495758718191, "percentage": 88.69, "elapsed_time": "0:23:05", "remaining_time": "0:02:56", "throughput": 7890.05, "total_tokens": 10934944}
{"current_steps": 4710, "total_steps": 5305, "loss": 0.0922, "lr": 1.8982926917654575e-07, "epoch": 4.4392082940622055, "percentage": 88.78, "elapsed_time": "0:23:06", "remaining_time": "0:02:55", "throughput": 7895.14, "total_tokens": 10946400}
{"current_steps": 4715, "total_steps": 5305, "loss": 0.0, "lr": 1.8669765866829724e-07, "epoch": 4.4439208294062205, "percentage": 88.88, "elapsed_time": "0:23:07", "remaining_time": "0:02:53", "throughput": 7900.41, "total_tokens": 10958112}
{"current_steps": 4720, "total_steps": 5305, "loss": 0.0, "lr": 1.835910923466097e-07, "epoch": 4.448633364750235, "percentage": 88.97, "elapsed_time": "0:23:07", "remaining_time": "0:02:51", "throughput": 7906.04, "total_tokens": 10970528}
{"current_steps": 4725, "total_steps": 5305, "loss": 0.0, "lr": 1.805096038436749e-07, "epoch": 4.45334590009425, "percentage": 89.07, "elapsed_time": "0:23:08", "remaining_time": "0:02:50", "throughput": 7911.19, "total_tokens": 10982048}
{"current_steps": 4730, "total_steps": 5305, "loss": 0.0, "lr": 1.774532265201867e-07, "epoch": 4.458058435438266, "percentage": 89.16, "elapsed_time": "0:23:08", "remaining_time": "0:02:48", "throughput": 7917.03, "total_tokens": 10994848}
{"current_steps": 4735, "total_steps": 5305, "loss": 0.0001, "lr": 1.7442199346498294e-07, "epoch": 4.462770970782281, "percentage": 89.26, "elapsed_time": "0:23:09", "remaining_time": "0:02:47", "throughput": 7921.36, "total_tokens": 11004896}
{"current_steps": 4740, "total_steps": 5305, "loss": 0.0, "lr": 1.7141593749468361e-07, "epoch": 4.467483506126296, "percentage": 89.35, "elapsed_time": "0:23:09", "remaining_time": "0:02:45", "throughput": 7926.85, "total_tokens": 11017056}
{"current_steps": 4745, "total_steps": 5305, "loss": 0.0, "lr": 1.6843509115333917e-07, "epoch": 4.472196041470311, "percentage": 89.44, "elapsed_time": "0:23:10", "remaining_time": "0:02:44", "throughput": 7931.04, "total_tokens": 11026912}
{"current_steps": 4750, "total_steps": 5305, "loss": 0.0, "lr": 1.6547948671207515e-07, "epoch": 4.476908576814326, "percentage": 89.54, "elapsed_time": "0:23:10", "remaining_time": "0:02:42", "throughput": 7936.04, "total_tokens": 11038176}
{"current_steps": 4755, "total_steps": 5305, "loss": 0.0, "lr": 1.6254915616874645e-07, "epoch": 4.481621112158341, "percentage": 89.63, "elapsed_time": "0:23:11", "remaining_time": "0:02:40", "throughput": 7939.99, "total_tokens": 11047648}
{"current_steps": 4760, "total_steps": 5305, "loss": 0.0441, "lr": 1.5964413124758492e-07, "epoch": 4.486333647502356, "percentage": 89.73, "elapsed_time": "0:23:11", "remaining_time": "0:02:39", "throughput": 7943.8, "total_tokens": 11056864}
{"current_steps": 4765, "total_steps": 5305, "loss": 0.0, "lr": 1.5676444339886327e-07, "epoch": 4.491046182846372, "percentage": 89.82, "elapsed_time": "0:23:12", "remaining_time": "0:02:37", "throughput": 7948.58, "total_tokens": 11067744}
{"current_steps": 4770, "total_steps": 5305, "loss": 0.0, "lr": 1.5391012379854937e-07, "epoch": 4.495758718190387, "percentage": 89.92, "elapsed_time": "0:23:12", "remaining_time": "0:02:36", "throughput": 7952.91, "total_tokens": 11077920}
{"current_steps": 4775, "total_steps": 5305, "loss": 0.0, "lr": 1.5108120334797e-07, "epoch": 4.500471253534402, "percentage": 90.01, "elapsed_time": "0:23:13", "remaining_time": "0:02:34", "throughput": 7957.67, "total_tokens": 11088864}
{"current_steps": 4780, "total_steps": 5305, "loss": 0.0, "lr": 1.4827771267347662e-07, "epoch": 4.5051837888784165, "percentage": 90.1, "elapsed_time": "0:23:13", "remaining_time": "0:02:33", "throughput": 7961.61, "total_tokens": 11098336}
{"current_steps": 4785, "total_steps": 5305, "loss": 0.0, "lr": 1.4549968212611538e-07, "epoch": 4.5098963242224315, "percentage": 90.2, "elapsed_time": "0:23:14", "remaining_time": "0:02:31", "throughput": 7965.47, "total_tokens": 11107680}
{"current_steps": 4788, "total_steps": 5305, "eval_loss": 0.5418137311935425, "epoch": 4.512723845428841, "percentage": 90.25, "elapsed_time": "0:23:17", "remaining_time": "0:02:30", "throughput": 7953.37, "total_tokens": 11115040}
{"current_steps": 4790, "total_steps": 5305, "loss": 0.0, "lr": 1.4274714178129534e-07, "epoch": 4.514608859566446, "percentage": 90.29, "elapsed_time": "0:24:00", "remaining_time": "0:02:34", "throughput": 7718.57, "total_tokens": 11120480}
{"current_steps": 4795, "total_steps": 5305, "loss": 0.0, "lr": 1.4002012143846472e-07, "epoch": 4.519321394910461, "percentage": 90.39, "elapsed_time": "0:24:01", "remaining_time": "0:02:33", "throughput": 7723.76, "total_tokens": 11132320}
{"current_steps": 4800, "total_steps": 5305, "loss": 0.0006, "lr": 1.3731865062078853e-07, "epoch": 4.524033930254477, "percentage": 90.48, "elapsed_time": "0:24:02", "remaining_time": "0:02:31", "throughput": 7731.54, "total_tokens": 11148960}
{"current_steps": 4805, "total_steps": 5305, "loss": 0.0, "lr": 1.3464275857482778e-07, "epoch": 4.528746465598492, "percentage": 90.57, "elapsed_time": "0:24:02", "remaining_time": "0:02:30", "throughput": 7736.27, "total_tokens": 11159968}
{"current_steps": 4810, "total_steps": 5305, "loss": 0.122, "lr": 1.3199247427022528e-07, "epoch": 4.533459000942507, "percentage": 90.67, "elapsed_time": "0:24:03", "remaining_time": "0:02:28", "throughput": 7740.91, "total_tokens": 11170848}
{"current_steps": 4815, "total_steps": 5305, "loss": 0.0, "lr": 1.293678263993872e-07, "epoch": 4.538171536286522, "percentage": 90.76, "elapsed_time": "0:24:03", "remaining_time": "0:02:26", "throughput": 7746.95, "total_tokens": 11184288}
{"current_steps": 4820, "total_steps": 5305, "loss": 0.0, "lr": 1.2676884337717882e-07, "epoch": 4.542884071630537, "percentage": 90.86, "elapsed_time": "0:24:04", "remaining_time": "0:02:25", "throughput": 7753.07, "total_tokens": 11197856}
{"current_steps": 4825, "total_steps": 5305, "loss": 0.0, "lr": 1.241955533406114e-07, "epoch": 4.547596606974552, "percentage": 90.95, "elapsed_time": "0:24:04", "remaining_time": "0:02:23", "throughput": 7758.24, "total_tokens": 11209696}
{"current_steps": 4830, "total_steps": 5305, "loss": 0.0, "lr": 1.2164798414854073e-07, "epoch": 4.552309142318568, "percentage": 91.05, "elapsed_time": "0:24:05", "remaining_time": "0:02:22", "throughput": 7762.6, "total_tokens": 11220064}
{"current_steps": 4835, "total_steps": 5305, "loss": 0.0, "lr": 1.1912616338136396e-07, "epoch": 4.557021677662583, "percentage": 91.14, "elapsed_time": "0:24:05", "remaining_time": "0:02:20", "throughput": 7766.9, "total_tokens": 11230304}
{"current_steps": 4840, "total_steps": 5305, "loss": 0.0, "lr": 1.1663011834072257e-07, "epoch": 4.561734213006598, "percentage": 91.23, "elapsed_time": "0:24:06", "remaining_time": "0:02:18", "throughput": 7770.92, "total_tokens": 11240096}
{"current_steps": 4845, "total_steps": 5305, "loss": 0.0, "lr": 1.1415987604920492e-07, "epoch": 4.566446748350613, "percentage": 91.33, "elapsed_time": "0:24:06", "remaining_time": "0:02:17", "throughput": 7775.63, "total_tokens": 11251104}
{"current_steps": 4850, "total_steps": 5305, "loss": 0.0, "lr": 1.11715463250055e-07, "epoch": 4.5711592836946275, "percentage": 91.42, "elapsed_time": "0:24:07", "remaining_time": "0:02:15", "throughput": 7779.76, "total_tokens": 11261088}
{"current_steps": 4855, "total_steps": 5305, "loss": 0.0072, "lr": 1.0929690640688218e-07, "epoch": 4.5758718190386425, "percentage": 91.52, "elapsed_time": "0:24:08", "remaining_time": "0:02:14", "throughput": 7785.1, "total_tokens": 11273312}
{"current_steps": 4860, "total_steps": 5305, "loss": 0.0003, "lr": 1.0690423170337554e-07, "epoch": 4.580584354382658, "percentage": 91.61, "elapsed_time": "0:24:08", "remaining_time": "0:02:12", "throughput": 7790.09, "total_tokens": 11284896}
{"current_steps": 4865, "total_steps": 5305, "loss": 0.0, "lr": 1.0453746504302003e-07, "epoch": 4.585296889726673, "percentage": 91.71, "elapsed_time": "0:24:09", "remaining_time": "0:02:11", "throughput": 7794.05, "total_tokens": 11294560}
{"current_steps": 4870, "total_steps": 5305, "loss": 0.0813, "lr": 1.021966320488152e-07, "epoch": 4.590009425070688, "percentage": 91.8, "elapsed_time": "0:24:09", "remaining_time": "0:02:09", "throughput": 7800.1, "total_tokens": 11308128}
{"current_steps": 4875, "total_steps": 5305, "loss": 0.0, "lr": 9.988175806299877e-08, "epoch": 4.594721960414703, "percentage": 91.89, "elapsed_time": "0:24:10", "remaining_time": "0:02:07", "throughput": 7805.81, "total_tokens": 11321056}
{"current_steps": 4880, "total_steps": 5305, "loss": 0.0, "lr": 9.759286814677305e-08, "epoch": 4.599434495758718, "percentage": 91.99, "elapsed_time": "0:24:10", "remaining_time": "0:02:06", "throughput": 7811.76, "total_tokens": 11334496}
{"current_steps": 4885, "total_steps": 5305, "loss": 0.0, "lr": 9.532998708003061e-08, "epoch": 4.604147031102733, "percentage": 92.08, "elapsed_time": "0:24:11", "remaining_time": "0:02:04", "throughput": 7816.76, "total_tokens": 11346208}
{"current_steps": 4890, "total_steps": 5305, "loss": 0.0, "lr": 9.309313936108983e-08, "epoch": 4.608859566446748, "percentage": 92.18, "elapsed_time": "0:24:12", "remaining_time": "0:02:03", "throughput": 7821.89, "total_tokens": 11358112}
{"current_steps": 4895, "total_steps": 5305, "loss": 0.0, "lr": 9.088234920642703e-08, "epoch": 4.613572101790764, "percentage": 92.27, "elapsed_time": "0:24:12", "remaining_time": "0:02:01", "throughput": 7826.0, "total_tokens": 11368096}
{"current_steps": 4900, "total_steps": 5305, "loss": 0.0, "lr": 8.869764055041501e-08, "epoch": 4.618284637134779, "percentage": 92.37, "elapsed_time": "0:24:13", "remaining_time": "0:02:00", "throughput": 7830.6, "total_tokens": 11378976}
{"current_steps": 4905, "total_steps": 5305, "loss": 0.0, "lr": 8.653903704506389e-08, "epoch": 4.622997172478794, "percentage": 92.46, "elapsed_time": "0:24:13", "remaining_time": "0:01:58", "throughput": 7835.61, "total_tokens": 11390688}
{"current_steps": 4910, "total_steps": 5305, "loss": 0.0, "lr": 8.440656205976644e-08, "epoch": 4.627709707822809, "percentage": 92.55, "elapsed_time": "0:24:14", "remaining_time": "0:01:56", "throughput": 7840.11, "total_tokens": 11401440}
{"current_steps": 4915, "total_steps": 5305, "loss": 0.0, "lr": 8.230023868104231e-08, "epoch": 4.632422243166824, "percentage": 92.65, "elapsed_time": "0:24:14", "remaining_time": "0:01:55", "throughput": 7844.73, "total_tokens": 11412448}
{"current_steps": 4920, "total_steps": 5305, "loss": 0.0, "lr": 8.022008971229039e-08, "epoch": 4.6371347785108386, "percentage": 92.74, "elapsed_time": "0:24:15", "remaining_time": "0:01:53", "throughput": 7848.85, "total_tokens": 11422496}
{"current_steps": 4925, "total_steps": 5305, "loss": 0.0, "lr": 7.816613767354098e-08, "epoch": 4.6418473138548535, "percentage": 92.84, "elapsed_time": "0:24:15", "remaining_time": "0:01:52", "throughput": 7853.53, "total_tokens": 11433632}
{"current_steps": 4930, "total_steps": 5305, "loss": 0.0, "lr": 7.613840480121176e-08, "epoch": 4.646559849198869, "percentage": 92.93, "elapsed_time": "0:24:16", "remaining_time": "0:01:50", "throughput": 7858.96, "total_tokens": 11446112}
{"current_steps": 4935, "total_steps": 5305, "loss": 0.0, "lr": 7.41369130478689e-08, "epoch": 4.651272384542884, "percentage": 93.03, "elapsed_time": "0:24:17", "remaining_time": "0:01:49", "throughput": 7864.84, "total_tokens": 11459552}
{"current_steps": 4940, "total_steps": 5305, "loss": 0.0, "lr": 7.216168408198554e-08, "epoch": 4.655984919886899, "percentage": 93.12, "elapsed_time": "0:24:17", "remaining_time": "0:01:47", "throughput": 7869.19, "total_tokens": 11469984}
{"current_steps": 4945, "total_steps": 5305, "loss": 0.0, "lr": 7.021273928771221e-08, "epoch": 4.660697455230914, "percentage": 93.21, "elapsed_time": "0:24:18", "remaining_time": "0:01:46", "throughput": 7874.28, "total_tokens": 11481888}
{"current_steps": 4950, "total_steps": 5305, "loss": 0.0579, "lr": 6.829009976464102e-08, "epoch": 4.665409990574929, "percentage": 93.31, "elapsed_time": "0:24:18", "remaining_time": "0:01:44", "throughput": 7879.99, "total_tokens": 11494944}
{"current_steps": 4955, "total_steps": 5305, "loss": 0.0, "lr": 6.639378632757986e-08, "epoch": 4.670122525918944, "percentage": 93.4, "elapsed_time": "0:24:19", "remaining_time": "0:01:43", "throughput": 7884.22, "total_tokens": 11505184}
{"current_steps": 4960, "total_steps": 5305, "loss": 0.0, "lr": 6.452381950632469e-08, "epoch": 4.674835061262959, "percentage": 93.5, "elapsed_time": "0:24:19", "remaining_time": "0:01:41", "throughput": 7889.71, "total_tokens": 11517856}
{"current_steps": 4965, "total_steps": 5305, "loss": 0.0, "lr": 6.268021954544095e-08, "epoch": 4.679547596606975, "percentage": 93.59, "elapsed_time": "0:24:20", "remaining_time": "0:01:40", "throughput": 7894.92, "total_tokens": 11530016}
{"current_steps": 4970, "total_steps": 5305, "loss": 0.0, "lr": 6.08630064040408e-08, "epoch": 4.68426013195099, "percentage": 93.69, "elapsed_time": "0:24:21", "remaining_time": "0:01:38", "throughput": 7901.82, "total_tokens": 11545376}
{"current_steps": 4975, "total_steps": 5305, "loss": 0.0, "lr": 5.9072199755567936e-08, "epoch": 4.688972667295005, "percentage": 93.78, "elapsed_time": "0:24:21", "remaining_time": "0:01:36", "throughput": 7906.43, "total_tokens": 11556448}
{"current_steps": 4980, "total_steps": 5305, "loss": 0.0, "lr": 5.730781898758614e-08, "epoch": 4.69368520263902, "percentage": 93.87, "elapsed_time": "0:24:22", "remaining_time": "0:01:35", "throughput": 7910.42, "total_tokens": 11566304}
{"current_steps": 4985, "total_steps": 5305, "loss": 0.0, "lr": 5.556988320156831e-08, "epoch": 4.698397737983035, "percentage": 93.97, "elapsed_time": "0:24:22", "remaining_time": "0:01:33", "throughput": 7914.89, "total_tokens": 11577056}
{"current_steps": 4990, "total_steps": 5305, "loss": 0.0, "lr": 5.3858411212689146e-08, "epoch": 4.7031102733270505, "percentage": 94.06, "elapsed_time": "0:24:23", "remaining_time": "0:01:32", "throughput": 7920.25, "total_tokens": 11589536}
{"current_steps": 4995, "total_steps": 5305, "loss": 0.0001, "lr": 5.2173421549621685e-08, "epoch": 4.707822808671065, "percentage": 94.16, "elapsed_time": "0:24:23", "remaining_time": "0:01:30", "throughput": 7924.38, "total_tokens": 11599648}
{"current_steps": 5000, "total_steps": 5305, "loss": 0.0, "lr": 5.051493245433775e-08, "epoch": 4.71253534401508, "percentage": 94.25, "elapsed_time": "0:24:24", "remaining_time": "0:01:29", "throughput": 7928.77, "total_tokens": 11610272}
{"current_steps": 5005, "total_steps": 5305, "loss": 0.0, "lr": 4.888296188190977e-08, "epoch": 4.717247879359095, "percentage": 94.34, "elapsed_time": "0:24:24", "remaining_time": "0:01:27", "throughput": 7933.1, "total_tokens": 11620768}
{"current_steps": 5010, "total_steps": 5305, "loss": 0.0, "lr": 4.727752750031511e-08, "epoch": 4.72196041470311, "percentage": 94.44, "elapsed_time": "0:24:25", "remaining_time": "0:01:26", "throughput": 7938.08, "total_tokens": 11632608}
{"current_steps": 5015, "total_steps": 5305, "loss": 0.0, "lr": 4.5698646690247874e-08, "epoch": 4.726672950047125, "percentage": 94.53, "elapsed_time": "0:24:26", "remaining_time": "0:01:24", "throughput": 7943.31, "total_tokens": 11644896}
{"current_steps": 5020, "total_steps": 5305, "loss": 0.0, "lr": 4.414633654492767e-08, "epoch": 4.73138548539114, "percentage": 94.63, "elapsed_time": "0:24:26", "remaining_time": "0:01:23", "throughput": 7950.77, "total_tokens": 11661344}
{"current_steps": 5025, "total_steps": 5305, "loss": 0.0, "lr": 4.2620613869915894e-08, "epoch": 4.736098020735156, "percentage": 94.72, "elapsed_time": "0:24:27", "remaining_time": "0:01:21", "throughput": 7955.29, "total_tokens": 11672288}
{"current_steps": 5030, "total_steps": 5305, "loss": 0.0, "lr": 4.112149518293362e-08, "epoch": 4.740810556079171, "percentage": 94.82, "elapsed_time": "0:24:27", "remaining_time": "0:01:20", "throughput": 7960.77, "total_tokens": 11684960}
{"current_steps": 5035, "total_steps": 5305, "loss": 0.0, "lr": 3.9648996713683715e-08, "epoch": 4.745523091423186, "percentage": 94.91, "elapsed_time": "0:24:28", "remaining_time": "0:01:18", "throughput": 7965.44, "total_tokens": 11696160}
{"current_steps": 5040, "total_steps": 5305, "loss": 0.0, "lr": 3.8203134403672905e-08, "epoch": 4.750235626767201, "percentage": 95.0, "elapsed_time": "0:24:28", "remaining_time": "0:01:17", "throughput": 7969.35, "total_tokens": 11705952}
{"current_steps": 5045, "total_steps": 5305, "loss": 0.0, "lr": 3.678392390604163e-08, "epoch": 4.754948162111216, "percentage": 95.1, "elapsed_time": "0:24:29", "remaining_time": "0:01:15", "throughput": 7973.51, "total_tokens": 11716192}
{"current_steps": 5050, "total_steps": 5305, "loss": 0.0, "lr": 3.539138058539282e-08, "epoch": 4.759660697455231, "percentage": 95.19, "elapsed_time": "0:24:29", "remaining_time": "0:01:14", "throughput": 7978.56, "total_tokens": 11728160}
{"current_steps": 5054, "total_steps": 5305, "eval_loss": 0.54215008020401, "epoch": 4.763430725730443, "percentage": 95.27, "elapsed_time": "0:24:33", "remaining_time": "0:01:13", "throughput": 7966.91, "total_tokens": 11736672}
{"current_steps": 5055, "total_steps": 5305, "loss": 0.0, "lr": 3.4025519517626174e-08, "epoch": 4.764373232799246, "percentage": 95.29, "elapsed_time": "0:24:56", "remaining_time": "0:01:13", "throughput": 7845.84, "total_tokens": 11738720}
{"current_steps": 5060, "total_steps": 5305, "loss": 0.0, "lr": 3.268635548977633e-08, "epoch": 4.7690857681432615, "percentage": 95.38, "elapsed_time": "0:24:56", "remaining_time": "0:01:12", "throughput": 7850.57, "total_tokens": 11750176}
{"current_steps": 5065, "total_steps": 5305, "loss": 0.0, "lr": 3.137390299984888e-08, "epoch": 4.773798303487276, "percentage": 95.48, "elapsed_time": "0:24:57", "remaining_time": "0:01:10", "throughput": 7855.17, "total_tokens": 11761312}
{"current_steps": 5070, "total_steps": 5305, "loss": 0.0, "lr": 3.0088176256668765e-08, "epoch": 4.778510838831291, "percentage": 95.57, "elapsed_time": "0:24:57", "remaining_time": "0:01:09", "throughput": 7860.39, "total_tokens": 11773728}
{"current_steps": 5075, "total_steps": 5305, "loss": 0.0, "lr": 2.8829189179721552e-08, "epoch": 4.783223374175306, "percentage": 95.66, "elapsed_time": "0:24:58", "remaining_time": "0:01:07", "throughput": 7864.87, "total_tokens": 11784672}
{"current_steps": 5080, "total_steps": 5305, "loss": 0.0, "lr": 2.759695539900603e-08, "epoch": 4.787935909519321, "percentage": 95.76, "elapsed_time": "0:24:58", "remaining_time": "0:01:06", "throughput": 7869.82, "total_tokens": 11796512}
{"current_steps": 5085, "total_steps": 5305, "loss": 0.0, "lr": 2.639148825488491e-08, "epoch": 4.792648444863336, "percentage": 95.85, "elapsed_time": "0:24:59", "remaining_time": "0:01:04", "throughput": 7875.84, "total_tokens": 11810464}
{"current_steps": 5090, "total_steps": 5305, "loss": 0.0, "lr": 2.5212800797941582e-08, "epoch": 4.797360980207351, "percentage": 95.95, "elapsed_time": "0:25:00", "remaining_time": "0:01:03", "throughput": 7879.94, "total_tokens": 11820768}
{"current_steps": 5095, "total_steps": 5305, "loss": 0.0, "lr": 2.406090578883691e-08, "epoch": 4.802073515551367, "percentage": 96.04, "elapsed_time": "0:25:00", "remaining_time": "0:01:01", "throughput": 7884.45, "total_tokens": 11831776}
{"current_steps": 5100, "total_steps": 5305, "loss": 0.0, "lr": 2.2935815698174045e-08, "epoch": 4.806786050895382, "percentage": 96.14, "elapsed_time": "0:25:01", "remaining_time": "0:01:00", "throughput": 7889.19, "total_tokens": 11843296}
{"current_steps": 5105, "total_steps": 5305, "loss": 0.0, "lr": 2.1837542706359958e-08, "epoch": 4.811498586239397, "percentage": 96.23, "elapsed_time": "0:25:01", "remaining_time": "0:00:58", "throughput": 7896.57, "total_tokens": 11860000}
{"current_steps": 5110, "total_steps": 5305, "loss": 0.0, "lr": 2.0766098703477178e-08, "epoch": 4.816211121583412, "percentage": 96.32, "elapsed_time": "0:25:02", "remaining_time": "0:00:57", "throughput": 7901.65, "total_tokens": 11872160}
{"current_steps": 5115, "total_steps": 5305, "loss": 0.0, "lr": 1.9721495289152237e-08, "epoch": 4.820923656927427, "percentage": 96.42, "elapsed_time": "0:25:03", "remaining_time": "0:00:55", "throughput": 7906.12, "total_tokens": 11883168}
{"current_steps": 5120, "total_steps": 5305, "loss": 0.0, "lr": 1.8703743772430783e-08, "epoch": 4.825636192271442, "percentage": 96.51, "elapsed_time": "0:25:03", "remaining_time": "0:00:54", "throughput": 7911.31, "total_tokens": 11895584}
{"current_steps": 5125, "total_steps": 5305, "loss": 0.0, "lr": 1.7712855171655996e-08, "epoch": 4.830348727615457, "percentage": 96.61, "elapsed_time": "0:25:04", "remaining_time": "0:00:52", "throughput": 7915.89, "total_tokens": 11906784}
{"current_steps": 5130, "total_steps": 5305, "loss": 0.0, "lr": 1.6748840214348972e-08, "epoch": 4.8350612629594725, "percentage": 96.7, "elapsed_time": "0:25:04", "remaining_time": "0:00:51", "throughput": 7920.26, "total_tokens": 11917600}
{"current_steps": 5135, "total_steps": 5305, "loss": 0.0, "lr": 1.5811709337091862e-08, "epoch": 4.839773798303487, "percentage": 96.8, "elapsed_time": "0:25:05", "remaining_time": "0:00:49", "throughput": 7925.25, "total_tokens": 11929632}
{"current_steps": 5140, "total_steps": 5305, "loss": 0.0, "lr": 1.4901472685415475e-08, "epoch": 4.844486333647502, "percentage": 96.89, "elapsed_time": "0:25:05", "remaining_time": "0:00:48", "throughput": 7928.71, "total_tokens": 11938720}
{"current_steps": 5145, "total_steps": 5305, "loss": 0.0072, "lr": 1.4018140113689904e-08, "epoch": 4.849198868991517, "percentage": 96.98, "elapsed_time": "0:25:06", "remaining_time": "0:00:46", "throughput": 7934.14, "total_tokens": 11951648}
{"current_steps": 5150, "total_steps": 5305, "loss": 0.0, "lr": 1.3161721185016852e-08, "epoch": 4.853911404335532, "percentage": 97.08, "elapsed_time": "0:25:06", "remaining_time": "0:00:45", "throughput": 7938.43, "total_tokens": 11962336}
{"current_steps": 5155, "total_steps": 5305, "loss": 0.0, "lr": 1.2332225171126366e-08, "epoch": 4.858623939679548, "percentage": 97.17, "elapsed_time": "0:25:07", "remaining_time": "0:00:43", "throughput": 7944.21, "total_tokens": 11975904}
{"current_steps": 5160, "total_steps": 5305, "loss": 0.0, "lr": 1.152966105227693e-08, "epoch": 4.863336475023563, "percentage": 97.27, "elapsed_time": "0:25:08", "remaining_time": "0:00:42", "throughput": 7948.3, "total_tokens": 11986208}
{"current_steps": 5165, "total_steps": 5305, "loss": 0.0, "lr": 1.0754037517158312e-08, "epoch": 4.868049010367578, "percentage": 97.36, "elapsed_time": "0:25:08", "remaining_time": "0:00:40", "throughput": 7953.92, "total_tokens": 11999520}
{"current_steps": 5170, "total_steps": 5305, "loss": 0.0, "lr": 1.0005362962796362e-08, "epoch": 4.872761545711593, "percentage": 97.46, "elapsed_time": "0:25:09", "remaining_time": "0:00:39", "throughput": 7958.8, "total_tokens": 12011424}
{"current_steps": 5175, "total_steps": 5305, "loss": 0.0, "lr": 9.283645494463368e-09, "epoch": 4.877474081055608, "percentage": 97.55, "elapsed_time": "0:25:09", "remaining_time": "0:00:37", "throughput": 7964.48, "total_tokens": 12024864}
{"current_steps": 5180, "total_steps": 5305, "loss": 0.0, "lr": 8.588892925590064e-09, "epoch": 4.882186616399623, "percentage": 97.64, "elapsed_time": "0:25:10", "remaining_time": "0:00:36", "throughput": 7968.97, "total_tokens": 12035936}
{"current_steps": 5185, "total_steps": 5305, "loss": 0.0, "lr": 7.92111277768015e-09, "epoch": 4.886899151743638, "percentage": 97.74, "elapsed_time": "0:25:10", "remaining_time": "0:00:34", "throughput": 7972.87, "total_tokens": 12045920}
{"current_steps": 5190, "total_steps": 5305, "loss": 0.0, "lr": 7.280312280230073e-09, "epoch": 4.891611687087654, "percentage": 97.83, "elapsed_time": "0:25:11", "remaining_time": "0:00:33", "throughput": 7977.9, "total_tokens": 12058144}
{"current_steps": 5195, "total_steps": 5305, "loss": 0.0, "lr": 6.666498370650198e-09, "epoch": 4.8963242224316685, "percentage": 97.93, "elapsed_time": "0:25:12", "remaining_time": "0:00:32", "throughput": 7982.66, "total_tokens": 12069792}
{"current_steps": 5200, "total_steps": 5305, "loss": 0.0, "lr": 6.079677694189046e-09, "epoch": 4.9010367577756835, "percentage": 98.02, "elapsed_time": "0:25:12", "remaining_time": "0:00:30", "throughput": 7987.12, "total_tokens": 12080864}
{"current_steps": 5205, "total_steps": 5305, "loss": 0.0, "lr": 5.5198566038627835e-09, "epoch": 4.905749293119698, "percentage": 98.11, "elapsed_time": "0:25:13", "remaining_time": "0:00:29", "throughput": 7991.73, "total_tokens": 12092256}
{"current_steps": 5210, "total_steps": 5305, "loss": 0.0, "lr": 4.987041160385287e-09, "epoch": 4.910461828463713, "percentage": 98.21, "elapsed_time": "0:25:13", "remaining_time": "0:00:27", "throughput": 7997.9, "total_tokens": 12106784}
{"current_steps": 5215, "total_steps": 5305, "loss": 0.0003, "lr": 4.481237132103189e-09, "epoch": 4.915174363807728, "percentage": 98.3, "elapsed_time": "0:25:14", "remaining_time": "0:00:26", "throughput": 8001.97, "total_tokens": 12117088}
{"current_steps": 5220, "total_steps": 5305, "loss": 0.0, "lr": 4.002449994932878e-09, "epoch": 4.919886899151743, "percentage": 98.4, "elapsed_time": "0:25:14", "remaining_time": "0:00:24", "throughput": 8006.71, "total_tokens": 12128736}
{"current_steps": 5225, "total_steps": 5305, "loss": 0.0, "lr": 3.550684932301374e-09, "epoch": 4.924599434495759, "percentage": 98.49, "elapsed_time": "0:25:15", "remaining_time": "0:00:23", "throughput": 8013.93, "total_tokens": 12145376}
{"current_steps": 5230, "total_steps": 5305, "loss": 0.0, "lr": 3.1259468350910982e-09, "epoch": 4.929311969839774, "percentage": 98.59, "elapsed_time": "0:25:16", "remaining_time": "0:00:21", "throughput": 8018.29, "total_tokens": 12156320}
{"current_steps": 5235, "total_steps": 5305, "loss": 0.0, "lr": 2.7282403015849167e-09, "epoch": 4.934024505183789, "percentage": 98.68, "elapsed_time": "0:25:16", "remaining_time": "0:00:20", "throughput": 8023.02, "total_tokens": 12167968}
{"current_steps": 5240, "total_steps": 5305, "loss": 0.0, "lr": 2.3575696374189548e-09, "epoch": 4.938737040527804, "percentage": 98.77, "elapsed_time": "0:25:17", "remaining_time": "0:00:18", "throughput": 8027.81, "total_tokens": 12179744}
{"current_steps": 5245, "total_steps": 5305, "loss": 0.0001, "lr": 2.013938855533748e-09, "epoch": 4.943449575871819, "percentage": 98.87, "elapsed_time": "0:25:17", "remaining_time": "0:00:17", "throughput": 8033.0, "total_tokens": 12192288}
{"current_steps": 5250, "total_steps": 5305, "loss": 0.0, "lr": 1.6973516761317755e-09, "epoch": 4.948162111215834, "percentage": 98.96, "elapsed_time": "0:25:18", "remaining_time": "0:00:15", "throughput": 8037.44, "total_tokens": 12203360}
{"current_steps": 5255, "total_steps": 5305, "loss": 0.0007, "lr": 1.407811526637215e-09, "epoch": 4.952874646559849, "percentage": 99.06, "elapsed_time": "0:25:18", "remaining_time": "0:00:14", "throughput": 8042.33, "total_tokens": 12215392}
{"current_steps": 5260, "total_steps": 5305, "loss": 0.0, "lr": 1.145321541659028e-09, "epoch": 4.957587181903865, "percentage": 99.15, "elapsed_time": "0:25:19", "remaining_time": "0:00:12", "throughput": 8047.34, "total_tokens": 12227680}
{"current_steps": 5265, "total_steps": 5305, "loss": 0.0, "lr": 9.098845629559871e-10, "epoch": 4.9622997172478795, "percentage": 99.25, "elapsed_time": "0:25:20", "remaining_time": "0:00:11", "throughput": 8053.55, "total_tokens": 12242336}
{"current_steps": 5270, "total_steps": 5305, "loss": 0.0, "lr": 7.015031394072557e-10, "epoch": 4.9670122525918945, "percentage": 99.34, "elapsed_time": "0:25:20", "remaining_time": "0:00:10", "throughput": 8057.18, "total_tokens": 12251936}
{"current_steps": 5275, "total_steps": 5305, "loss": 0.0, "lr": 5.201795269837995e-10, "epoch": 4.971724787935909, "percentage": 99.43, "elapsed_time": "0:25:21", "remaining_time": "0:00:08", "throughput": 8061.3, "total_tokens": 12262432}
{"current_steps": 5280, "total_steps": 5305, "loss": 0.0, "lr": 3.6591568872451634e-10, "epoch": 4.976437323279924, "percentage": 99.53, "elapsed_time": "0:25:21", "remaining_time": "0:00:07", "throughput": 8066.9, "total_tokens": 12275872}
{"current_steps": 5285, "total_steps": 5305, "loss": 0.0, "lr": 2.387132947151427e-10, "epoch": 4.981149858623939, "percentage": 99.62, "elapsed_time": "0:25:22", "remaining_time": "0:00:05", "throughput": 8072.29, "total_tokens": 12288928}
{"current_steps": 5290, "total_steps": 5305, "loss": 0.0969, "lr": 1.3857372206882436e-10, "epoch": 4.985862393967954, "percentage": 99.72, "elapsed_time": "0:25:22", "remaining_time": "0:00:04", "throughput": 8077.12, "total_tokens": 12300832}
{"current_steps": 5295, "total_steps": 5305, "loss": 0.0, "lr": 6.549805491307127e-11, "epoch": 4.99057492931197, "percentage": 99.81, "elapsed_time": "0:25:23", "remaining_time": "0:00:02", "throughput": 8081.73, "total_tokens": 12312352}
{"current_steps": 5300, "total_steps": 5305, "loss": 0.0, "lr": 1.948708437726765e-11, "epoch": 4.995287464655985, "percentage": 99.91, "elapsed_time": "0:25:23", "remaining_time": "0:00:01", "throughput": 8085.66, "total_tokens": 12322528}
{"current_steps": 5305, "total_steps": 5305, "loss": 0.0, "lr": 5.413085829575338e-13, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "0:25:24", "remaining_time": "0:00:00", "throughput": 8089.82, "total_tokens": 12333600}
{"current_steps": 5305, "total_steps": 5305, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "0:26:13", "remaining_time": "0:00:00", "throughput": 7836.34, "total_tokens": 12333600}