Files
train_mrpc_42_1774791061/trainer_log.jsonl

434 lines
102 KiB
Plaintext
Raw Permalink Normal View History

{"current_steps": 5, "total_steps": 2065, "loss": 0.7681, "lr": 9.66183574879227e-07, "epoch": 0.012106537530266344, "percentage": 0.24, "elapsed_time": "0:00:01", "remaining_time": "0:11:52", "throughput": 2515.41, "total_tokens": 4352}
{"current_steps": 10, "total_steps": 2065, "loss": 0.3056, "lr": 2.173913043478261e-06, "epoch": 0.024213075060532687, "percentage": 0.48, "elapsed_time": "0:00:02", "remaining_time": "0:07:14", "throughput": 4142.22, "total_tokens": 8768}
{"current_steps": 15, "total_steps": 2065, "loss": 0.183, "lr": 3.3816425120772947e-06, "epoch": 0.03631961259079903, "percentage": 0.73, "elapsed_time": "0:00:02", "remaining_time": "0:05:40", "throughput": 5217.14, "total_tokens": 12992}
{"current_steps": 20, "total_steps": 2065, "loss": 0.4041, "lr": 4.589371980676329e-06, "epoch": 0.048426150121065374, "percentage": 0.97, "elapsed_time": "0:00:02", "remaining_time": "0:04:53", "throughput": 6052.37, "total_tokens": 17344}
{"current_steps": 25, "total_steps": 2065, "loss": 0.4147, "lr": 5.797101449275362e-06, "epoch": 0.06053268765133172, "percentage": 1.21, "elapsed_time": "0:00:03", "remaining_time": "0:04:24", "throughput": 6694.63, "total_tokens": 21696}
{"current_steps": 30, "total_steps": 2065, "loss": 0.2132, "lr": 7.004830917874397e-06, "epoch": 0.07263922518159806, "percentage": 1.45, "elapsed_time": "0:00:03", "remaining_time": "0:04:05", "throughput": 7220.73, "total_tokens": 26112}
{"current_steps": 35, "total_steps": 2065, "loss": 0.2587, "lr": 8.212560386473431e-06, "epoch": 0.0847457627118644, "percentage": 1.69, "elapsed_time": "0:00:03", "remaining_time": "0:03:51", "throughput": 7573.75, "total_tokens": 30208}
{"current_steps": 40, "total_steps": 2065, "loss": 0.2076, "lr": 9.420289855072464e-06, "epoch": 0.09685230024213075, "percentage": 1.94, "elapsed_time": "0:00:04", "remaining_time": "0:03:40", "throughput": 7947.29, "total_tokens": 34688}
{"current_steps": 45, "total_steps": 2065, "loss": 0.1842, "lr": 1.0628019323671499e-05, "epoch": 0.1089588377723971, "percentage": 2.18, "elapsed_time": "0:00:04", "remaining_time": "0:03:32", "throughput": 8196.68, "total_tokens": 38784}
{"current_steps": 50, "total_steps": 2065, "loss": 0.3012, "lr": 1.1835748792270531e-05, "epoch": 0.12106537530266344, "percentage": 2.42, "elapsed_time": "0:00:05", "remaining_time": "0:03:25", "throughput": 8453.07, "total_tokens": 43200}
{"current_steps": 55, "total_steps": 2065, "loss": 0.1951, "lr": 1.3043478260869566e-05, "epoch": 0.13317191283292978, "percentage": 2.66, "elapsed_time": "0:00:05", "remaining_time": "0:03:20", "throughput": 8628.0, "total_tokens": 47296}
{"current_steps": 60, "total_steps": 2065, "loss": 0.2332, "lr": 1.4251207729468599e-05, "epoch": 0.14527845036319612, "percentage": 2.91, "elapsed_time": "0:00:05", "remaining_time": "0:03:15", "throughput": 8833.5, "total_tokens": 51712}
{"current_steps": 65, "total_steps": 2065, "loss": 0.2049, "lr": 1.5458937198067633e-05, "epoch": 0.15738498789346247, "percentage": 3.15, "elapsed_time": "0:00:06", "remaining_time": "0:03:11", "throughput": 8977.04, "total_tokens": 55872}
{"current_steps": 70, "total_steps": 2065, "loss": 0.2103, "lr": 1.6666666666666667e-05, "epoch": 0.1694915254237288, "percentage": 3.39, "elapsed_time": "0:00:06", "remaining_time": "0:03:07", "throughput": 9080.05, "total_tokens": 59840}
{"current_steps": 75, "total_steps": 2065, "loss": 0.3072, "lr": 1.78743961352657e-05, "epoch": 0.18159806295399517, "percentage": 3.63, "elapsed_time": "0:00:06", "remaining_time": "0:03:04", "throughput": 9195.06, "total_tokens": 64000}
{"current_steps": 80, "total_steps": 2065, "loss": 0.3841, "lr": 1.9082125603864733e-05, "epoch": 0.1937046004842615, "percentage": 3.87, "elapsed_time": "0:00:07", "remaining_time": "0:03:01", "throughput": 9322.35, "total_tokens": 68352}
{"current_steps": 85, "total_steps": 2065, "loss": 0.232, "lr": 2.028985507246377e-05, "epoch": 0.20581113801452786, "percentage": 4.12, "elapsed_time": "0:00:07", "remaining_time": "0:02:59", "throughput": 9442.75, "total_tokens": 72768}
{"current_steps": 90, "total_steps": 2065, "loss": 0.2474, "lr": 2.1497584541062805e-05, "epoch": 0.2179176755447942, "percentage": 4.36, "elapsed_time": "0:00:08", "remaining_time": "0:02:57", "throughput": 9543.92, "total_tokens": 77120}
{"current_steps": 95, "total_steps": 2065, "loss": 0.1841, "lr": 2.2705314009661836e-05, "epoch": 0.23002421307506055, "percentage": 4.6, "elapsed_time": "0:00:08", "remaining_time": "0:02:55", "throughput": 9636.51, "total_tokens": 81664}
{"current_steps": 100, "total_steps": 2065, "loss": 0.1681, "lr": 2.391304347826087e-05, "epoch": 0.24213075060532688, "percentage": 4.84, "elapsed_time": "0:00:08", "remaining_time": "0:02:53", "throughput": 9727.12, "total_tokens": 86080}
{"current_steps": 104, "total_steps": 2065, "eval_loss": 0.17402823269367218, "epoch": 0.25181598062953997, "percentage": 5.04, "elapsed_time": "0:00:09", "remaining_time": "0:03:04", "throughput": 9181.88, "total_tokens": 89600}
{"current_steps": 105, "total_steps": 2065, "loss": 0.1488, "lr": 2.5120772946859905e-05, "epoch": 0.2542372881355932, "percentage": 5.08, "elapsed_time": "0:01:08", "remaining_time": "0:21:12", "throughput": 1326.98, "total_tokens": 90432}
{"current_steps": 110, "total_steps": 2065, "loss": 0.2051, "lr": 2.632850241545894e-05, "epoch": 0.26634382566585957, "percentage": 5.33, "elapsed_time": "0:01:08", "remaining_time": "0:20:17", "throughput": 1379.58, "total_tokens": 94528}
{"current_steps": 115, "total_steps": 2065, "loss": 0.16, "lr": 2.753623188405797e-05, "epoch": 0.2784503631961259, "percentage": 5.57, "elapsed_time": "0:01:08", "remaining_time": "0:19:28", "throughput": 1434.33, "total_tokens": 98816}
{"current_steps": 120, "total_steps": 2065, "loss": 0.205, "lr": 2.8743961352657005e-05, "epoch": 0.29055690072639223, "percentage": 5.81, "elapsed_time": "0:01:09", "remaining_time": "0:18:42", "throughput": 1488.59, "total_tokens": 103104}
{"current_steps": 125, "total_steps": 2065, "loss": 0.1846, "lr": 2.995169082125604e-05, "epoch": 0.3026634382566586, "percentage": 6.05, "elapsed_time": "0:01:09", "remaining_time": "0:18:00", "throughput": 1541.33, "total_tokens": 107328}
{"current_steps": 130, "total_steps": 2065, "loss": 0.2243, "lr": 3.1159420289855074e-05, "epoch": 0.31476997578692495, "percentage": 6.3, "elapsed_time": "0:01:10", "remaining_time": "0:17:21", "throughput": 1592.61, "total_tokens": 111488}
{"current_steps": 135, "total_steps": 2065, "loss": 0.2013, "lr": 3.236714975845411e-05, "epoch": 0.3268765133171913, "percentage": 6.54, "elapsed_time": "0:01:10", "remaining_time": "0:16:46", "throughput": 1647.78, "total_tokens": 115968}
{"current_steps": 140, "total_steps": 2065, "loss": 0.2278, "lr": 3.357487922705314e-05, "epoch": 0.3389830508474576, "percentage": 6.78, "elapsed_time": "0:01:10", "remaining_time": "0:16:12", "throughput": 1698.84, "total_tokens": 120192}
{"current_steps": 145, "total_steps": 2065, "loss": 0.1886, "lr": 3.478260869565218e-05, "epoch": 0.35108958837772397, "percentage": 7.02, "elapsed_time": "0:01:11", "remaining_time": "0:15:41", "throughput": 1749.39, "total_tokens": 124416}
{"current_steps": 150, "total_steps": 2065, "loss": 0.1635, "lr": 3.5990338164251205e-05, "epoch": 0.36319612590799033, "percentage": 7.26, "elapsed_time": "0:01:11", "remaining_time": "0:15:12", "throughput": 1801.99, "total_tokens": 128832}
{"current_steps": 155, "total_steps": 2065, "loss": 0.2118, "lr": 3.719806763285024e-05, "epoch": 0.37530266343825663, "percentage": 7.51, "elapsed_time": "0:01:11", "remaining_time": "0:14:45", "throughput": 1850.7, "total_tokens": 132992}
{"current_steps": 160, "total_steps": 2065, "loss": 0.3186, "lr": 3.8405797101449274e-05, "epoch": 0.387409200968523, "percentage": 7.75, "elapsed_time": "0:01:12", "remaining_time": "0:14:20", "throughput": 1900.5, "total_tokens": 137280}
{"current_steps": 165, "total_steps": 2065, "loss": 0.2002, "lr": 3.961352657004831e-05, "epoch": 0.39951573849878935, "percentage": 7.99, "elapsed_time": "0:01:12", "remaining_time": "0:13:56", "throughput": 1949.84, "total_tokens": 141568}
{"current_steps": 170, "total_steps": 2065, "loss": 0.1792, "lr": 4.082125603864734e-05, "epoch": 0.4116222760290557, "percentage": 8.23, "elapsed_time": "0:01:12", "remaining_time": "0:13:33", "throughput": 2000.41, "total_tokens": 145984}
{"current_steps": 175, "total_steps": 2065, "loss": 0.3197, "lr": 4.202898550724638e-05, "epoch": 0.423728813559322, "percentage": 8.47, "elapsed_time": "0:01:13", "remaining_time": "0:13:12", "throughput": 2047.08, "total_tokens": 150144}
{"current_steps": 180, "total_steps": 2065, "loss": 0.3561, "lr": 4.323671497584541e-05, "epoch": 0.4358353510895884, "percentage": 8.72, "elapsed_time": "0:01:13", "remaining_time": "0:12:52", "throughput": 2097.46, "total_tokens": 154624}
{"current_steps": 185, "total_steps": 2065, "loss": 0.373, "lr": 4.4444444444444447e-05, "epoch": 0.44794188861985473, "percentage": 8.96, "elapsed_time": "0:01:14", "remaining_time": "0:12:32", "throughput": 2143.12, "total_tokens": 158784}
{"current_steps": 190, "total_steps": 2065, "loss": 0.3924, "lr": 4.565217391304348e-05, "epoch": 0.4600484261501211, "percentage": 9.2, "elapsed_time": "0:01:14", "remaining_time": "0:12:14", "throughput": 2189.86, "total_tokens": 163072}
{"current_steps": 195, "total_steps": 2065, "loss": 0.2368, "lr": 4.6859903381642516e-05, "epoch": 0.4721549636803874, "percentage": 9.44, "elapsed_time": "0:01:14", "remaining_time": "0:11:57", "throughput": 2233.02, "total_tokens": 167104}
{"current_steps": 200, "total_steps": 2065, "loss": 0.4497, "lr": 4.806763285024155e-05, "epoch": 0.48426150121065376, "percentage": 9.69, "elapsed_time": "0:01:15", "remaining_time": "0:11:41", "throughput": 2279.73, "total_tokens": 171456}
{"current_steps": 205, "total_steps": 2065, "loss": 0.2715, "lr": 4.9275362318840584e-05, "epoch": 0.4963680387409201, "percentage": 9.93, "elapsed_time": "0:01:15", "remaining_time": "0:11:25", "throughput": 2326.06, "total_tokens": 175808}
{"current_steps": 208, "total_steps": 2065, "eval_loss": 0.23122040927410126, "epoch": 0.5036319612590799, "percentage": 10.07, "elapsed_time": "0:01:16", "remaining_time": "0:11:22", "throughput": 2338.42, "total_tokens": 178688}
{"current_steps": 210, "total_steps": 2065, "loss": 0.1981, "lr": 4.9999857052054956e-05, "epoch": 0.5084745762711864, "percentage": 10.17, "elapsed_time": "0:01:43", "remaining_time": "0:15:17", "throughput": 1735.73, "total_tokens": 180224}
{"current_steps": 215, "total_steps": 2065, "loss": 0.1989, "lr": 4.999824890644693e-05, "epoch": 0.5205811138014528, "percentage": 10.41, "elapsed_time": "0:01:44", "remaining_time": "0:14:56", "throughput": 1772.44, "total_tokens": 184704}
{"current_steps": 220, "total_steps": 2065, "loss": 0.2336, "lr": 4.9994854045622684e-05, "epoch": 0.5326876513317191, "percentage": 10.65, "elapsed_time": "0:01:44", "remaining_time": "0:14:37", "throughput": 1808.9, "total_tokens": 189184}
{"current_steps": 225, "total_steps": 2065, "loss": 0.1595, "lr": 4.9989672712225204e-05, "epoch": 0.5447941888619855, "percentage": 10.9, "elapsed_time": "0:01:44", "remaining_time": "0:14:18", "throughput": 1843.9, "total_tokens": 193536}
{"current_steps": 230, "total_steps": 2065, "loss": 0.2147, "lr": 4.998270527658311e-05, "epoch": 0.5569007263922519, "percentage": 11.14, "elapsed_time": "0:01:45", "remaining_time": "0:14:00", "throughput": 1878.72, "total_tokens": 197888}
{"current_steps": 235, "total_steps": 2065, "loss": 0.1959, "lr": 4.9973952236684216e-05, "epoch": 0.5690072639225182, "percentage": 11.38, "elapsed_time": "0:01:45", "remaining_time": "0:13:43", "throughput": 1912.06, "total_tokens": 202112}
{"current_steps": 240, "total_steps": 2065, "loss": 0.2085, "lr": 4.996341421813993e-05, "epoch": 0.5811138014527845, "percentage": 11.62, "elapsed_time": "0:01:46", "remaining_time": "0:13:26", "throughput": 1946.98, "total_tokens": 206528}
{"current_steps": 245, "total_steps": 2065, "loss": 0.2304, "lr": 4.9951091974140506e-05, "epoch": 0.5932203389830508, "percentage": 11.86, "elapsed_time": "0:01:46", "remaining_time": "0:13:12", "throughput": 1977.19, "total_tokens": 210944}
{"current_steps": 250, "total_steps": 2065, "loss": 0.2171, "lr": 4.99369863854013e-05, "epoch": 0.6053268765133172, "percentage": 12.11, "elapsed_time": "0:01:47", "remaining_time": "0:12:57", "throughput": 2009.17, "total_tokens": 215104}
{"current_steps": 255, "total_steps": 2065, "loss": 0.2458, "lr": 4.992109846009972e-05, "epoch": 0.6174334140435835, "percentage": 12.35, "elapsed_time": "0:01:47", "remaining_time": "0:12:42", "throughput": 2041.56, "total_tokens": 219328}
{"current_steps": 260, "total_steps": 2065, "loss": 0.219, "lr": 4.990342933380321e-05, "epoch": 0.6295399515738499, "percentage": 12.59, "elapsed_time": "0:01:47", "remaining_time": "0:12:28", "throughput": 2074.86, "total_tokens": 223680}
{"current_steps": 265, "total_steps": 2065, "loss": 0.3803, "lr": 4.9883980269388106e-05, "epoch": 0.6416464891041163, "percentage": 12.83, "elapsed_time": "0:01:48", "remaining_time": "0:12:14", "throughput": 2106.8, "total_tokens": 227904}
{"current_steps": 270, "total_steps": 2065, "loss": 0.3005, "lr": 4.986275265694935e-05, "epoch": 0.6537530266343826, "percentage": 13.08, "elapsed_time": "0:01:48", "remaining_time": "0:12:01", "throughput": 2136.82, "total_tokens": 231936}
{"current_steps": 275, "total_steps": 2065, "loss": 0.2954, "lr": 4.9839748013701145e-05, "epoch": 0.6658595641646489, "percentage": 13.32, "elapsed_time": "0:01:48", "remaining_time": "0:11:48", "throughput": 2168.34, "total_tokens": 236160}
{"current_steps": 280, "total_steps": 2065, "loss": 0.2924, "lr": 4.981496798386849e-05, "epoch": 0.6779661016949152, "percentage": 13.56, "elapsed_time": "0:01:49", "remaining_time": "0:11:36", "throughput": 2199.06, "total_tokens": 240320}
{"current_steps": 285, "total_steps": 2065, "loss": 0.1771, "lr": 4.978841433856971e-05, "epoch": 0.6900726392251816, "percentage": 13.8, "elapsed_time": "0:01:49", "remaining_time": "0:11:24", "throughput": 2232.26, "total_tokens": 244800}
{"current_steps": 290, "total_steps": 2065, "loss": 0.194, "lr": 4.976008897568981e-05, "epoch": 0.7021791767554479, "percentage": 14.04, "elapsed_time": "0:01:50", "remaining_time": "0:11:13", "throughput": 2264.28, "total_tokens": 249152}
{"current_steps": 295, "total_steps": 2065, "loss": 0.2064, "lr": 4.972999391974488e-05, "epoch": 0.7142857142857143, "percentage": 14.29, "elapsed_time": "0:01:50", "remaining_time": "0:11:02", "throughput": 2294.98, "total_tokens": 253376}
{"current_steps": 300, "total_steps": 2065, "loss": 0.2096, "lr": 4.969813132173735e-05, "epoch": 0.7263922518159807, "percentage": 14.53, "elapsed_time": "0:01:50", "remaining_time": "0:10:51", "throughput": 2326.0, "total_tokens": 257664}
{"current_steps": 305, "total_steps": 2065, "loss": 0.1712, "lr": 4.966450345900229e-05, "epoch": 0.738498789346247, "percentage": 14.77, "elapsed_time": "0:01:51", "remaining_time": "0:10:41", "throughput": 2357.33, "total_tokens": 262016}
{"current_steps": 310, "total_steps": 2065, "loss": 0.2276, "lr": 4.962911273504461e-05, "epoch": 0.7506053268765133, "percentage": 15.01, "elapsed_time": "0:01:51", "remaining_time": "0:10:31", "throughput": 2389.12, "total_tokens": 266432}
{"current_steps": 312, "total_steps": 2065, "eval_loss": 0.22853781282901764, "epoch": 0.7554479418886199, "percentage": 15.11, "elapsed_time": "0:01:53", "remaining_time": "0:10:40", "throughput": 2351.03, "total_tokens": 267968}
{"current_steps": 315, "total_steps": 2065, "loss": 0.2349, "lr": 4.9591961679367284e-05, "epoch": 0.7627118644067796, "percentage": 15.25, "elapsed_time": "0:02:22", "remaining_time": "0:13:12", "throughput": 1895.52, "total_tokens": 270464}
{"current_steps": 320, "total_steps": 2065, "loss": 0.2824, "lr": 4.955305294729056e-05, "epoch": 0.774818401937046, "percentage": 15.5, "elapsed_time": "0:02:23", "remaining_time": "0:13:00", "throughput": 1920.08, "total_tokens": 274688}
{"current_steps": 325, "total_steps": 2065, "loss": 0.3105, "lr": 4.951238931976216e-05, "epoch": 0.7869249394673123, "percentage": 15.74, "elapsed_time": "0:02:23", "remaining_time": "0:12:47", "throughput": 1944.08, "total_tokens": 278848}
{"current_steps": 330, "total_steps": 2065, "loss": 0.2667, "lr": 4.9469973703158565e-05, "epoch": 0.7990314769975787, "percentage": 15.98, "elapsed_time": "0:02:23", "remaining_time": "0:12:36", "throughput": 1968.81, "total_tokens": 283136}
{"current_steps": 335, "total_steps": 2065, "loss": 0.2213, "lr": 4.9425809129077204e-05, "epoch": 0.8111380145278451, "percentage": 16.22, "elapsed_time": "0:02:24", "remaining_time": "0:12:24", "throughput": 1995.13, "total_tokens": 287680}
{"current_steps": 340, "total_steps": 2065, "loss": 0.1887, "lr": 4.937989875411985e-05, "epoch": 0.8232445520581114, "percentage": 16.46, "elapsed_time": "0:02:24", "remaining_time": "0:12:13", "throughput": 2021.36, "total_tokens": 292224}
{"current_steps": 345, "total_steps": 2065, "loss": 0.2499, "lr": 4.933224585966696e-05, "epoch": 0.8353510895883777, "percentage": 16.71, "elapsed_time": "0:02:24", "remaining_time": "0:12:02", "throughput": 2045.3, "total_tokens": 296448}
{"current_steps": 350, "total_steps": 2065, "loss": 0.2431, "lr": 4.928285385164315e-05, "epoch": 0.847457627118644, "percentage": 16.95, "elapsed_time": "0:02:25", "remaining_time": "0:11:52", "throughput": 2069.54, "total_tokens": 300736}
{"current_steps": 355, "total_steps": 2065, "loss": 0.2588, "lr": 4.923172626027379e-05, "epoch": 0.8595641646489104, "percentage": 17.19, "elapsed_time": "0:02:25", "remaining_time": "0:11:41", "throughput": 2093.18, "total_tokens": 304960}
{"current_steps": 360, "total_steps": 2065, "loss": 0.2322, "lr": 4.917886673983267e-05, "epoch": 0.8716707021791767, "percentage": 17.43, "elapsed_time": "0:02:26", "remaining_time": "0:11:31", "throughput": 2116.72, "total_tokens": 309184}
{"current_steps": 365, "total_steps": 2065, "loss": 0.2314, "lr": 4.912427906838078e-05, "epoch": 0.8837772397094431, "percentage": 17.68, "elapsed_time": "0:02:26", "remaining_time": "0:11:22", "throughput": 2140.14, "total_tokens": 313408}
{"current_steps": 370, "total_steps": 2065, "loss": 0.1782, "lr": 4.906796714749635e-05, "epoch": 0.8958837772397095, "percentage": 17.92, "elapsed_time": "0:02:26", "remaining_time": "0:11:12", "throughput": 2165.16, "total_tokens": 317888}
{"current_steps": 375, "total_steps": 2065, "loss": 0.1873, "lr": 4.900993500199591e-05, "epoch": 0.9079903147699758, "percentage": 18.16, "elapsed_time": "0:02:27", "remaining_time": "0:11:03", "throughput": 2187.92, "total_tokens": 322048}
{"current_steps": 380, "total_steps": 2065, "loss": 0.1985, "lr": 4.895018677964669e-05, "epoch": 0.9200968523002422, "percentage": 18.4, "elapsed_time": "0:02:27", "remaining_time": "0:10:54", "throughput": 2212.99, "total_tokens": 326592}
{"current_steps": 385, "total_steps": 2065, "loss": 0.3036, "lr": 4.8888726750870126e-05, "epoch": 0.9322033898305084, "percentage": 18.64, "elapsed_time": "0:02:27", "remaining_time": "0:10:45", "throughput": 2236.34, "total_tokens": 330880}
{"current_steps": 390, "total_steps": 2065, "loss": 0.2224, "lr": 4.882555930843664e-05, "epoch": 0.9443099273607748, "percentage": 18.89, "elapsed_time": "0:02:28", "remaining_time": "0:10:37", "throughput": 2259.21, "total_tokens": 335104}
{"current_steps": 395, "total_steps": 2065, "loss": 0.1898, "lr": 4.87606889671517e-05, "epoch": 0.9564164648910412, "percentage": 19.13, "elapsed_time": "0:02:28", "remaining_time": "0:10:28", "throughput": 2282.36, "total_tokens": 339392}
{"current_steps": 400, "total_steps": 2065, "loss": 0.1663, "lr": 4.8694120363533104e-05, "epoch": 0.9685230024213075, "percentage": 19.37, "elapsed_time": "0:02:29", "remaining_time": "0:10:20", "throughput": 2305.78, "total_tokens": 343744}
{"current_steps": 405, "total_steps": 2065, "loss": 0.1954, "lr": 4.8625858255479574e-05, "epoch": 0.9806295399515739, "percentage": 19.61, "elapsed_time": "0:02:29", "remaining_time": "0:10:12", "throughput": 2329.48, "total_tokens": 348160}
{"current_steps": 410, "total_steps": 2065, "loss": 0.2606, "lr": 4.855590752193076e-05, "epoch": 0.9927360774818402, "percentage": 19.85, "elapsed_time": "0:02:29", "remaining_time": "0:10:04", "throughput": 2352.34, "total_tokens": 352448}
{"current_steps": 415, "total_steps": 2065, "loss": 0.5572, "lr": 4.848427316251842e-05, "epoch": 1.0048426150121066, "percentage": 20.1, "elapsed_time": "0:02:30", "remaining_time": "0:09:59", "throughput": 2365.4, "total_tokens": 356656}
{"current_steps": 416, "total_steps": 2065, "eval_loss": 0.2624819278717041, "epoch": 1.0072639225181599, "percentage": 20.15, "elapsed_time": "0:02:31", "remaining_time": "0:10:01", "throughput": 2356.64, "total_tokens": 357488}
{"current_steps": 420, "total_steps": 2065, "loss": 0.2346, "lr": 4.841096029720921e-05, "epoch": 1.0169491525423728, "percentage": 20.34, "elapsed_time": "0:03:34", "remaining_time": "0:14:00", "throughput": 1682.04, "total_tokens": 360880}
{"current_steps": 425, "total_steps": 2065, "loss": 0.1819, "lr": 4.8335974165938615e-05, "epoch": 1.0290556900726393, "percentage": 20.58, "elapsed_time": "0:03:34", "remaining_time": "0:13:49", "throughput": 1698.77, "total_tokens": 365104}
{"current_steps": 430, "total_steps": 2065, "loss": 0.1495, "lr": 4.825932012823652e-05, "epoch": 1.0411622276029056, "percentage": 20.82, "elapsed_time": "0:03:35", "remaining_time": "0:13:38", "throughput": 1717.5, "total_tokens": 369776}
{"current_steps": 435, "total_steps": 2065, "loss": 0.2583, "lr": 4.8181003662844074e-05, "epoch": 1.053268765133172, "percentage": 21.07, "elapsed_time": "0:03:35", "remaining_time": "0:13:28", "throughput": 1734.11, "total_tokens": 374000}
{"current_steps": 440, "total_steps": 2065, "loss": 0.2093, "lr": 4.8101030367322195e-05, "epoch": 1.0653753026634383, "percentage": 21.31, "elapsed_time": "0:03:36", "remaining_time": "0:13:17", "throughput": 1750.11, "total_tokens": 378096}
{"current_steps": 445, "total_steps": 2065, "loss": 0.1806, "lr": 4.8019405957651395e-05, "epoch": 1.0774818401937045, "percentage": 21.55, "elapsed_time": "0:03:36", "remaining_time": "0:13:07", "throughput": 1766.35, "total_tokens": 382256}
{"current_steps": 450, "total_steps": 2065, "loss": 0.3307, "lr": 4.793613626782331e-05, "epoch": 1.089588377723971, "percentage": 21.79, "elapsed_time": "0:03:36", "remaining_time": "0:12:58", "throughput": 1783.69, "total_tokens": 386672}
{"current_steps": 455, "total_steps": 2065, "loss": 0.2208, "lr": 4.785122724942367e-05, "epoch": 1.1016949152542372, "percentage": 22.03, "elapsed_time": "0:03:37", "remaining_time": "0:12:48", "throughput": 1800.37, "total_tokens": 390960}
{"current_steps": 460, "total_steps": 2065, "loss": 0.2978, "lr": 4.776468497120698e-05, "epoch": 1.1138014527845037, "percentage": 22.28, "elapsed_time": "0:03:37", "remaining_time": "0:12:38", "throughput": 1817.85, "total_tokens": 395440}
{"current_steps": 465, "total_steps": 2065, "loss": 0.2315, "lr": 4.7676515618662684e-05, "epoch": 1.12590799031477, "percentage": 22.52, "elapsed_time": "0:03:37", "remaining_time": "0:12:29", "throughput": 1833.82, "total_tokens": 399600}
{"current_steps": 470, "total_steps": 2065, "loss": 0.2236, "lr": 4.758672549357316e-05, "epoch": 1.1380145278450362, "percentage": 22.76, "elapsed_time": "0:03:38", "remaining_time": "0:12:20", "throughput": 1850.34, "total_tokens": 403888}
{"current_steps": 475, "total_steps": 2065, "loss": 0.1689, "lr": 4.749532101356322e-05, "epoch": 1.1501210653753027, "percentage": 23.0, "elapsed_time": "0:03:38", "remaining_time": "0:12:11", "throughput": 1866.81, "total_tokens": 408176}
{"current_steps": 480, "total_steps": 2065, "loss": 0.2012, "lr": 4.740230871164147e-05, "epoch": 1.162227602905569, "percentage": 23.24, "elapsed_time": "0:03:39", "remaining_time": "0:12:03", "throughput": 1882.08, "total_tokens": 412208}
{"current_steps": 485, "total_steps": 2065, "loss": 0.1816, "lr": 4.730769523573337e-05, "epoch": 1.1743341404358354, "percentage": 23.49, "elapsed_time": "0:03:39", "remaining_time": "0:11:54", "throughput": 1898.99, "total_tokens": 416624}
{"current_steps": 490, "total_steps": 2065, "loss": 0.2491, "lr": 4.7211487348206054e-05, "epoch": 1.1864406779661016, "percentage": 23.73, "elapsed_time": "0:03:39", "remaining_time": "0:11:46", "throughput": 1915.85, "total_tokens": 421040}
{"current_steps": 495, "total_steps": 2065, "loss": 0.203, "lr": 4.711369192538503e-05, "epoch": 1.1985472154963681, "percentage": 23.97, "elapsed_time": "0:03:40", "remaining_time": "0:11:38", "throughput": 1931.24, "total_tokens": 425136}
{"current_steps": 500, "total_steps": 2065, "loss": 0.4102, "lr": 4.7014315957062685e-05, "epoch": 1.2106537530266344, "percentage": 24.21, "elapsed_time": "0:03:40", "remaining_time": "0:11:30", "throughput": 1948.52, "total_tokens": 429680}
{"current_steps": 505, "total_steps": 2065, "loss": 0.2409, "lr": 4.691336654599873e-05, "epoch": 1.2227602905569008, "percentage": 24.46, "elapsed_time": "0:03:40", "remaining_time": "0:11:22", "throughput": 1965.78, "total_tokens": 434224}
{"current_steps": 510, "total_steps": 2065, "loss": 0.2191, "lr": 4.6810850907412484e-05, "epoch": 1.234866828087167, "percentage": 24.7, "elapsed_time": "0:03:41", "remaining_time": "0:11:14", "throughput": 1980.99, "total_tokens": 438320}
{"current_steps": 515, "total_steps": 2065, "loss": 0.1975, "lr": 4.670677636846723e-05, "epoch": 1.2469733656174333, "percentage": 24.94, "elapsed_time": "0:03:41", "remaining_time": "0:11:07", "throughput": 1997.3, "total_tokens": 442672}
{"current_steps": 520, "total_steps": 2065, "loss": 0.1881, "lr": 4.660115036774648e-05, "epoch": 1.2590799031476998, "percentage": 25.18, "elapsed_time": "0:03:42", "remaining_time": "0:10:59", "throughput": 2012.99, "total_tokens": 446896}
{"current_steps": 520, "total_steps": 2065, "eval_loss": 0.1976936012506485, "epoch": 1.2590799031476998, "percentage": 25.18, "elapsed_time": "0:03:42", "remaining_time": "0:11:01", "throughput": 2006.94, "total_tokens": 446896}
{"current_steps": 525, "total_steps": 2065, "loss": 0.2485, "lr": 4.6493980454722344e-05, "epoch": 1.271186440677966, "percentage": 25.42, "elapsed_time": "0:04:26", "remaining_time": "0:13:01", "throughput": 1694.99, "total_tokens": 451312}
{"current_steps": 530, "total_steps": 2065, "loss": 0.2053, "lr": 4.638527428921592e-05, "epoch": 1.2832929782082325, "percentage": 25.67, "elapsed_time": "0:04:26", "remaining_time": "0:12:52", "throughput": 1708.0, "total_tokens": 455408}
{"current_steps": 535, "total_steps": 2065, "loss": 0.1867, "lr": 4.627503964084981e-05, "epoch": 1.2953995157384988, "percentage": 25.91, "elapsed_time": "0:04:27", "remaining_time": "0:12:43", "throughput": 1723.07, "total_tokens": 460080}
{"current_steps": 540, "total_steps": 2065, "loss": 0.1674, "lr": 4.6163284388492835e-05, "epoch": 1.307506053268765, "percentage": 26.15, "elapsed_time": "0:04:27", "remaining_time": "0:12:35", "throughput": 1737.15, "total_tokens": 464496}
{"current_steps": 545, "total_steps": 2065, "loss": 0.2045, "lr": 4.605001651969686e-05, "epoch": 1.3196125907990315, "percentage": 26.39, "elapsed_time": "0:04:27", "remaining_time": "0:12:26", "throughput": 1750.5, "total_tokens": 468720}
{"current_steps": 550, "total_steps": 2065, "loss": 0.191, "lr": 4.593524413012592e-05, "epoch": 1.331719128329298, "percentage": 26.63, "elapsed_time": "0:04:28", "remaining_time": "0:12:18", "throughput": 1764.97, "total_tokens": 473264}
{"current_steps": 555, "total_steps": 2065, "loss": 0.1828, "lr": 4.5818975422977606e-05, "epoch": 1.3438256658595642, "percentage": 26.88, "elapsed_time": "0:04:28", "remaining_time": "0:12:10", "throughput": 1778.47, "total_tokens": 477552}
{"current_steps": 560, "total_steps": 2065, "loss": 0.1546, "lr": 4.570121870839671e-05, "epoch": 1.3559322033898304, "percentage": 27.12, "elapsed_time": "0:04:28", "remaining_time": "0:12:02", "throughput": 1792.64, "total_tokens": 482032}
{"current_steps": 565, "total_steps": 2065, "loss": 0.2025, "lr": 4.558198240288131e-05, "epoch": 1.368038740920097, "percentage": 27.36, "elapsed_time": "0:04:29", "remaining_time": "0:11:54", "throughput": 1806.28, "total_tokens": 486384}
{"current_steps": 570, "total_steps": 2065, "loss": 0.2413, "lr": 4.546127502868118e-05, "epoch": 1.3801452784503632, "percentage": 27.6, "elapsed_time": "0:04:29", "remaining_time": "0:11:47", "throughput": 1819.67, "total_tokens": 490672}
{"current_steps": 575, "total_steps": 2065, "loss": 0.2163, "lr": 4.5339105213188714e-05, "epoch": 1.3922518159806296, "percentage": 27.85, "elapsed_time": "0:04:30", "remaining_time": "0:11:39", "throughput": 1833.01, "total_tokens": 494960}
{"current_steps": 580, "total_steps": 2065, "loss": 0.3013, "lr": 4.521548168832227e-05, "epoch": 1.4043583535108959, "percentage": 28.09, "elapsed_time": "0:04:30", "remaining_time": "0:11:32", "throughput": 1845.85, "total_tokens": 499120}
{"current_steps": 585, "total_steps": 2065, "loss": 0.2324, "lr": 4.509041328990204e-05, "epoch": 1.4164648910411621, "percentage": 28.33, "elapsed_time": "0:04:30", "remaining_time": "0:11:25", "throughput": 1859.16, "total_tokens": 503408}
{"current_steps": 590, "total_steps": 2065, "loss": 0.1956, "lr": 4.4963908957018576e-05, "epoch": 1.4285714285714286, "percentage": 28.57, "elapsed_time": "0:04:31", "remaining_time": "0:11:17", "throughput": 1871.05, "total_tokens": 507312}
{"current_steps": 595, "total_steps": 2065, "loss": 0.2206, "lr": 4.483597773139386e-05, "epoch": 1.4406779661016949, "percentage": 28.81, "elapsed_time": "0:04:31", "remaining_time": "0:11:10", "throughput": 1884.29, "total_tokens": 511600}
{"current_steps": 600, "total_steps": 2065, "loss": 0.1973, "lr": 4.470662875673506e-05, "epoch": 1.4527845036319613, "percentage": 29.06, "elapsed_time": "0:04:31", "remaining_time": "0:11:03", "throughput": 1897.48, "total_tokens": 515888}
{"current_steps": 605, "total_steps": 2065, "loss": 0.1848, "lr": 4.457587127808096e-05, "epoch": 1.4648910411622276, "percentage": 29.3, "elapsed_time": "0:04:32", "remaining_time": "0:10:56", "throughput": 1909.72, "total_tokens": 519920}
{"current_steps": 610, "total_steps": 2065, "loss": 0.1922, "lr": 4.4443714641141255e-05, "epoch": 1.4769975786924938, "percentage": 29.54, "elapsed_time": "0:04:32", "remaining_time": "0:10:50", "throughput": 1923.28, "total_tokens": 524336}
{"current_steps": 615, "total_steps": 2065, "loss": 0.1922, "lr": 4.4310168291628504e-05, "epoch": 1.4891041162227603, "percentage": 29.78, "elapsed_time": "0:04:32", "remaining_time": "0:10:43", "throughput": 1935.9, "total_tokens": 528496}
{"current_steps": 620, "total_steps": 2065, "loss": 0.1809, "lr": 4.4175241774583084e-05, "epoch": 1.5012106537530268, "percentage": 30.02, "elapsed_time": "0:04:33", "remaining_time": "0:10:37", "throughput": 1948.96, "total_tokens": 532784}
{"current_steps": 624, "total_steps": 2065, "eval_loss": 0.19258780777454376, "epoch": 1.5108958837772397, "percentage": 30.22, "elapsed_time": "0:04:34", "remaining_time": "0:10:33", "throughput": 1954.71, "total_tokens": 536176}
{"current_steps": 625, "total_steps": 2065, "loss": 0.2205, "lr": 4.403894473369092e-05, "epoch": 1.513317191283293, "percentage": 30.27, "elapsed_time": "0:05:14", "remaining_time": "0:12:05", "throughput": 1706.31, "total_tokens": 537136}
{"current_steps": 630, "total_steps": 2065, "loss": 0.26, "lr": 4.390128691059423e-05, "epoch": 1.5254237288135593, "percentage": 30.51, "elapsed_time": "0:05:15", "remaining_time": "0:11:57", "throughput": 1718.3, "total_tokens": 541552}
{"current_steps": 635, "total_steps": 2065, "loss": 0.2678, "lr": 4.3762278144195236e-05, "epoch": 1.5375302663438255, "percentage": 30.75, "elapsed_time": "0:05:15", "remaining_time": "0:11:50", "throughput": 1729.27, "total_tokens": 545648}
{"current_steps": 640, "total_steps": 2065, "loss": 0.2246, "lr": 4.362192836995299e-05, "epoch": 1.549636803874092, "percentage": 30.99, "elapsed_time": "0:05:15", "remaining_time": "0:11:43", "throughput": 1741.78, "total_tokens": 550256}
{"current_steps": 645, "total_steps": 2065, "loss": 0.2397, "lr": 4.348024761917321e-05, "epoch": 1.5617433414043584, "percentage": 31.23, "elapsed_time": "0:05:16", "remaining_time": "0:11:36", "throughput": 1754.47, "total_tokens": 554928}
{"current_steps": 650, "total_steps": 2065, "loss": 0.2303, "lr": 4.333724601829132e-05, "epoch": 1.5738498789346247, "percentage": 31.48, "elapsed_time": "0:05:16", "remaining_time": "0:11:29", "throughput": 1766.33, "total_tokens": 559344}
{"current_steps": 655, "total_steps": 2065, "loss": 0.2178, "lr": 4.319293378814868e-05, "epoch": 1.585956416464891, "percentage": 31.72, "elapsed_time": "0:05:17", "remaining_time": "0:11:22", "throughput": 1778.17, "total_tokens": 563760}
{"current_steps": 660, "total_steps": 2065, "loss": 0.1945, "lr": 4.304732124326206e-05, "epoch": 1.5980629539951574, "percentage": 31.96, "elapsed_time": "0:05:17", "remaining_time": "0:11:15", "throughput": 1789.8, "total_tokens": 568112}
{"current_steps": 665, "total_steps": 2065, "loss": 0.1908, "lr": 4.2900418791086403e-05, "epoch": 1.6101694915254239, "percentage": 32.2, "elapsed_time": "0:05:17", "remaining_time": "0:11:09", "throughput": 1801.39, "total_tokens": 572464}
{"current_steps": 670, "total_steps": 2065, "loss": 0.2026, "lr": 4.275223693127103e-05, "epoch": 1.6222760290556901, "percentage": 32.45, "elapsed_time": "0:05:18", "remaining_time": "0:11:02", "throughput": 1812.76, "total_tokens": 576752}
{"current_steps": 675, "total_steps": 2065, "loss": 0.1959, "lr": 4.260278625490911e-05, "epoch": 1.6343825665859564, "percentage": 32.69, "elapsed_time": "0:05:18", "remaining_time": "0:10:55", "throughput": 1823.89, "total_tokens": 580976}
{"current_steps": 680, "total_steps": 2065, "loss": 0.2025, "lr": 4.2452077443780744e-05, "epoch": 1.6464891041162226, "percentage": 32.93, "elapsed_time": "0:05:18", "remaining_time": "0:10:49", "throughput": 1835.2, "total_tokens": 585264}
{"current_steps": 685, "total_steps": 2065, "loss": 0.1777, "lr": 4.2300121269589475e-05, "epoch": 1.658595641646489, "percentage": 33.17, "elapsed_time": "0:05:19", "remaining_time": "0:10:43", "throughput": 1847.07, "total_tokens": 589744}
{"current_steps": 690, "total_steps": 2065, "loss": 0.2142, "lr": 4.214692859319237e-05, "epoch": 1.6707021791767556, "percentage": 33.41, "elapsed_time": "0:05:19", "remaining_time": "0:10:37", "throughput": 1858.13, "total_tokens": 593968}
{"current_steps": 695, "total_steps": 2065, "loss": 0.2096, "lr": 4.19925103638238e-05, "epoch": 1.6828087167070218, "percentage": 33.66, "elapsed_time": "0:05:20", "remaining_time": "0:10:30", "throughput": 1869.38, "total_tokens": 598256}
{"current_steps": 700, "total_steps": 2065, "loss": 0.1881, "lr": 4.183687761831281e-05, "epoch": 1.694915254237288, "percentage": 33.9, "elapsed_time": "0:05:20", "remaining_time": "0:10:24", "throughput": 1880.79, "total_tokens": 602608}
{"current_steps": 705, "total_steps": 2065, "loss": 0.1678, "lr": 4.168004148029435e-05, "epoch": 1.7070217917675545, "percentage": 34.14, "elapsed_time": "0:05:20", "remaining_time": "0:10:18", "throughput": 1892.56, "total_tokens": 607088}
{"current_steps": 710, "total_steps": 2065, "loss": 0.243, "lr": 4.1522013159414144e-05, "epoch": 1.7191283292978208, "percentage": 34.38, "elapsed_time": "0:05:21", "remaining_time": "0:10:12", "throughput": 1903.34, "total_tokens": 611248}
{"current_steps": 715, "total_steps": 2065, "loss": 0.2024, "lr": 4.136280395052754e-05, "epoch": 1.7312348668280872, "percentage": 34.62, "elapsed_time": "0:05:21", "remaining_time": "0:10:07", "throughput": 1914.47, "total_tokens": 615536}
{"current_steps": 720, "total_steps": 2065, "loss": 0.1803, "lr": 4.120242523289223e-05, "epoch": 1.7433414043583535, "percentage": 34.87, "elapsed_time": "0:05:21", "remaining_time": "0:10:01", "throughput": 1925.97, "total_tokens": 619952}
{"current_steps": 725, "total_steps": 2065, "loss": 0.1949, "lr": 4.1040888469354925e-05, "epoch": 1.7554479418886197, "percentage": 35.11, "elapsed_time": "0:05:22", "remaining_time": "0:09:55", "throughput": 1937.45, "total_tokens": 624368}
{"current_steps": 728, "total_steps": 2065, "eval_loss": 0.19822187721729279, "epoch": 1.7627118644067796, "percentage": 35.25, "elapsed_time": "0:05:23", "remaining_time": "0:09:54", "throughput": 1937.69, "total_tokens": 626992}
{"current_steps": 730, "total_steps": 2065, "loss": 0.1935, "lr": 4.087820520553205e-05, "epoch": 1.7675544794188862, "percentage": 35.35, "elapsed_time": "0:06:23", "remaining_time": "0:11:40", "throughput": 1641.4, "total_tokens": 628720}
{"current_steps": 735, "total_steps": 2065, "loss": 0.1884, "lr": 4.0714387068984574e-05, "epoch": 1.7796610169491527, "percentage": 35.59, "elapsed_time": "0:06:23", "remaining_time": "0:11:33", "throughput": 1650.99, "total_tokens": 633008}
{"current_steps": 740, "total_steps": 2065, "loss": 0.2014, "lr": 4.05494457683869e-05, "epoch": 1.791767554479419, "percentage": 35.84, "elapsed_time": "0:06:23", "remaining_time": "0:11:27", "throughput": 1660.72, "total_tokens": 637360}
{"current_steps": 745, "total_steps": 2065, "loss": 0.2152, "lr": 4.038339309269002e-05, "epoch": 1.8038740920096852, "percentage": 36.08, "elapsed_time": "0:06:24", "remaining_time": "0:11:20", "throughput": 1670.27, "total_tokens": 641648}
{"current_steps": 750, "total_steps": 2065, "loss": 0.192, "lr": 4.021624091027895e-05, "epoch": 1.8159806295399514, "percentage": 36.32, "elapsed_time": "0:06:24", "remaining_time": "0:11:14", "throughput": 1678.85, "total_tokens": 645552}
{"current_steps": 755, "total_steps": 2065, "loss": 0.3049, "lr": 4.004800116812441e-05, "epoch": 1.828087167070218, "percentage": 36.56, "elapsed_time": "0:06:24", "remaining_time": "0:11:07", "throughput": 1688.54, "total_tokens": 649904}
{"current_steps": 760, "total_steps": 2065, "loss": 0.184, "lr": 3.987868589092893e-05, "epoch": 1.8401937046004844, "percentage": 36.8, "elapsed_time": "0:06:25", "remaining_time": "0:11:01", "throughput": 1697.87, "total_tokens": 654128}
{"current_steps": 765, "total_steps": 2065, "loss": 0.1914, "lr": 3.9708307180267456e-05, "epoch": 1.8523002421307506, "percentage": 37.05, "elapsed_time": "0:06:25", "remaining_time": "0:10:55", "throughput": 1707.98, "total_tokens": 658672}
{"current_steps": 770, "total_steps": 2065, "loss": 0.4553, "lr": 3.953687721372233e-05, "epoch": 1.8644067796610169, "percentage": 37.29, "elapsed_time": "0:06:26", "remaining_time": "0:10:49", "throughput": 1717.76, "total_tokens": 663088}
{"current_steps": 775, "total_steps": 2065, "loss": 0.1709, "lr": 3.936440824401299e-05, "epoch": 1.8765133171912833, "percentage": 37.53, "elapsed_time": "0:06:26", "remaining_time": "0:10:43", "throughput": 1727.35, "total_tokens": 667440}
{"current_steps": 780, "total_steps": 2065, "loss": 0.1831, "lr": 3.919091259812013e-05, "epoch": 1.8886198547215496, "percentage": 37.77, "elapsed_time": "0:06:26", "remaining_time": "0:10:37", "throughput": 1736.94, "total_tokens": 671792}
{"current_steps": 785, "total_steps": 2065, "loss": 0.2175, "lr": 3.9016402676404753e-05, "epoch": 1.900726392251816, "percentage": 38.01, "elapsed_time": "0:06:27", "remaining_time": "0:10:31", "throughput": 1746.98, "total_tokens": 676336}
{"current_steps": 790, "total_steps": 2065, "loss": 0.18, "lr": 3.884089095172181e-05, "epoch": 1.9128329297820823, "percentage": 38.26, "elapsed_time": "0:06:27", "remaining_time": "0:10:25", "throughput": 1756.37, "total_tokens": 680624}
{"current_steps": 795, "total_steps": 2065, "loss": 0.1914, "lr": 3.866438996852872e-05, "epoch": 1.9249394673123486, "percentage": 38.5, "elapsed_time": "0:06:27", "remaining_time": "0:10:19", "throughput": 1766.05, "total_tokens": 685040}
{"current_steps": 800, "total_steps": 2065, "loss": 0.1935, "lr": 3.848691234198879e-05, "epoch": 1.937046004842615, "percentage": 38.74, "elapsed_time": "0:06:28", "remaining_time": "0:10:13", "throughput": 1775.55, "total_tokens": 689392}
{"current_steps": 805, "total_steps": 2065, "loss": 0.2046, "lr": 3.830847075706956e-05, "epoch": 1.9491525423728815, "percentage": 38.98, "elapsed_time": "0:06:28", "remaining_time": "0:10:08", "throughput": 1784.57, "total_tokens": 693552}
{"current_steps": 810, "total_steps": 2065, "loss": 0.2291, "lr": 3.812907796763616e-05, "epoch": 1.9612590799031477, "percentage": 39.23, "elapsed_time": "0:06:29", "remaining_time": "0:10:02", "throughput": 1794.36, "total_tokens": 698032}
{"current_steps": 815, "total_steps": 2065, "loss": 0.1751, "lr": 3.7948746795539745e-05, "epoch": 1.973365617433414, "percentage": 39.47, "elapsed_time": "0:06:29", "remaining_time": "0:09:57", "throughput": 1802.88, "total_tokens": 702000}
{"current_steps": 820, "total_steps": 2065, "loss": 0.1795, "lr": 3.776749012970105e-05, "epoch": 1.9854721549636802, "percentage": 39.71, "elapsed_time": "0:06:29", "remaining_time": "0:09:51", "throughput": 1811.84, "total_tokens": 706160}
{"current_steps": 825, "total_steps": 2065, "loss": 0.1852, "lr": 3.758532092518924e-05, "epoch": 1.9975786924939467, "percentage": 39.95, "elapsed_time": "0:06:30", "remaining_time": "0:09:46", "throughput": 1821.89, "total_tokens": 710768}
{"current_steps": 830, "total_steps": 2065, "loss": 0.256, "lr": 3.740225220229587e-05, "epoch": 2.009685230024213, "percentage": 40.19, "elapsed_time": "0:06:30", "remaining_time": "0:09:41", "throughput": 1829.93, "total_tokens": 714744}
{"current_steps": 832, "total_steps": 2065, "eval_loss": 0.1934857964515686, "epoch": 2.0145278450363198, "percentage": 40.29, "elapsed_time": "0:06:31", "remaining_time": "0:09:39", "throughput": 1830.36, "total_tokens": 716344}
{"current_steps": 835, "total_steps": 2065, "loss": 0.1878, "lr": 3.721829704560436e-05, "epoch": 2.0217917675544794, "percentage": 40.44, "elapsed_time": "0:07:01", "remaining_time": "0:10:21", "throughput": 1704.63, "total_tokens": 718776}
{"current_steps": 840, "total_steps": 2065, "loss": 0.2215, "lr": 3.7033468603054725e-05, "epoch": 2.0338983050847457, "percentage": 40.68, "elapsed_time": "0:07:02", "remaining_time": "0:10:15", "throughput": 1712.55, "total_tokens": 722744}
{"current_steps": 845, "total_steps": 2065, "loss": 0.1657, "lr": 3.6847780085003905e-05, "epoch": 2.046004842615012, "percentage": 40.92, "elapsed_time": "0:07:02", "remaining_time": "0:10:09", "throughput": 1721.42, "total_tokens": 727160}
{"current_steps": 850, "total_steps": 2065, "loss": 0.1957, "lr": 3.666124476328155e-05, "epoch": 2.0581113801452786, "percentage": 41.16, "elapsed_time": "0:07:02", "remaining_time": "0:10:04", "throughput": 1730.33, "total_tokens": 731576}
{"current_steps": 855, "total_steps": 2065, "loss": 0.1881, "lr": 3.647387597024139e-05, "epoch": 2.070217917675545, "percentage": 41.4, "elapsed_time": "0:07:03", "remaining_time": "0:09:58", "throughput": 1739.66, "total_tokens": 736184}
{"current_steps": 860, "total_steps": 2065, "loss": 0.2041, "lr": 3.6285687097808394e-05, "epoch": 2.082324455205811, "percentage": 41.65, "elapsed_time": "0:07:03", "remaining_time": "0:09:53", "throughput": 1748.23, "total_tokens": 740472}
{"current_steps": 865, "total_steps": 2065, "loss": 0.213, "lr": 3.609669159652158e-05, "epoch": 2.0944309927360774, "percentage": 41.89, "elapsed_time": "0:07:03", "remaining_time": "0:09:48", "throughput": 1756.79, "total_tokens": 744760}
{"current_steps": 870, "total_steps": 2065, "loss": 0.1913, "lr": 3.590690297457262e-05, "epoch": 2.106537530266344, "percentage": 42.13, "elapsed_time": "0:07:04", "remaining_time": "0:09:42", "throughput": 1765.65, "total_tokens": 749176}
{"current_steps": 875, "total_steps": 2065, "loss": 0.1961, "lr": 3.57163347968404e-05, "epoch": 2.1186440677966103, "percentage": 42.37, "elapsed_time": "0:07:04", "remaining_time": "0:09:37", "throughput": 1774.36, "total_tokens": 753528}
{"current_steps": 880, "total_steps": 2065, "loss": 0.1981, "lr": 3.552500068392147e-05, "epoch": 2.1307506053268765, "percentage": 42.62, "elapsed_time": "0:07:05", "remaining_time": "0:09:32", "throughput": 1782.59, "total_tokens": 757688}
{"current_steps": 885, "total_steps": 2065, "loss": 0.2002, "lr": 3.533291431115653e-05, "epoch": 2.142857142857143, "percentage": 42.86, "elapsed_time": "0:07:05", "remaining_time": "0:09:27", "throughput": 1791.24, "total_tokens": 762040}
{"current_steps": 890, "total_steps": 2065, "loss": 0.1856, "lr": 3.514008940765304e-05, "epoch": 2.154963680387409, "percentage": 43.1, "elapsed_time": "0:07:05", "remaining_time": "0:09:22", "throughput": 1799.47, "total_tokens": 766200}
{"current_steps": 895, "total_steps": 2065, "loss": 0.2107, "lr": 3.494653975530388e-05, "epoch": 2.1670702179176757, "percentage": 43.34, "elapsed_time": "0:07:06", "remaining_time": "0:09:17", "throughput": 1808.39, "total_tokens": 770680}
{"current_steps": 900, "total_steps": 2065, "loss": 0.1771, "lr": 3.475227918780239e-05, "epoch": 2.179176755447942, "percentage": 43.58, "elapsed_time": "0:07:06", "remaining_time": "0:09:12", "throughput": 1816.58, "total_tokens": 774840}
{"current_steps": 905, "total_steps": 2065, "loss": 0.1924, "lr": 3.4557321589653556e-05, "epoch": 2.1912832929782082, "percentage": 43.83, "elapsed_time": "0:07:06", "remaining_time": "0:09:07", "throughput": 1825.19, "total_tokens": 779192}
{"current_steps": 910, "total_steps": 2065, "loss": 0.1687, "lr": 3.436168089518168e-05, "epoch": 2.2033898305084745, "percentage": 44.07, "elapsed_time": "0:07:07", "remaining_time": "0:09:02", "throughput": 1833.92, "total_tokens": 783608}
{"current_steps": 915, "total_steps": 2065, "loss": 0.1922, "lr": 3.416537108753443e-05, "epoch": 2.2154963680387407, "percentage": 44.31, "elapsed_time": "0:07:07", "remaining_time": "0:08:57", "throughput": 1842.79, "total_tokens": 788088}
{"current_steps": 920, "total_steps": 2065, "loss": 0.1721, "lr": 3.3968406197683376e-05, "epoch": 2.2276029055690074, "percentage": 44.55, "elapsed_time": "0:07:08", "remaining_time": "0:08:52", "throughput": 1851.61, "total_tokens": 792568}
{"current_steps": 925, "total_steps": 2065, "loss": 0.2058, "lr": 3.3770800303421254e-05, "epoch": 2.2397094430992737, "percentage": 44.79, "elapsed_time": "0:07:08", "remaining_time": "0:08:47", "throughput": 1860.74, "total_tokens": 797176}
{"current_steps": 930, "total_steps": 2065, "loss": 0.1925, "lr": 3.357256752835561e-05, "epoch": 2.25181598062954, "percentage": 45.04, "elapsed_time": "0:07:08", "remaining_time": "0:08:43", "throughput": 1868.97, "total_tokens": 801400}
{"current_steps": 935, "total_steps": 2065, "loss": 0.1601, "lr": 3.3373722040899517e-05, "epoch": 2.263922518159806, "percentage": 45.28, "elapsed_time": "0:07:09", "remaining_time": "0:08:38", "throughput": 1877.92, "total_tokens": 805944}
{"current_steps": 936, "total_steps": 2065, "eval_loss": 0.38670673966407776, "epoch": 2.2663438256658597, "percentage": 45.33, "elapsed_time": "0:07:11", "remaining_time": "0:08:40", "throughput": 1869.66, "total_tokens": 806712}
{"current_steps": 940, "total_steps": 2065, "loss": 0.9421, "lr": 3.317427805325875e-05, "epoch": 2.2760290556900724, "percentage": 45.52, "elapsed_time": "0:07:46", "remaining_time": "0:09:18", "throughput": 1736.71, "total_tokens": 810040}
{"current_steps": 945, "total_steps": 2065, "loss": 0.191, "lr": 3.297424982041609e-05, "epoch": 2.288135593220339, "percentage": 45.76, "elapsed_time": "0:07:46", "remaining_time": "0:09:13", "throughput": 1744.65, "total_tokens": 814392}
{"current_steps": 950, "total_steps": 2065, "loss": 0.1962, "lr": 3.277365163911243e-05, "epoch": 2.3002421307506054, "percentage": 46.0, "elapsed_time": "0:07:47", "remaining_time": "0:09:08", "throughput": 1752.84, "total_tokens": 818872}
{"current_steps": 955, "total_steps": 2065, "loss": 0.2261, "lr": 3.257249784682492e-05, "epoch": 2.3123486682808716, "percentage": 46.25, "elapsed_time": "0:07:47", "remaining_time": "0:09:03", "throughput": 1760.48, "total_tokens": 823096}
{"current_steps": 960, "total_steps": 2065, "loss": 0.1945, "lr": 3.2370802820742275e-05, "epoch": 2.324455205811138, "percentage": 46.49, "elapsed_time": "0:07:47", "remaining_time": "0:08:58", "throughput": 1767.72, "total_tokens": 827128}
{"current_steps": 965, "total_steps": 2065, "loss": 0.2272, "lr": 3.2168580976737104e-05, "epoch": 2.3365617433414045, "percentage": 46.73, "elapsed_time": "0:07:48", "remaining_time": "0:08:53", "throughput": 1775.21, "total_tokens": 831288}
{"current_steps": 970, "total_steps": 2065, "loss": 0.1824, "lr": 3.196584676833562e-05, "epoch": 2.348668280871671, "percentage": 46.97, "elapsed_time": "0:07:48", "remaining_time": "0:08:49", "throughput": 1783.09, "total_tokens": 835640}
{"current_steps": 975, "total_steps": 2065, "loss": 0.156, "lr": 3.1762614685684567e-05, "epoch": 2.360774818401937, "percentage": 47.22, "elapsed_time": "0:07:49", "remaining_time": "0:08:44", "throughput": 1790.43, "total_tokens": 839736}
{"current_steps": 980, "total_steps": 2065, "loss": 0.2199, "lr": 3.155889925451557e-05, "epoch": 2.3728813559322033, "percentage": 47.46, "elapsed_time": "0:07:49", "remaining_time": "0:08:39", "throughput": 1798.14, "total_tokens": 844024}
{"current_steps": 985, "total_steps": 2065, "loss": 0.1885, "lr": 3.1354715035106894e-05, "epoch": 2.38498789346247, "percentage": 47.7, "elapsed_time": "0:07:49", "remaining_time": "0:08:35", "throughput": 1805.72, "total_tokens": 848248}
{"current_steps": 990, "total_steps": 2065, "loss": 0.1645, "lr": 3.1150076621242816e-05, "epoch": 2.3970944309927362, "percentage": 47.94, "elapsed_time": "0:07:50", "remaining_time": "0:08:30", "throughput": 1813.28, "total_tokens": 852472}
{"current_steps": 995, "total_steps": 2065, "loss": 0.1747, "lr": 3.0944998639170544e-05, "epoch": 2.4092009685230025, "percentage": 48.18, "elapsed_time": "0:07:50", "remaining_time": "0:08:25", "throughput": 1821.09, "total_tokens": 856824}
{"current_steps": 1000, "total_steps": 2065, "loss": 0.1751, "lr": 3.073949574655479e-05, "epoch": 2.4213075060532687, "percentage": 48.43, "elapsed_time": "0:07:50", "remaining_time": "0:08:21", "throughput": 1828.5, "total_tokens": 860984}
{"current_steps": 1005, "total_steps": 2065, "loss": 0.1975, "lr": 3.053358263143015e-05, "epoch": 2.433414043583535, "percentage": 48.67, "elapsed_time": "0:07:51", "remaining_time": "0:08:17", "throughput": 1836.17, "total_tokens": 865272}
{"current_steps": 1010, "total_steps": 2065, "loss": 0.1765, "lr": 3.032727401115135e-05, "epoch": 2.4455205811138017, "percentage": 48.91, "elapsed_time": "0:07:51", "remaining_time": "0:08:12", "throughput": 1843.81, "total_tokens": 869560}
{"current_steps": 1015, "total_steps": 2065, "loss": 0.1624, "lr": 3.012058463134126e-05, "epoch": 2.457627118644068, "percentage": 49.15, "elapsed_time": "0:07:51", "remaining_time": "0:08:08", "throughput": 1851.71, "total_tokens": 873976}
{"current_steps": 1020, "total_steps": 2065, "loss": 0.2237, "lr": 2.991352926483702e-05, "epoch": 2.469733656174334, "percentage": 49.39, "elapsed_time": "0:07:52", "remaining_time": "0:08:03", "throughput": 1859.2, "total_tokens": 878200}
{"current_steps": 1025, "total_steps": 2065, "loss": 0.2024, "lr": 2.9706122710634165e-05, "epoch": 2.4818401937046004, "percentage": 49.64, "elapsed_time": "0:07:52", "remaining_time": "0:07:59", "throughput": 1867.61, "total_tokens": 882872}
{"current_steps": 1030, "total_steps": 2065, "loss": 0.2673, "lr": 2.949837979282889e-05, "epoch": 2.4939467312348667, "percentage": 49.88, "elapsed_time": "0:07:53", "remaining_time": "0:07:55", "throughput": 1875.07, "total_tokens": 887096}
{"current_steps": 1035, "total_steps": 2065, "loss": 0.2168, "lr": 2.92903153595585e-05, "epoch": 2.5060532687651333, "percentage": 50.12, "elapsed_time": "0:07:53", "remaining_time": "0:07:51", "throughput": 1883.05, "total_tokens": 891576}
{"current_steps": 1040, "total_steps": 2065, "loss": 0.1768, "lr": 2.908194428194019e-05, "epoch": 2.5181598062953996, "percentage": 50.36, "elapsed_time": "0:07:53", "remaining_time": "0:07:47", "throughput": 1890.36, "total_tokens": 895736}
{"current_steps": 1040, "total_steps": 2065, "eval_loss": 0.1943914145231247, "epoch": 2.5181598062953996, "percentage": 50.36, "elapsed_time": "0:07:54", "remaining_time": "0:07:47", "throughput": 1887.68, "total_tokens": 895736}
{"current_steps": 1045, "total_steps": 2065, "loss": 0.1555, "lr": 2.88732814530081e-05, "epoch": 2.530266343825666, "percentage": 50.61, "elapsed_time": "0:08:52", "remaining_time": "0:08:39", "throughput": 1691.76, "total_tokens": 900024}
{"current_steps": 1050, "total_steps": 2065, "loss": 0.1744, "lr": 2.866434178664893e-05, "epoch": 2.542372881355932, "percentage": 50.85, "elapsed_time": "0:08:52", "remaining_time": "0:08:34", "throughput": 1698.88, "total_tokens": 904440}
{"current_steps": 1055, "total_steps": 2065, "loss": 0.1842, "lr": 2.8455140216535947e-05, "epoch": 2.5544794188861983, "percentage": 51.09, "elapsed_time": "0:08:52", "remaining_time": "0:08:30", "throughput": 1705.74, "total_tokens": 908728}
{"current_steps": 1060, "total_steps": 2065, "loss": 0.2018, "lr": 2.8245691695061604e-05, "epoch": 2.566585956416465, "percentage": 51.33, "elapsed_time": "0:08:53", "remaining_time": "0:08:25", "throughput": 1712.59, "total_tokens": 913016}
{"current_steps": 1065, "total_steps": 2065, "loss": 0.2027, "lr": 2.8036011192268863e-05, "epoch": 2.5786924939467313, "percentage": 51.57, "elapsed_time": "0:08:53", "remaining_time": "0:08:20", "throughput": 1719.43, "total_tokens": 917304}
{"current_steps": 1070, "total_steps": 2065, "loss": 0.1984, "lr": 2.7826113694781252e-05, "epoch": 2.5907990314769975, "percentage": 51.82, "elapsed_time": "0:08:53", "remaining_time": "0:08:16", "throughput": 1726.15, "total_tokens": 921528}
{"current_steps": 1075, "total_steps": 2065, "loss": 0.1674, "lr": 2.761601420473168e-05, "epoch": 2.6029055690072638, "percentage": 52.06, "elapsed_time": "0:08:54", "remaining_time": "0:08:11", "throughput": 1733.21, "total_tokens": 925944}
{"current_steps": 1080, "total_steps": 2065, "loss": 0.1523, "lr": 2.740572773869019e-05, "epoch": 2.61501210653753, "percentage": 52.3, "elapsed_time": "0:08:54", "remaining_time": "0:08:07", "throughput": 1740.95, "total_tokens": 930744}
{"current_steps": 1085, "total_steps": 2065, "loss": 0.1263, "lr": 2.7195269326590682e-05, "epoch": 2.6271186440677967, "percentage": 52.54, "elapsed_time": "0:08:54", "remaining_time": "0:08:03", "throughput": 1748.34, "total_tokens": 935352}
{"current_steps": 1090, "total_steps": 2065, "loss": 0.1656, "lr": 2.6984654010656667e-05, "epoch": 2.639225181598063, "percentage": 52.78, "elapsed_time": "0:08:55", "remaining_time": "0:07:58", "throughput": 1755.13, "total_tokens": 939640}
{"current_steps": 1095, "total_steps": 2065, "loss": 0.2926, "lr": 2.6773896844326125e-05, "epoch": 2.651331719128329, "percentage": 53.03, "elapsed_time": "0:08:55", "remaining_time": "0:07:54", "throughput": 1761.45, "total_tokens": 943672}
{"current_steps": 1100, "total_steps": 2065, "loss": 0.1547, "lr": 2.656301289117561e-05, "epoch": 2.663438256658596, "percentage": 53.27, "elapsed_time": "0:08:56", "remaining_time": "0:07:50", "throughput": 1767.77, "total_tokens": 947704}
{"current_steps": 1105, "total_steps": 2065, "loss": 0.2428, "lr": 2.6352017223843585e-05, "epoch": 2.6755447941888617, "percentage": 53.51, "elapsed_time": "0:08:56", "remaining_time": "0:07:46", "throughput": 1774.42, "total_tokens": 951928}
{"current_steps": 1110, "total_steps": 2065, "loss": 0.1649, "lr": 2.6140924922953125e-05, "epoch": 2.6876513317191284, "percentage": 53.75, "elapsed_time": "0:08:56", "remaining_time": "0:07:41", "throughput": 1781.17, "total_tokens": 956216}
{"current_steps": 1115, "total_steps": 2065, "loss": 0.1597, "lr": 2.5929751076034058e-05, "epoch": 2.6997578692493946, "percentage": 54.0, "elapsed_time": "0:08:57", "remaining_time": "0:07:37", "throughput": 1787.91, "total_tokens": 960504}
{"current_steps": 1120, "total_steps": 2065, "loss": 0.1407, "lr": 2.571851077644461e-05, "epoch": 2.711864406779661, "percentage": 54.24, "elapsed_time": "0:08:57", "remaining_time": "0:07:33", "throughput": 1795.11, "total_tokens": 965048}
{"current_steps": 1125, "total_steps": 2065, "loss": 0.1667, "lr": 2.5507219122292598e-05, "epoch": 2.7239709443099276, "percentage": 54.48, "elapsed_time": "0:08:57", "remaining_time": "0:07:29", "throughput": 1801.6, "total_tokens": 969208}
{"current_steps": 1130, "total_steps": 2065, "loss": 0.1438, "lr": 2.529589121535636e-05, "epoch": 2.736077481840194, "percentage": 54.72, "elapsed_time": "0:08:58", "remaining_time": "0:07:25", "throughput": 1808.55, "total_tokens": 973624}
{"current_steps": 1135, "total_steps": 2065, "loss": 0.2294, "lr": 2.5084542160005335e-05, "epoch": 2.74818401937046, "percentage": 54.96, "elapsed_time": "0:08:58", "remaining_time": "0:07:21", "throughput": 1815.37, "total_tokens": 977976}
{"current_steps": 1140, "total_steps": 2065, "loss": 0.1964, "lr": 2.487318706212051e-05, "epoch": 2.7602905569007263, "percentage": 55.21, "elapsed_time": "0:08:59", "remaining_time": "0:07:17", "throughput": 1821.95, "total_tokens": 982200}
{"current_steps": 1144, "total_steps": 2065, "eval_loss": 0.19318054616451263, "epoch": 2.7699757869249395, "percentage": 55.4, "elapsed_time": "0:09:00", "remaining_time": "0:07:14", "throughput": 1825.11, "total_tokens": 985592}
{"current_steps": 1145, "total_steps": 2065, "loss": 0.203, "lr": 2.4661841028014785e-05, "epoch": 2.7723970944309926, "percentage": 55.45, "elapsed_time": "0:09:35", "remaining_time": "0:07:42", "throughput": 1714.45, "total_tokens": 986488}
{"current_steps": 1150, "total_steps": 2065, "loss": 0.1983, "lr": 2.445051916335321e-05, "epoch": 2.7845036319612593, "percentage": 55.69, "elapsed_time": "0:09:35", "remaining_time": "0:07:38", "throughput": 1720.25, "total_tokens": 990456}
{"current_steps": 1155, "total_steps": 2065, "loss": 0.1825, "lr": 2.4239236572073352e-05, "epoch": 2.7966101694915255, "percentage": 55.93, "elapsed_time": "0:09:36", "remaining_time": "0:07:33", "throughput": 1726.57, "total_tokens": 994744}
{"current_steps": 1160, "total_steps": 2065, "loss": 0.178, "lr": 2.4028008355305815e-05, "epoch": 2.8087167070217918, "percentage": 56.17, "elapsed_time": "0:09:36", "remaining_time": "0:07:29", "throughput": 1733.1, "total_tokens": 999160}
{"current_steps": 1165, "total_steps": 2065, "loss": 0.1709, "lr": 2.3816849610294783e-05, "epoch": 2.820823244552058, "percentage": 56.42, "elapsed_time": "0:09:36", "remaining_time": "0:07:25", "throughput": 1739.09, "total_tokens": 1003256}
{"current_steps": 1170, "total_steps": 2065, "loss": 0.1853, "lr": 2.3605775429319115e-05, "epoch": 2.8329297820823243, "percentage": 56.66, "elapsed_time": "0:09:37", "remaining_time": "0:07:21", "throughput": 1745.28, "total_tokens": 1007480}
{"current_steps": 1175, "total_steps": 2065, "loss": 0.1431, "lr": 2.3394800898613535e-05, "epoch": 2.845036319612591, "percentage": 56.9, "elapsed_time": "0:09:37", "remaining_time": "0:07:17", "throughput": 1751.79, "total_tokens": 1011896}
{"current_steps": 1180, "total_steps": 2065, "loss": 0.2253, "lr": 2.318394109729041e-05, "epoch": 2.857142857142857, "percentage": 57.14, "elapsed_time": "0:09:38", "remaining_time": "0:07:13", "throughput": 1757.76, "total_tokens": 1015992}
{"current_steps": 1185, "total_steps": 2065, "loss": 0.1686, "lr": 2.297321109626198e-05, "epoch": 2.8692493946731235, "percentage": 57.38, "elapsed_time": "0:09:38", "remaining_time": "0:07:09", "throughput": 1764.24, "total_tokens": 1020408}
{"current_steps": 1190, "total_steps": 2065, "loss": 0.1988, "lr": 2.27626259571632e-05, "epoch": 2.8813559322033897, "percentage": 57.63, "elapsed_time": "0:09:38", "remaining_time": "0:07:05", "throughput": 1771.03, "total_tokens": 1025016}
{"current_steps": 1195, "total_steps": 2065, "loss": 0.1682, "lr": 2.2552200731275213e-05, "epoch": 2.893462469733656, "percentage": 57.87, "elapsed_time": "0:09:39", "remaining_time": "0:07:01", "throughput": 1777.39, "total_tokens": 1029368}
{"current_steps": 1200, "total_steps": 2065, "loss": 0.1918, "lr": 2.2341950458449576e-05, "epoch": 2.9055690072639226, "percentage": 58.11, "elapsed_time": "0:09:39", "remaining_time": "0:06:57", "throughput": 1783.54, "total_tokens": 1033592}
{"current_steps": 1205, "total_steps": 2065, "loss": 0.2047, "lr": 2.213189016603333e-05, "epoch": 2.917675544794189, "percentage": 58.35, "elapsed_time": "0:09:39", "remaining_time": "0:06:53", "throughput": 1789.47, "total_tokens": 1037688}
{"current_steps": 1210, "total_steps": 2065, "loss": 0.1686, "lr": 2.1922034867794925e-05, "epoch": 2.929782082324455, "percentage": 58.6, "elapsed_time": "0:09:40", "remaining_time": "0:06:50", "throughput": 1795.6, "total_tokens": 1041912}
{"current_steps": 1215, "total_steps": 2065, "loss": 0.1663, "lr": 2.1712399562851147e-05, "epoch": 2.9418886198547214, "percentage": 58.84, "elapsed_time": "0:09:40", "remaining_time": "0:06:46", "throughput": 1802.14, "total_tokens": 1046392}
{"current_steps": 1220, "total_steps": 2065, "loss": 0.1158, "lr": 2.150299923459505e-05, "epoch": 2.9539951573849876, "percentage": 59.08, "elapsed_time": "0:09:41", "remaining_time": "0:06:42", "throughput": 1808.26, "total_tokens": 1050616}
{"current_steps": 1225, "total_steps": 2065, "loss": 0.1857, "lr": 2.1293848849625065e-05, "epoch": 2.9661016949152543, "percentage": 59.32, "elapsed_time": "0:09:41", "remaining_time": "0:06:38", "throughput": 1814.37, "total_tokens": 1054840}
{"current_steps": 1230, "total_steps": 2065, "loss": 0.2051, "lr": 2.108496335667527e-05, "epoch": 2.9782082324455206, "percentage": 59.56, "elapsed_time": "0:09:41", "remaining_time": "0:06:34", "throughput": 1820.25, "total_tokens": 1058936}
{"current_steps": 1235, "total_steps": 2065, "loss": 0.137, "lr": 2.0876357685546944e-05, "epoch": 2.990314769975787, "percentage": 59.81, "elapsed_time": "0:09:42", "remaining_time": "0:06:31", "throughput": 1826.56, "total_tokens": 1063288}
{"current_steps": 1240, "total_steps": 2065, "loss": 0.294, "lr": 2.06680467460415e-05, "epoch": 3.002421307506053, "percentage": 60.05, "elapsed_time": "0:09:42", "remaining_time": "0:06:27", "throughput": 1832.19, "total_tokens": 1067392}
{"current_steps": 1245, "total_steps": 2065, "loss": 0.1436, "lr": 2.0460045426894817e-05, "epoch": 3.0145278450363198, "percentage": 60.29, "elapsed_time": "0:09:42", "remaining_time": "0:06:23", "throughput": 1838.67, "total_tokens": 1071872}
{"current_steps": 1248, "total_steps": 2065, "eval_loss": 0.20527909696102142, "epoch": 3.0217917675544794, "percentage": 60.44, "elapsed_time": "0:09:43", "remaining_time": "0:06:22", "throughput": 1840.65, "total_tokens": 1074624}
{"current_steps": 1250, "total_steps": 2065, "loss": 0.1503, "lr": 2.0252368594713083e-05, "epoch": 3.026634382566586, "percentage": 60.53, "elapsed_time": "0:10:13", "remaining_time": "0:06:40", "throughput": 1753.59, "total_tokens": 1076416}
{"current_steps": 1255, "total_steps": 2065, "loss": 0.156, "lr": 2.004503109291023e-05, "epoch": 3.0387409200968523, "percentage": 60.77, "elapsed_time": "0:10:14", "remaining_time": "0:06:36", "throughput": 1759.2, "total_tokens": 1080512}
{"current_steps": 1260, "total_steps": 2065, "loss": 0.1971, "lr": 1.9838047740647026e-05, "epoch": 3.0508474576271185, "percentage": 61.02, "elapsed_time": "0:10:14", "remaining_time": "0:06:32", "throughput": 1764.79, "total_tokens": 1084608}
{"current_steps": 1265, "total_steps": 2065, "loss": 0.1813, "lr": 1.9631433331771886e-05, "epoch": 3.062953995157385, "percentage": 61.26, "elapsed_time": "0:10:14", "remaining_time": "0:06:28", "throughput": 1770.87, "total_tokens": 1089024}
{"current_steps": 1270, "total_steps": 2065, "loss": 0.133, "lr": 1.9425202633763513e-05, "epoch": 3.0750605326876514, "percentage": 61.5, "elapsed_time": "0:10:15", "remaining_time": "0:06:25", "throughput": 1776.84, "total_tokens": 1093376}
{"current_steps": 1275, "total_steps": 2065, "loss": 0.089, "lr": 1.9219370386675388e-05, "epoch": 3.0871670702179177, "percentage": 61.74, "elapsed_time": "0:10:15", "remaining_time": "0:06:21", "throughput": 1782.81, "total_tokens": 1097728}
{"current_steps": 1280, "total_steps": 2065, "loss": 0.2836, "lr": 1.901395130208229e-05, "epoch": 3.099273607748184, "percentage": 61.99, "elapsed_time": "0:10:16", "remaining_time": "0:06:17", "throughput": 1788.45, "total_tokens": 1101888}
{"current_steps": 1285, "total_steps": 2065, "loss": 0.1116, "lr": 1.880896006202876e-05, "epoch": 3.11138014527845, "percentage": 62.23, "elapsed_time": "0:10:16", "remaining_time": "0:06:14", "throughput": 1794.31, "total_tokens": 1106176}
{"current_steps": 1290, "total_steps": 2065, "loss": 0.1027, "lr": 1.860441131797977e-05, "epoch": 3.123486682808717, "percentage": 62.47, "elapsed_time": "0:10:16", "remaining_time": "0:06:10", "throughput": 1799.87, "total_tokens": 1110272}
{"current_steps": 1295, "total_steps": 2065, "loss": 0.1582, "lr": 1.8400319689773474e-05, "epoch": 3.135593220338983, "percentage": 62.71, "elapsed_time": "0:10:17", "remaining_time": "0:06:07", "throughput": 1805.6, "total_tokens": 1114496}
{"current_steps": 1300, "total_steps": 2065, "loss": 0.0408, "lr": 1.8196699764576318e-05, "epoch": 3.1476997578692494, "percentage": 62.95, "elapsed_time": "0:10:17", "remaining_time": "0:06:03", "throughput": 1811.43, "total_tokens": 1118784}
{"current_steps": 1305, "total_steps": 2065, "loss": 0.1234, "lr": 1.7993566095840443e-05, "epoch": 3.1598062953995156, "percentage": 63.2, "elapsed_time": "0:10:18", "remaining_time": "0:05:59", "throughput": 1817.16, "total_tokens": 1123008}
{"current_steps": 1310, "total_steps": 2065, "loss": 0.2236, "lr": 1.7790933202263434e-05, "epoch": 3.171912832929782, "percentage": 63.44, "elapsed_time": "0:10:18", "remaining_time": "0:05:56", "throughput": 1823.19, "total_tokens": 1127424}
{"current_steps": 1315, "total_steps": 2065, "loss": 0.1958, "lr": 1.758881556675073e-05, "epoch": 3.1840193704600486, "percentage": 63.68, "elapsed_time": "0:10:18", "remaining_time": "0:05:52", "throughput": 1829.2, "total_tokens": 1131840}
{"current_steps": 1320, "total_steps": 2065, "loss": 0.1238, "lr": 1.738722763538036e-05, "epoch": 3.196125907990315, "percentage": 63.92, "elapsed_time": "0:10:19", "remaining_time": "0:05:49", "throughput": 1835.11, "total_tokens": 1136192}
{"current_steps": 1325, "total_steps": 2065, "loss": 0.1027, "lr": 1.7186183816370522e-05, "epoch": 3.208232445520581, "percentage": 64.16, "elapsed_time": "0:10:19", "remaining_time": "0:05:45", "throughput": 1841.02, "total_tokens": 1140544}
{"current_steps": 1330, "total_steps": 2065, "loss": 0.0907, "lr": 1.6985698479049702e-05, "epoch": 3.2203389830508473, "percentage": 64.41, "elapsed_time": "0:10:19", "remaining_time": "0:05:42", "throughput": 1847.51, "total_tokens": 1145280}
{"current_steps": 1335, "total_steps": 2065, "loss": 0.1037, "lr": 1.6785785952829717e-05, "epoch": 3.232445520581114, "percentage": 64.65, "elapsed_time": "0:10:20", "remaining_time": "0:05:39", "throughput": 1853.81, "total_tokens": 1149888}
{"current_steps": 1340, "total_steps": 2065, "loss": 0.1776, "lr": 1.6586460526181473e-05, "epoch": 3.2445520581113803, "percentage": 64.89, "elapsed_time": "0:10:20", "remaining_time": "0:05:35", "throughput": 1859.21, "total_tokens": 1153920}
{"current_steps": 1345, "total_steps": 2065, "loss": 0.2125, "lr": 1.6387736445613772e-05, "epoch": 3.2566585956416465, "percentage": 65.13, "elapsed_time": "0:10:21", "remaining_time": "0:05:32", "throughput": 1865.59, "total_tokens": 1158592}
{"current_steps": 1350, "total_steps": 2065, "loss": 0.2252, "lr": 1.6189627914655008e-05, "epoch": 3.2687651331719128, "percentage": 65.38, "elapsed_time": "0:10:21", "remaining_time": "0:05:29", "throughput": 1871.26, "total_tokens": 1162816}
{"current_steps": 1352, "total_steps": 2065, "eval_loss": 0.2091810256242752, "epoch": 3.2736077481840193, "percentage": 65.47, "elapsed_time": "0:10:22", "remaining_time": "0:05:28", "throughput": 1871.63, "total_tokens": 1164544}
{"current_steps": 1355, "total_steps": 2065, "loss": 0.1163, "lr": 1.599214909283805e-05, "epoch": 3.280871670702179, "percentage": 65.62, "elapsed_time": "0:11:34", "remaining_time": "0:06:03", "throughput": 1680.92, "total_tokens": 1167232}
{"current_steps": 1360, "total_steps": 2065, "loss": 0.1094, "lr": 1.579531409468815e-05, "epoch": 3.2929782082324457, "percentage": 65.86, "elapsed_time": "0:11:34", "remaining_time": "0:06:00", "throughput": 1686.37, "total_tokens": 1171648}
{"current_steps": 1365, "total_steps": 2065, "loss": 0.141, "lr": 1.5599136988714186e-05, "epoch": 3.305084745762712, "percentage": 66.1, "elapsed_time": "0:11:35", "remaining_time": "0:05:56", "throughput": 1691.46, "total_tokens": 1175808}
{"current_steps": 1370, "total_steps": 2065, "loss": 0.1296, "lr": 1.5403631796403085e-05, "epoch": 3.317191283292978, "percentage": 66.34, "elapsed_time": "0:11:35", "remaining_time": "0:05:52", "throughput": 1696.9, "total_tokens": 1180224}
{"current_steps": 1375, "total_steps": 2065, "loss": 0.1375, "lr": 1.520881249121767e-05, "epoch": 3.3292978208232444, "percentage": 66.59, "elapsed_time": "0:11:35", "remaining_time": "0:05:49", "throughput": 1702.42, "total_tokens": 1184704}
{"current_steps": 1380, "total_steps": 2065, "loss": 0.1459, "lr": 1.5014692997597962e-05, "epoch": 3.341404358353511, "percentage": 66.83, "elapsed_time": "0:11:36", "remaining_time": "0:05:45", "throughput": 1707.67, "total_tokens": 1188992}
{"current_steps": 1385, "total_steps": 2065, "loss": 0.1535, "lr": 1.4821287189965866e-05, "epoch": 3.3535108958837774, "percentage": 67.07, "elapsed_time": "0:11:36", "remaining_time": "0:05:42", "throughput": 1713.09, "total_tokens": 1193408}
{"current_steps": 1390, "total_steps": 2065, "loss": 0.1246, "lr": 1.4628608891733625e-05, "epoch": 3.3656174334140436, "percentage": 67.31, "elapsed_time": "0:11:37", "remaining_time": "0:05:38", "throughput": 1718.42, "total_tokens": 1197760}
{"current_steps": 1395, "total_steps": 2065, "loss": 0.0863, "lr": 1.4436671874315722e-05, "epoch": 3.37772397094431, "percentage": 67.55, "elapsed_time": "0:11:37", "remaining_time": "0:05:34", "throughput": 1723.3, "total_tokens": 1201792}
{"current_steps": 1400, "total_steps": 2065, "loss": 0.0968, "lr": 1.4245489856144634e-05, "epoch": 3.389830508474576, "percentage": 67.8, "elapsed_time": "0:11:37", "remaining_time": "0:05:31", "throughput": 1728.18, "total_tokens": 1205824}
{"current_steps": 1405, "total_steps": 2065, "loss": 0.0749, "lr": 1.4055076501690311e-05, "epoch": 3.401937046004843, "percentage": 68.04, "elapsed_time": "0:11:38", "remaining_time": "0:05:27", "throughput": 1733.57, "total_tokens": 1210240}
{"current_steps": 1410, "total_steps": 2065, "loss": 0.09, "lr": 1.3865445420483526e-05, "epoch": 3.414043583535109, "percentage": 68.28, "elapsed_time": "0:11:38", "remaining_time": "0:05:24", "throughput": 1738.7, "total_tokens": 1214464}
{"current_steps": 1415, "total_steps": 2065, "loss": 0.1746, "lr": 1.367661016614315e-05, "epoch": 3.4261501210653753, "percentage": 68.52, "elapsed_time": "0:11:38", "remaining_time": "0:05:21", "throughput": 1743.91, "total_tokens": 1218752}
{"current_steps": 1420, "total_steps": 2065, "loss": 0.0826, "lr": 1.3488584235407439e-05, "epoch": 3.4382566585956416, "percentage": 68.77, "elapsed_time": "0:11:39", "remaining_time": "0:05:17", "throughput": 1749.29, "total_tokens": 1223168}
{"current_steps": 1425, "total_steps": 2065, "loss": 0.1469, "lr": 1.3301381067169366e-05, "epoch": 3.450363196125908, "percentage": 69.01, "elapsed_time": "0:11:39", "remaining_time": "0:05:14", "throughput": 1754.31, "total_tokens": 1227328}
{"current_steps": 1430, "total_steps": 2065, "loss": 0.1454, "lr": 1.3115014041516089e-05, "epoch": 3.4624697336561745, "percentage": 69.25, "elapsed_time": "0:11:39", "remaining_time": "0:05:10", "throughput": 1759.16, "total_tokens": 1231360}
{"current_steps": 1435, "total_steps": 2065, "loss": 0.0455, "lr": 1.2929496478772635e-05, "epoch": 3.4745762711864407, "percentage": 69.49, "elapsed_time": "0:11:40", "remaining_time": "0:05:07", "throughput": 1764.09, "total_tokens": 1235456}
{"current_steps": 1440, "total_steps": 2065, "loss": 0.106, "lr": 1.2744841638549842e-05, "epoch": 3.486682808716707, "percentage": 69.73, "elapsed_time": "0:11:40", "remaining_time": "0:05:04", "throughput": 1769.09, "total_tokens": 1239616}
{"current_steps": 1445, "total_steps": 2065, "loss": 0.0763, "lr": 1.2561062718796662e-05, "epoch": 3.4987893462469732, "percentage": 69.98, "elapsed_time": "0:11:41", "remaining_time": "0:05:00", "throughput": 1774.36, "total_tokens": 1243968}
{"current_steps": 1450, "total_steps": 2065, "loss": 0.0978, "lr": 1.2378172854856831e-05, "epoch": 3.5108958837772395, "percentage": 70.22, "elapsed_time": "0:11:41", "remaining_time": "0:04:57", "throughput": 1779.36, "total_tokens": 1248128}
{"current_steps": 1455, "total_steps": 2065, "loss": 0.1328, "lr": 1.2196185118530063e-05, "epoch": 3.523002421307506, "percentage": 70.46, "elapsed_time": "0:11:41", "remaining_time": "0:04:54", "throughput": 1784.35, "total_tokens": 1252288}
{"current_steps": 1456, "total_steps": 2065, "eval_loss": 0.3491859436035156, "epoch": 3.5254237288135593, "percentage": 70.51, "elapsed_time": "0:11:42", "remaining_time": "0:04:53", "throughput": 1783.88, "total_tokens": 1253248}
{"current_steps": 1460, "total_steps": 2065, "loss": 0.1139, "lr": 1.2015112517137744e-05, "epoch": 3.5351089588377724, "percentage": 70.7, "elapsed_time": "0:12:11", "remaining_time": "0:05:03", "throughput": 1718.3, "total_tokens": 1256640}
{"current_steps": 1465, "total_steps": 2065, "loss": 0.1247, "lr": 1.183496799259326e-05, "epoch": 3.5472154963680387, "percentage": 70.94, "elapsed_time": "0:12:11", "remaining_time": "0:04:59", "throughput": 1723.96, "total_tokens": 1261440}
{"current_steps": 1470, "total_steps": 2065, "loss": 0.0777, "lr": 1.1655764420476988e-05, "epoch": 3.559322033898305, "percentage": 71.19, "elapsed_time": "0:12:12", "remaining_time": "0:04:56", "throughput": 1728.86, "total_tokens": 1265664}
{"current_steps": 1475, "total_steps": 2065, "loss": 0.0848, "lr": 1.1477514609116039e-05, "epoch": 3.571428571428571, "percentage": 71.43, "elapsed_time": "0:12:12", "remaining_time": "0:04:52", "throughput": 1733.92, "total_tokens": 1270016}
{"current_steps": 1480, "total_steps": 2065, "loss": 0.1263, "lr": 1.1300231298668786e-05, "epoch": 3.583535108958838, "percentage": 71.67, "elapsed_time": "0:12:12", "remaining_time": "0:04:49", "throughput": 1739.23, "total_tokens": 1274560}
{"current_steps": 1485, "total_steps": 2065, "loss": 0.1362, "lr": 1.1123927160214289e-05, "epoch": 3.595641646489104, "percentage": 71.91, "elapsed_time": "0:12:13", "remaining_time": "0:04:46", "throughput": 1744.36, "total_tokens": 1278976}
{"current_steps": 1490, "total_steps": 2065, "loss": 0.1068, "lr": 1.0948614794846668e-05, "epoch": 3.6077481840193704, "percentage": 72.15, "elapsed_time": "0:12:13", "remaining_time": "0:04:43", "throughput": 1749.24, "total_tokens": 1283200}
{"current_steps": 1495, "total_steps": 2065, "loss": 0.2069, "lr": 1.0774306732774414e-05, "epoch": 3.619854721549637, "percentage": 72.4, "elapsed_time": "0:12:13", "remaining_time": "0:04:39", "throughput": 1753.94, "total_tokens": 1287296}
{"current_steps": 1500, "total_steps": 2065, "loss": 0.1368, "lr": 1.0601015432424819e-05, "epoch": 3.6319612590799033, "percentage": 72.64, "elapsed_time": "0:12:14", "remaining_time": "0:04:36", "throughput": 1759.06, "total_tokens": 1291712}
{"current_steps": 1505, "total_steps": 2065, "loss": 0.1959, "lr": 1.042875327955356e-05, "epoch": 3.6440677966101696, "percentage": 72.88, "elapsed_time": "0:12:14", "remaining_time": "0:04:33", "throughput": 1763.91, "total_tokens": 1295936}
{"current_steps": 1510, "total_steps": 2065, "loss": 0.0932, "lr": 1.0257532586359422e-05, "epoch": 3.656174334140436, "percentage": 73.12, "elapsed_time": "0:12:15", "remaining_time": "0:04:30", "throughput": 1769.35, "total_tokens": 1300608}
{"current_steps": 1515, "total_steps": 2065, "loss": 0.1347, "lr": 1.0087365590604289e-05, "epoch": 3.668280871670702, "percentage": 73.37, "elapsed_time": "0:12:15", "remaining_time": "0:04:26", "throughput": 1774.45, "total_tokens": 1305024}
{"current_steps": 1520, "total_steps": 2065, "loss": 0.1287, "lr": 9.918264454738504e-06, "epoch": 3.6803874092009687, "percentage": 73.61, "elapsed_time": "0:12:15", "remaining_time": "0:04:23", "throughput": 1779.47, "total_tokens": 1309376}
{"current_steps": 1525, "total_steps": 2065, "loss": 0.0818, "lr": 9.75024126503153e-06, "epoch": 3.692493946731235, "percentage": 73.85, "elapsed_time": "0:12:16", "remaining_time": "0:04:20", "throughput": 1784.39, "total_tokens": 1313664}
{"current_steps": 1530, "total_steps": 2065, "loss": 0.0869, "lr": 9.583308030708135e-06, "epoch": 3.7046004842615012, "percentage": 74.09, "elapsed_time": "0:12:16", "remaining_time": "0:04:17", "throughput": 1789.47, "total_tokens": 1318080}
{"current_steps": 1535, "total_steps": 2065, "loss": 0.0893, "lr": 9.417476683090007e-06, "epoch": 3.7167070217917675, "percentage": 74.33, "elapsed_time": "0:12:16", "remaining_time": "0:04:14", "throughput": 1794.46, "total_tokens": 1322432}
{"current_steps": 1540, "total_steps": 2065, "loss": 0.1556, "lr": 9.252759074743034e-06, "epoch": 3.7288135593220337, "percentage": 74.58, "elapsed_time": "0:12:17", "remaining_time": "0:04:11", "throughput": 1799.53, "total_tokens": 1326848}
{"current_steps": 1545, "total_steps": 2065, "loss": 0.0774, "lr": 9.08916697863014e-06, "epoch": 3.7409200968523004, "percentage": 74.82, "elapsed_time": "0:12:17", "remaining_time": "0:04:08", "throughput": 1804.68, "total_tokens": 1331328}
{"current_steps": 1550, "total_steps": 2065, "loss": 0.1253, "lr": 8.926712087269801e-06, "epoch": 3.7530266343825667, "percentage": 75.06, "elapsed_time": "0:12:18", "remaining_time": "0:04:05", "throughput": 1809.33, "total_tokens": 1335424}
{"current_steps": 1555, "total_steps": 2065, "loss": 0.1276, "lr": 8.765406011900368e-06, "epoch": 3.765133171912833, "percentage": 75.3, "elapsed_time": "0:12:18", "remaining_time": "0:04:02", "throughput": 1814.22, "total_tokens": 1339712}
{"current_steps": 1560, "total_steps": 2065, "loss": 0.1842, "lr": 8.605260281650152e-06, "epoch": 3.777239709443099, "percentage": 75.54, "elapsed_time": "0:12:18", "remaining_time": "0:03:59", "throughput": 1819.11, "total_tokens": 1344000}
{"current_steps": 1560, "total_steps": 2065, "eval_loss": 0.21899566054344177, "epoch": 3.777239709443099, "percentage": 75.54, "elapsed_time": "0:12:19", "remaining_time": "0:03:59", "throughput": 1817.44, "total_tokens": 1344000}
{"current_steps": 1565, "total_steps": 2065, "loss": 0.0881, "lr": 8.446286342713419e-06, "epoch": 3.7893462469733654, "percentage": 75.79, "elapsed_time": "0:12:58", "remaining_time": "0:04:08", "throughput": 1731.06, "total_tokens": 1348224}
{"current_steps": 1570, "total_steps": 2065, "loss": 0.1348, "lr": 8.288495557532241e-06, "epoch": 3.801452784503632, "percentage": 76.03, "elapsed_time": "0:12:59", "remaining_time": "0:04:05", "throughput": 1735.78, "total_tokens": 1352576}
{"current_steps": 1575, "total_steps": 2065, "loss": 0.134, "lr": 8.131899203984463e-06, "epoch": 3.8135593220338984, "percentage": 76.27, "elapsed_time": "0:12:59", "remaining_time": "0:04:02", "throughput": 1740.44, "total_tokens": 1356864}
{"current_steps": 1580, "total_steps": 2065, "loss": 0.1141, "lr": 7.976508474577548e-06, "epoch": 3.8256658595641646, "percentage": 76.51, "elapsed_time": "0:12:59", "remaining_time": "0:03:59", "throughput": 1745.1, "total_tokens": 1361152}
{"current_steps": 1585, "total_steps": 2065, "loss": 0.0705, "lr": 7.822334475648654e-06, "epoch": 3.837772397094431, "percentage": 76.76, "elapsed_time": "0:13:00", "remaining_time": "0:03:56", "throughput": 1749.67, "total_tokens": 1365376}
{"current_steps": 1590, "total_steps": 2065, "loss": 0.0907, "lr": 7.669388226570809e-06, "epoch": 3.849878934624697, "percentage": 77.0, "elapsed_time": "0:13:00", "remaining_time": "0:03:53", "throughput": 1754.39, "total_tokens": 1369728}
{"current_steps": 1595, "total_steps": 2065, "loss": 0.1261, "lr": 7.517680658965329e-06, "epoch": 3.861985472154964, "percentage": 77.24, "elapsed_time": "0:13:01", "remaining_time": "0:03:50", "throughput": 1759.19, "total_tokens": 1374144}
{"current_steps": 1600, "total_steps": 2065, "loss": 0.1084, "lr": 7.367222615920477e-06, "epoch": 3.87409200968523, "percentage": 77.48, "elapsed_time": "0:13:01", "remaining_time": "0:03:47", "throughput": 1763.75, "total_tokens": 1378368}
{"current_steps": 1605, "total_steps": 2065, "loss": 0.0813, "lr": 7.2180248512164896e-06, "epoch": 3.8861985472154963, "percentage": 77.72, "elapsed_time": "0:13:01", "remaining_time": "0:03:44", "throughput": 1768.14, "total_tokens": 1382464}
{"current_steps": 1610, "total_steps": 2065, "loss": 0.0805, "lr": 7.070098028556948e-06, "epoch": 3.898305084745763, "percentage": 77.97, "elapsed_time": "0:13:02", "remaining_time": "0:03:41", "throughput": 1772.92, "total_tokens": 1386880}
{"current_steps": 1615, "total_steps": 2065, "loss": 0.1924, "lr": 6.923452720806611e-06, "epoch": 3.910411622276029, "percentage": 78.21, "elapsed_time": "0:13:02", "remaining_time": "0:03:38", "throughput": 1777.7, "total_tokens": 1391296}
{"current_steps": 1620, "total_steps": 2065, "loss": 0.0609, "lr": 6.778099409235739e-06, "epoch": 3.9225181598062955, "percentage": 78.45, "elapsed_time": "0:13:03", "remaining_time": "0:03:35", "throughput": 1782.16, "total_tokens": 1395456}
{"current_steps": 1625, "total_steps": 2065, "loss": 0.0932, "lr": 6.634048482770946e-06, "epoch": 3.9346246973365617, "percentage": 78.69, "elapsed_time": "0:13:03", "remaining_time": "0:03:32", "throughput": 1786.62, "total_tokens": 1399616}
{"current_steps": 1630, "total_steps": 2065, "loss": 0.1241, "lr": 6.491310237252679e-06, "epoch": 3.946731234866828, "percentage": 78.93, "elapsed_time": "0:13:03", "remaining_time": "0:03:29", "throughput": 1790.99, "total_tokens": 1403712}
{"current_steps": 1635, "total_steps": 2065, "loss": 0.1232, "lr": 6.349894874699344e-06, "epoch": 3.9588377723970947, "percentage": 79.18, "elapsed_time": "0:13:04", "remaining_time": "0:03:26", "throughput": 1795.76, "total_tokens": 1408128}
{"current_steps": 1640, "total_steps": 2065, "loss": 0.0787, "lr": 6.209812502578114e-06, "epoch": 3.970944309927361, "percentage": 79.42, "elapsed_time": "0:13:04", "remaining_time": "0:03:23", "throughput": 1800.44, "total_tokens": 1412480}
{"current_steps": 1645, "total_steps": 2065, "loss": 0.0494, "lr": 6.071073133082492e-06, "epoch": 3.983050847457627, "percentage": 79.66, "elapsed_time": "0:13:04", "remaining_time": "0:03:20", "throughput": 1804.95, "total_tokens": 1416704}
{"current_steps": 1650, "total_steps": 2065, "loss": 0.0969, "lr": 5.933686682416758e-06, "epoch": 3.9951573849878934, "percentage": 79.9, "elapsed_time": "0:13:05", "remaining_time": "0:03:17", "throughput": 1809.72, "total_tokens": 1421120}
{"current_steps": 1655, "total_steps": 2065, "loss": 0.09, "lr": 5.797662970087184e-06, "epoch": 4.00726392251816, "percentage": 80.15, "elapsed_time": "0:13:05", "remaining_time": "0:03:14", "throughput": 1813.5, "total_tokens": 1424944}
{"current_steps": 1660, "total_steps": 2065, "loss": 0.0897, "lr": 5.663011718200201e-06, "epoch": 4.019370460048426, "percentage": 80.39, "elapsed_time": "0:13:06", "remaining_time": "0:03:11", "throughput": 1818.17, "total_tokens": 1429296}
{"current_steps": 1664, "total_steps": 2065, "eval_loss": 0.2532218098640442, "epoch": 4.0290556900726395, "percentage": 80.58, "elapsed_time": "0:13:07", "remaining_time": "0:03:09", "throughput": 1820.54, "total_tokens": 1432880}
{"current_steps": 1665, "total_steps": 2065, "loss": 0.0316, "lr": 5.529742550767544e-06, "epoch": 4.031476997578692, "percentage": 80.63, "elapsed_time": "0:13:55", "remaining_time": "0:03:20", "throughput": 1715.95, "total_tokens": 1433776}
{"current_steps": 1670, "total_steps": 2065, "loss": 0.0492, "lr": 5.397864993018367e-06, "epoch": 4.043583535108959, "percentage": 80.87, "elapsed_time": "0:13:55", "remaining_time": "0:03:17", "throughput": 1720.23, "total_tokens": 1438000}
{"current_steps": 1675, "total_steps": 2065, "loss": 0.029, "lr": 5.267388470718449e-06, "epoch": 4.0556900726392255, "percentage": 81.11, "elapsed_time": "0:13:56", "remaining_time": "0:03:14", "throughput": 1724.67, "total_tokens": 1442352}
{"current_steps": 1680, "total_steps": 2065, "loss": 0.052, "lr": 5.138322309496504e-06, "epoch": 4.067796610169491, "percentage": 81.36, "elapsed_time": "0:13:56", "remaining_time": "0:03:11", "throughput": 1729.11, "total_tokens": 1446704}
{"current_steps": 1685, "total_steps": 2065, "loss": 0.0469, "lr": 5.010675734177631e-06, "epoch": 4.079903147699758, "percentage": 81.6, "elapsed_time": "0:13:57", "remaining_time": "0:03:08", "throughput": 1733.32, "total_tokens": 1450864}
{"current_steps": 1690, "total_steps": 2065, "loss": 0.0316, "lr": 4.884457868124001e-06, "epoch": 4.092009685230024, "percentage": 81.84, "elapsed_time": "0:13:57", "remaining_time": "0:03:05", "throughput": 1737.6, "total_tokens": 1455088}
{"current_steps": 1695, "total_steps": 2065, "loss": 0.0228, "lr": 4.759677732582782e-06, "epoch": 4.1041162227602905, "percentage": 82.08, "elapsed_time": "0:13:57", "remaining_time": "0:03:02", "throughput": 1741.94, "total_tokens": 1459376}
{"current_steps": 1700, "total_steps": 2065, "loss": 0.0529, "lr": 4.636344246041321e-06, "epoch": 4.116222760290557, "percentage": 82.32, "elapsed_time": "0:13:58", "remaining_time": "0:02:59", "throughput": 1746.21, "total_tokens": 1463600}
{"current_steps": 1705, "total_steps": 2065, "loss": 0.0565, "lr": 4.514466223589753e-06, "epoch": 4.128329297820823, "percentage": 82.57, "elapsed_time": "0:13:58", "remaining_time": "0:02:57", "throughput": 1750.77, "total_tokens": 1468080}
{"current_steps": 1710, "total_steps": 2065, "loss": 0.0695, "lr": 4.3940523762909135e-06, "epoch": 4.14043583535109, "percentage": 82.81, "elapsed_time": "0:13:58", "remaining_time": "0:02:54", "throughput": 1755.4, "total_tokens": 1472624}
{"current_steps": 1715, "total_steps": 2065, "loss": 0.0511, "lr": 4.275111310557758e-06, "epoch": 4.1525423728813555, "percentage": 83.05, "elapsed_time": "0:13:59", "remaining_time": "0:02:51", "throughput": 1759.87, "total_tokens": 1477040}
{"current_steps": 1720, "total_steps": 2065, "loss": 0.0311, "lr": 4.1576515275382226e-06, "epoch": 4.164648910411622, "percentage": 83.29, "elapsed_time": "0:13:59", "remaining_time": "0:02:48", "throughput": 1764.19, "total_tokens": 1481328}
{"current_steps": 1725, "total_steps": 2065, "loss": 0.0394, "lr": 4.0416814225076035e-06, "epoch": 4.176755447941889, "percentage": 83.54, "elapsed_time": "0:14:00", "remaining_time": "0:02:45", "throughput": 1768.73, "total_tokens": 1485808}
{"current_steps": 1730, "total_steps": 2065, "loss": 0.0255, "lr": 3.9272092842685345e-06, "epoch": 4.188861985472155, "percentage": 83.78, "elapsed_time": "0:14:00", "remaining_time": "0:02:42", "throughput": 1773.13, "total_tokens": 1490160}
{"current_steps": 1735, "total_steps": 2065, "loss": 0.0073, "lr": 3.814243294558542e-06, "epoch": 4.200968523002421, "percentage": 84.02, "elapsed_time": "0:14:00", "remaining_time": "0:02:39", "throughput": 1777.51, "total_tokens": 1494512}
{"current_steps": 1740, "total_steps": 2065, "loss": 0.0562, "lr": 3.702791527465274e-06, "epoch": 4.213075060532688, "percentage": 84.26, "elapsed_time": "0:14:01", "remaining_time": "0:02:37", "throughput": 1781.45, "total_tokens": 1498480}
{"current_steps": 1745, "total_steps": 2065, "loss": 0.0463, "lr": 3.592861948849416e-06, "epoch": 4.225181598062954, "percentage": 84.5, "elapsed_time": "0:14:01", "remaining_time": "0:02:34", "throughput": 1785.76, "total_tokens": 1502768}
{"current_steps": 1750, "total_steps": 2065, "loss": 0.0429, "lr": 3.484462415775333e-06, "epoch": 4.237288135593221, "percentage": 84.75, "elapsed_time": "0:14:01", "remaining_time": "0:02:31", "throughput": 1789.99, "total_tokens": 1506992}
{"current_steps": 1755, "total_steps": 2065, "loss": 0.0035, "lr": 3.377600675949527e-06, "epoch": 4.249394673123486, "percentage": 84.99, "elapsed_time": "0:14:02", "remaining_time": "0:02:28", "throughput": 1794.51, "total_tokens": 1511472}
{"current_steps": 1760, "total_steps": 2065, "loss": 0.0395, "lr": 3.272284367166825e-06, "epoch": 4.261501210653753, "percentage": 85.23, "elapsed_time": "0:14:02", "remaining_time": "0:02:26", "throughput": 1798.87, "total_tokens": 1515824}
{"current_steps": 1765, "total_steps": 2065, "loss": 0.0337, "lr": 3.1685210167645335e-06, "epoch": 4.27360774818402, "percentage": 85.47, "elapsed_time": "0:14:03", "remaining_time": "0:02:23", "throughput": 1803.24, "total_tokens": 1520176}
{"current_steps": 1768, "total_steps": 2065, "eval_loss": 0.4314914643764496, "epoch": 4.280871670702179, "percentage": 85.62, "elapsed_time": "0:14:04", "remaining_time": "0:02:21", "throughput": 1803.91, "total_tokens": 1522544}
{"current_steps": 1770, "total_steps": 2065, "loss": 0.008, "lr": 3.0663180410843982e-06, "epoch": 4.285714285714286, "percentage": 85.71, "elapsed_time": "0:14:32", "remaining_time": "0:02:25", "throughput": 1747.79, "total_tokens": 1524336}
{"current_steps": 1775, "total_steps": 2065, "loss": 0.1379, "lr": 2.9656827449425494e-06, "epoch": 4.297820823244552, "percentage": 85.96, "elapsed_time": "0:14:32", "remaining_time": "0:02:22", "throughput": 1751.87, "total_tokens": 1528560}
{"current_steps": 1780, "total_steps": 2065, "loss": 0.0391, "lr": 2.86662232110739e-06, "epoch": 4.309927360774818, "percentage": 86.2, "elapsed_time": "0:14:32", "remaining_time": "0:02:19", "throughput": 1755.89, "total_tokens": 1532720}
{"current_steps": 1785, "total_steps": 2065, "loss": 0.0481, "lr": 2.7691438497855134e-06, "epoch": 4.322033898305085, "percentage": 86.44, "elapsed_time": "0:14:33", "remaining_time": "0:02:16", "throughput": 1759.98, "total_tokens": 1536944}
{"current_steps": 1790, "total_steps": 2065, "loss": 0.0365, "lr": 2.673254298115646e-06, "epoch": 4.3341404358353515, "percentage": 86.68, "elapsed_time": "0:14:33", "remaining_time": "0:02:14", "throughput": 1764.07, "total_tokens": 1541168}
{"current_steps": 1795, "total_steps": 2065, "loss": 0.0094, "lr": 2.5789605196706674e-06, "epoch": 4.346246973365617, "percentage": 86.92, "elapsed_time": "0:14:34", "remaining_time": "0:02:11", "throughput": 1768.23, "total_tokens": 1545456}
{"current_steps": 1800, "total_steps": 2065, "loss": 0.0798, "lr": 2.4862692539677906e-06, "epoch": 4.358353510895884, "percentage": 87.17, "elapsed_time": "0:14:34", "remaining_time": "0:02:08", "throughput": 1772.52, "total_tokens": 1549872}
{"current_steps": 1805, "total_steps": 2065, "loss": 0.113, "lr": 2.3951871259868503e-06, "epoch": 4.37046004842615, "percentage": 87.41, "elapsed_time": "0:14:34", "remaining_time": "0:02:06", "throughput": 1776.82, "total_tokens": 1554288}
{"current_steps": 1810, "total_steps": 2065, "loss": 0.1113, "lr": 2.3057206456967905e-06, "epoch": 4.3825665859564165, "percentage": 87.65, "elapsed_time": "0:14:35", "remaining_time": "0:02:03", "throughput": 1780.75, "total_tokens": 1558384}
{"current_steps": 1815, "total_steps": 2065, "loss": 0.0523, "lr": 2.217876207590375e-06, "epoch": 4.394673123486683, "percentage": 87.89, "elapsed_time": "0:14:35", "remaining_time": "0:02:00", "throughput": 1784.75, "total_tokens": 1562544}
{"current_steps": 1820, "total_steps": 2065, "loss": 0.0659, "lr": 2.131660090227139e-06, "epoch": 4.406779661016949, "percentage": 88.14, "elapsed_time": "0:14:35", "remaining_time": "0:01:57", "throughput": 1789.31, "total_tokens": 1567216}
{"current_steps": 1825, "total_steps": 2065, "loss": 0.0756, "lr": 2.0470784557846652e-06, "epoch": 4.418886198547216, "percentage": 88.38, "elapsed_time": "0:14:36", "remaining_time": "0:01:55", "throughput": 1793.51, "total_tokens": 1571568}
{"current_steps": 1830, "total_steps": 2065, "loss": 0.0018, "lr": 1.964137349618114e-06, "epoch": 4.4309927360774815, "percentage": 88.62, "elapsed_time": "0:14:36", "remaining_time": "0:01:52", "throughput": 1797.57, "total_tokens": 1575792}
{"current_steps": 1835, "total_steps": 2065, "loss": 0.0419, "lr": 1.8828426998281689e-06, "epoch": 4.443099273607748, "percentage": 88.86, "elapsed_time": "0:14:36", "remaining_time": "0:01:49", "throughput": 1801.7, "total_tokens": 1580080}
{"current_steps": 1840, "total_steps": 2065, "loss": 0.0692, "lr": 1.8032003168373306e-06, "epoch": 4.455205811138015, "percentage": 89.1, "elapsed_time": "0:14:37", "remaining_time": "0:01:47", "throughput": 1805.53, "total_tokens": 1584112}
{"current_steps": 1845, "total_steps": 2065, "loss": 0.0456, "lr": 1.7252158929746131e-06, "epoch": 4.467312348668281, "percentage": 89.35, "elapsed_time": "0:14:37", "remaining_time": "0:01:44", "throughput": 1809.65, "total_tokens": 1588400}
{"current_steps": 1850, "total_steps": 2065, "loss": 0.0504, "lr": 1.6488950020686955e-06, "epoch": 4.479418886198547, "percentage": 89.59, "elapsed_time": "0:14:38", "remaining_time": "0:01:42", "throughput": 1813.9, "total_tokens": 1592816}
{"current_steps": 1855, "total_steps": 2065, "loss": 0.0573, "lr": 1.5742430990495466e-06, "epoch": 4.491525423728813, "percentage": 89.83, "elapsed_time": "0:14:38", "remaining_time": "0:01:39", "throughput": 1818.23, "total_tokens": 1597296}
{"current_steps": 1860, "total_steps": 2065, "loss": 0.0293, "lr": 1.5012655195585368e-06, "epoch": 4.50363196125908, "percentage": 90.07, "elapsed_time": "0:14:38", "remaining_time": "0:01:36", "throughput": 1822.4, "total_tokens": 1601648}
{"current_steps": 1865, "total_steps": 2065, "loss": 0.1156, "lr": 1.4299674795670764e-06, "epoch": 4.5157384987893465, "percentage": 90.31, "elapsed_time": "0:14:39", "remaining_time": "0:01:34", "throughput": 1826.5, "total_tokens": 1605936}
{"current_steps": 1870, "total_steps": 2065, "loss": 0.126, "lr": 1.360354075003828e-06, "epoch": 4.527845036319612, "percentage": 90.56, "elapsed_time": "0:14:39", "remaining_time": "0:01:31", "throughput": 1830.46, "total_tokens": 1610096}
{"current_steps": 1872, "total_steps": 2065, "eval_loss": 0.42201921343803406, "epoch": 4.532687651331719, "percentage": 90.65, "elapsed_time": "0:14:40", "remaining_time": "0:01:30", "throughput": 1830.66, "total_tokens": 1611760}
{"current_steps": 1875, "total_steps": 2065, "loss": 0.0436, "lr": 1.2924302813904582e-06, "epoch": 4.539951573849879, "percentage": 90.8, "elapsed_time": "0:15:21", "remaining_time": "0:01:33", "throughput": 1752.76, "total_tokens": 1614384}
{"current_steps": 1880, "total_steps": 2065, "loss": 0.0591, "lr": 1.226200953486037e-06, "epoch": 4.552058111380145, "percentage": 91.04, "elapsed_time": "0:15:21", "remaining_time": "0:01:30", "throughput": 1756.85, "total_tokens": 1618800}
{"current_steps": 1885, "total_steps": 2065, "loss": 0.0027, "lr": 1.1616708249400449e-06, "epoch": 4.5641646489104115, "percentage": 91.28, "elapsed_time": "0:15:21", "remaining_time": "0:01:28", "throughput": 1760.65, "total_tokens": 1622960}
{"current_steps": 1890, "total_steps": 2065, "loss": 0.037, "lr": 1.0988445079540388e-06, "epoch": 4.576271186440678, "percentage": 91.53, "elapsed_time": "0:15:22", "remaining_time": "0:01:25", "throughput": 1764.39, "total_tokens": 1627056}
{"current_steps": 1895, "total_steps": 2065, "loss": 0.0205, "lr": 1.0377264929520125e-06, "epoch": 4.588377723970944, "percentage": 91.77, "elapsed_time": "0:15:22", "remaining_time": "0:01:22", "throughput": 1768.39, "total_tokens": 1631408}
{"current_steps": 1900, "total_steps": 2065, "loss": 0.0687, "lr": 9.783211482594285e-07, "epoch": 4.600484261501211, "percentage": 92.01, "elapsed_time": "0:15:22", "remaining_time": "0:01:20", "throughput": 1772.52, "total_tokens": 1635888}
{"current_steps": 1905, "total_steps": 2065, "loss": 0.0049, "lr": 9.206327197910203e-07, "epoch": 4.6125907990314765, "percentage": 92.25, "elapsed_time": "0:15:23", "remaining_time": "0:01:17", "throughput": 1776.45, "total_tokens": 1640176}
{"current_steps": 1910, "total_steps": 2065, "loss": 0.056, "lr": 8.646653307473079e-07, "epoch": 4.624697336561743, "percentage": 92.49, "elapsed_time": "0:15:23", "remaining_time": "0:01:14", "throughput": 1780.44, "total_tokens": 1644528}
{"current_steps": 1915, "total_steps": 2065, "loss": 0.002, "lr": 8.10422981319911e-07, "epoch": 4.63680387409201, "percentage": 92.74, "elapsed_time": "0:15:24", "remaining_time": "0:01:12", "throughput": 1784.83, "total_tokens": 1649264}
{"current_steps": 1920, "total_steps": 2065, "loss": 0.0111, "lr": 7.579095484056192e-07, "epoch": 4.648910411622276, "percentage": 92.98, "elapsed_time": "0:15:24", "remaining_time": "0:01:09", "throughput": 1789.01, "total_tokens": 1653808}
{"current_steps": 1925, "total_steps": 2065, "loss": 0.0023, "lr": 7.07128785329314e-07, "epoch": 4.661016949152542, "percentage": 93.22, "elapsed_time": "0:15:24", "remaining_time": "0:01:07", "throughput": 1793.13, "total_tokens": 1658288}
{"current_steps": 1930, "total_steps": 2065, "loss": 0.0228, "lr": 6.580843215757082e-07, "epoch": 4.673123486682809, "percentage": 93.46, "elapsed_time": "0:15:25", "remaining_time": "0:01:04", "throughput": 1797.04, "total_tokens": 1662576}
{"current_steps": 1935, "total_steps": 2065, "loss": 0.0221, "lr": 6.107796625299117e-07, "epoch": 4.685230024213075, "percentage": 93.7, "elapsed_time": "0:15:25", "remaining_time": "0:01:02", "throughput": 1801.15, "total_tokens": 1667056}
{"current_steps": 1940, "total_steps": 2065, "loss": 0.0733, "lr": 5.652181892269181e-07, "epoch": 4.697336561743342, "percentage": 93.95, "elapsed_time": "0:15:25", "remaining_time": "0:00:59", "throughput": 1805.25, "total_tokens": 1671536}
{"current_steps": 1945, "total_steps": 2065, "loss": 0.0023, "lr": 5.214031581099149e-07, "epoch": 4.709443099273607, "percentage": 94.19, "elapsed_time": "0:15:26", "remaining_time": "0:00:57", "throughput": 1809.22, "total_tokens": 1675888}
{"current_steps": 1950, "total_steps": 2065, "loss": 0.0341, "lr": 4.793377007975719e-07, "epoch": 4.721549636803874, "percentage": 94.43, "elapsed_time": "0:15:26", "remaining_time": "0:00:54", "throughput": 1813.12, "total_tokens": 1680176}
{"current_steps": 1955, "total_steps": 2065, "loss": 0.0568, "lr": 4.3902482386018186e-07, "epoch": 4.733656174334141, "percentage": 94.67, "elapsed_time": "0:15:27", "remaining_time": "0:00:52", "throughput": 1816.95, "total_tokens": 1684400}
{"current_steps": 1960, "total_steps": 2065, "loss": 0.1211, "lr": 4.004674086047905e-07, "epoch": 4.745762711864407, "percentage": 94.92, "elapsed_time": "0:15:27", "remaining_time": "0:00:49", "throughput": 1820.97, "total_tokens": 1688816}
{"current_steps": 1965, "total_steps": 2065, "loss": 0.0408, "lr": 3.636682108692502e-07, "epoch": 4.757869249394673, "percentage": 95.16, "elapsed_time": "0:15:27", "remaining_time": "0:00:47", "throughput": 1825.13, "total_tokens": 1693360}
{"current_steps": 1970, "total_steps": 2065, "loss": 0.0647, "lr": 3.2862986082524416e-07, "epoch": 4.76997578692494, "percentage": 95.4, "elapsed_time": "0:15:28", "remaining_time": "0:00:44", "throughput": 1828.94, "total_tokens": 1697584}
{"current_steps": 1975, "total_steps": 2065, "loss": 0.0336, "lr": 2.953548627903202e-07, "epoch": 4.782082324455206, "percentage": 95.64, "elapsed_time": "0:15:28", "remaining_time": "0:00:42", "throughput": 1832.96, "total_tokens": 1702000}
{"current_steps": 1976, "total_steps": 2065, "eval_loss": 0.4348176121711731, "epoch": 4.784503631961259, "percentage": 95.69, "elapsed_time": "0:15:29", "remaining_time": "0:00:41", "throughput": 1832.42, "total_tokens": 1702832}
{"current_steps": 1980, "total_steps": 2065, "loss": 0.1448, "lr": 2.6384559504886166e-07, "epoch": 4.7941888619854724, "percentage": 95.88, "elapsed_time": "0:16:21", "remaining_time": "0:00:42", "throughput": 1737.89, "total_tokens": 1706416}
{"current_steps": 1985, "total_steps": 2065, "loss": 0.0163, "lr": 2.3410430968214824e-07, "epoch": 4.806295399515738, "percentage": 96.13, "elapsed_time": "0:16:22", "remaining_time": "0:00:39", "throughput": 1741.85, "total_tokens": 1710960}
{"current_steps": 1990, "total_steps": 2065, "loss": 0.1048, "lr": 2.0613313240735454e-07, "epoch": 4.818401937046005, "percentage": 96.37, "elapsed_time": "0:16:22", "remaining_time": "0:00:37", "throughput": 1745.75, "total_tokens": 1715440}
{"current_steps": 1995, "total_steps": 2065, "loss": 0.0295, "lr": 1.7993406242563238e-07, "epoch": 4.830508474576272, "percentage": 96.61, "elapsed_time": "0:16:23", "remaining_time": "0:00:34", "throughput": 1749.45, "total_tokens": 1719728}
{"current_steps": 2000, "total_steps": 2065, "loss": 0.0007, "lr": 1.5550897227922523e-07, "epoch": 4.842615012106537, "percentage": 96.85, "elapsed_time": "0:16:23", "remaining_time": "0:00:31", "throughput": 1753.41, "total_tokens": 1724272}
{"current_steps": 2005, "total_steps": 2065, "loss": 0.064, "lr": 1.3285960771761697e-07, "epoch": 4.854721549636804, "percentage": 97.09, "elapsed_time": "0:16:23", "remaining_time": "0:00:29", "throughput": 1757.11, "total_tokens": 1728560}
{"current_steps": 2010, "total_steps": 2065, "loss": 0.0289, "lr": 1.119875875727705e-07, "epoch": 4.86682808716707, "percentage": 97.34, "elapsed_time": "0:16:24", "remaining_time": "0:00:26", "throughput": 1761.06, "total_tokens": 1733104}
{"current_steps": 2015, "total_steps": 2065, "loss": 0.0127, "lr": 9.289440364341485e-08, "epoch": 4.878934624697337, "percentage": 97.58, "elapsed_time": "0:16:24", "remaining_time": "0:00:24", "throughput": 1764.62, "total_tokens": 1737264}
{"current_steps": 2020, "total_steps": 2065, "loss": 0.0664, "lr": 7.558142058842754e-08, "epoch": 4.891041162227603, "percentage": 97.82, "elapsed_time": "0:16:24", "remaining_time": "0:00:21", "throughput": 1768.18, "total_tokens": 1741424}
{"current_steps": 2025, "total_steps": 2065, "loss": 0.0657, "lr": 6.004987582929055e-08, "epoch": 4.903147699757869, "percentage": 98.06, "elapsed_time": "0:16:25", "remaining_time": "0:00:19", "throughput": 1771.8, "total_tokens": 1745648}
{"current_steps": 2030, "total_steps": 2065, "loss": 0.045, "lr": 4.63008794616554e-08, "epoch": 4.915254237288136, "percentage": 98.31, "elapsed_time": "0:16:25", "remaining_time": "0:00:16", "throughput": 1775.41, "total_tokens": 1749872}
{"current_steps": 2035, "total_steps": 2065, "loss": 0.0431, "lr": 3.433541417599551e-08, "epoch": 4.927360774818402, "percentage": 98.55, "elapsed_time": "0:16:25", "remaining_time": "0:00:14", "throughput": 1779.22, "total_tokens": 1754288}
{"current_steps": 2040, "total_steps": 2065, "loss": 0.0332, "lr": 2.4154335187365207e-08, "epoch": 4.939467312348668, "percentage": 98.79, "elapsed_time": "0:16:26", "remaining_time": "0:00:12", "throughput": 1782.96, "total_tokens": 1758640}
{"current_steps": 2045, "total_steps": 2065, "loss": 0.0602, "lr": 1.5758370174284722e-08, "epoch": 4.951573849878935, "percentage": 99.03, "elapsed_time": "0:16:26", "remaining_time": "0:00:09", "throughput": 1786.63, "total_tokens": 1762928}
{"current_steps": 2050, "total_steps": 2065, "loss": 0.0118, "lr": 9.14811922672898e-09, "epoch": 4.963680387409201, "percentage": 99.27, "elapsed_time": "0:16:27", "remaining_time": "0:00:07", "throughput": 1790.43, "total_tokens": 1767344}
{"current_steps": 2055, "total_steps": 2065, "loss": 0.0392, "lr": 4.324054803223065e-09, "epoch": 4.9757869249394675, "percentage": 99.52, "elapsed_time": "0:16:27", "remaining_time": "0:00:04", "throughput": 1794.1, "total_tokens": 1771632}
{"current_steps": 2060, "total_steps": 2065, "loss": 0.0333, "lr": 1.286521697091425e-09, "epoch": 4.987893462469733, "percentage": 99.76, "elapsed_time": "0:16:27", "remaining_time": "0:00:02", "throughput": 1797.58, "total_tokens": 1775728}
{"current_steps": 2065, "total_steps": 2065, "loss": 0.0653, "lr": 3.5737011805370145e-11, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "0:16:28", "remaining_time": "0:00:00", "throughput": 1801.14, "total_tokens": 1780000}
{"current_steps": 2065, "total_steps": 2065, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "0:16:54", "remaining_time": "0:00:00", "throughput": 1753.7, "total_tokens": 1780000}